From 5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e Mon Sep 17 00:00:00 2001
From: Jesús
Date: Mon, 18 Oct 2021 15:24:21 -0500
Subject: update from upstream

---
 hypervideo_dl/YoutubeDL.py | 2739 ++++++++++-----
 hypervideo_dl/__init__.py | 479 ++-
 hypervideo_dl/__main__.py | 2 +-
 hypervideo_dl/aes.py | 265 +-
 hypervideo_dl/cache.py | 2 +
 hypervideo_dl/compat.py | 3048 +---------------
 hypervideo_dl/cookies.py | 745 ++++
 hypervideo_dl/downloader/__init__.py | 105 +-
 hypervideo_dl/downloader/common.py | 171 +-
 hypervideo_dl/downloader/dash.py | 88 +-
 hypervideo_dl/downloader/external.py | 258 +-
 hypervideo_dl/downloader/f4m.py | 19 +-
 hypervideo_dl/downloader/fragment.py | 255 +-
 hypervideo_dl/downloader/hls.py | 285 +-
 hypervideo_dl/downloader/http.py | 37 +-
 hypervideo_dl/downloader/ism.py | 58 +-
 hypervideo_dl/downloader/mhtml.py | 202 ++
 hypervideo_dl/downloader/niconico.py | 57 +
 hypervideo_dl/downloader/rtmp.py | 18 +-
 hypervideo_dl/downloader/rtsp.py | 4 +-
 hypervideo_dl/downloader/websocket.py | 59 +
 hypervideo_dl/downloader/youtube_live_chat.py | 236 ++
 hypervideo_dl/extractor/__init__.py | 9 +-
 hypervideo_dl/extractor/abc.py | 104 +-
 hypervideo_dl/extractor/abcnews.py | 3 +-
 hypervideo_dl/extractor/abcotvs.py | 3 +-
 hypervideo_dl/extractor/acast.py | 3 +-
 hypervideo_dl/extractor/adobepass.py | 155 +-
 hypervideo_dl/extractor/adobetv.py | 8 +-
 hypervideo_dl/extractor/adultswim.py | 3 +-
 hypervideo_dl/extractor/aenetworks.py | 11 +-
 hypervideo_dl/extractor/afreecatv.py | 23 +-
 hypervideo_dl/extractor/aljazeera.py | 5 +-
 hypervideo_dl/extractor/alura.py | 179 +
 hypervideo_dl/extractor/amcnetworks.py | 69 +-
 hypervideo_dl/extractor/americastestkitchen.py | 5 +-
 hypervideo_dl/extractor/animelab.py | 285 ++
 hypervideo_dl/extractor/anvato.py | 25 +-
 .../extractor/anvato_token_generator/__init__.py | 7 +
 .../extractor/anvato_token_generator/common.py | 6 +
 .../extractor/anvato_token_generator/nfl.py | 30 +
 hypervideo_dl/extractor/aol.py | 9 +-
 hypervideo_dl/extractor/apa.py | 2 +-
 hypervideo_dl/extractor/aparat.py | 3 +-
 hypervideo_dl/extractor/appleconnect.py | 13 +-
 hypervideo_dl/extractor/appletrailers.py | 2 +-
 hypervideo_dl/extractor/archiveorg.py | 427 ++-
 hypervideo_dl/extractor/arcpublishing.py | 11 +-
 hypervideo_dl/extractor/ard.py | 181 +-
 hypervideo_dl/extractor/arkena.py | 6 +-
 hypervideo_dl/extractor/arte.py | 15 +-
 hypervideo_dl/extractor/asiancrush.py | 4 +-
 hypervideo_dl/extractor/atresplayer.py | 13 +-
 hypervideo_dl/extractor/atvat.py | 125 +-
 hypervideo_dl/extractor/audius.py | 274 ++
 hypervideo_dl/extractor/awaan.py | 7 +-
 hypervideo_dl/extractor/azmedien.py | 3 +-
 hypervideo_dl/extractor/baidu.py | 3 +-
 hypervideo_dl/extractor/bandcamp.py | 48 +-
 hypervideo_dl/extractor/bannedvideo.py | 158 +
 hypervideo_dl/extractor/bbc.py | 9 +-
 hypervideo_dl/extractor/beatport.py | 4 +-
 hypervideo_dl/extractor/beeg.py | 4 +-
 hypervideo_dl/extractor/behindkink.py | 3 +-
 hypervideo_dl/extractor/bellmedia.py | 3 +-
 hypervideo_dl/extractor/bet.py | 2 +
 hypervideo_dl/extractor/bilibili.py | 468 ++-
 hypervideo_dl/extractor/bitchute.py | 32 +-
 hypervideo_dl/extractor/bitwave.py | 61 +
 hypervideo_dl/extractor/blackboardcollaborate.py | 67 +
 hypervideo_dl/extractor/blinkx.py | 86 +
 hypervideo_dl/extractor/bokecc.py | 5 +-
 hypervideo_dl/extractor/bongacams.py | 3 +-
 hypervideo_dl/extractor/box.py | 3 +-
 hypervideo_dl/extractor/bpb.py | 2 +-
 hypervideo_dl/extractor/br.py | 5 +-
 hypervideo_dl/extractor/bravotv.py | 38 +-
 hypervideo_dl/extractor/breakcom.py | 3 +-
 hypervideo_dl/extractor/brightcove.py | 33 +-
 hypervideo_dl/extractor/byutv.py | 17 +-
 hypervideo_dl/extractor/c56.py | 3 +-
 hypervideo_dl/extractor/cam4.py | 32 +
 hypervideo_dl/extractor/cammodels.py | 2 +-
 hypervideo_dl/extractor/canalplus.py | 5 +-
 hypervideo_dl/extractor/canvas.py | 83 +-
 hypervideo_dl/extractor/cbc.py | 476 ++-
 hypervideo_dl/extractor/cbs.py | 134 +-
 hypervideo_dl/extractor/cbsinteractive.py | 3 +-
 hypervideo_dl/extractor/cbssports.py | 3 +-
 hypervideo_dl/extractor/ccma.py | 3 +-
 hypervideo_dl/extractor/cctv.py | 2 +-
 hypervideo_dl/extractor/cda.py | 44 +-
 hypervideo_dl/extractor/ceskatelevize.py | 5 +-
 hypervideo_dl/extractor/cgtn.py | 64 +
 hypervideo_dl/extractor/channel9.py | 8 +-
 hypervideo_dl/extractor/chilloutzone.py | 3 +-
 hypervideo_dl/extractor/chingari.py | 209 ++
 hypervideo_dl/extractor/cinemax.py | 3 +-
 hypervideo_dl/extractor/ciscolive.py | 7 +-
 hypervideo_dl/extractor/ciscowebex.py | 90 +
 hypervideo_dl/extractor/cjsw.py | 3 +-
 hypervideo_dl/extractor/clyp.py | 7 +-
 hypervideo_dl/extractor/cmt.py | 6 +-
 hypervideo_dl/extractor/cnbc.py | 3 +-
 hypervideo_dl/extractor/cnn.py | 3 +-
 hypervideo_dl/extractor/comedycentral.py | 5 +-
 hypervideo_dl/extractor/common.py | 1653 ++++++---
 hypervideo_dl/extractor/commonmistakes.py | 4 +-
 hypervideo_dl/extractor/commonprotocols.py | 14 +
 hypervideo_dl/extractor/condenast.py | 2 +-
 hypervideo_dl/extractor/corus.py | 5 +-
 hypervideo_dl/extractor/coub.py | 6 +-
 hypervideo_dl/extractor/crackle.py | 279 +-
 hypervideo_dl/extractor/crunchyroll.py | 133 +-
 hypervideo_dl/extractor/cultureunplugged.py | 3 +-
 hypervideo_dl/extractor/curiositystream.py | 22 +-
 hypervideo_dl/extractor/dailymotion.py | 10 +-
 hypervideo_dl/extractor/damtomo.py | 113 +
 hypervideo_dl/extractor/daum.py | 9 +-
 hypervideo_dl/extractor/dbtv.py | 2 +-
 hypervideo_dl/extractor/deezer.py | 127 +-
 hypervideo_dl/extractor/dfb.py | 3 +-
 hypervideo_dl/extractor/digiteka.py | 2 +-
 hypervideo_dl/extractor/discovery.py | 3 +-
 hypervideo_dl/extractor/discoverynetworks.py | 3 +-
 hypervideo_dl/extractor/discoveryplusindia.py | 98 +
 hypervideo_dl/extractor/disney.py | 5 +-
 hypervideo_dl/extractor/dispeak.py | 2 +-
 hypervideo_dl/extractor/dlive.py | 3 +-
 hypervideo_dl/extractor/doodstream.py | 71 +
 hypervideo_dl/extractor/dplay.py | 112 +-
 hypervideo_dl/extractor/drbonanza.py | 3 +-
 hypervideo_dl/extractor/dropbox.py | 4 +-
 hypervideo_dl/extractor/drtuber.py | 2 +-
 hypervideo_dl/extractor/drtv.py | 4 +-
 hypervideo_dl/extractor/dtube.py | 3 +-
 hypervideo_dl/extractor/duboku.py | 242 ++
 hypervideo_dl/extractor/dw.py | 14 +-
 hypervideo_dl/extractor/eagleplatform.py | 2 +-
 hypervideo_dl/extractor/egghead.py | 19 +-
 hypervideo_dl/extractor/eighttracks.py | 20 +-
 hypervideo_dl/extractor/einthusan.py | 3 +-
 hypervideo_dl/extractor/elonet.py | 89 +
 hypervideo_dl/extractor/epicon.py | 119 +
 hypervideo_dl/extractor/eporner.py | 3 +-
 hypervideo_dl/extractor/eroprofile.py | 39 +
 hypervideo_dl/extractor/espn.py | 2 +-
 hypervideo_dl/extractor/europa.py | 4 +-
 hypervideo_dl/extractor/euscreen.py | 64 +
 hypervideo_dl/extractor/everyonesmixtape.py | 76 +
 hypervideo_dl/extractor/extractors.py | 320 +-
 hypervideo_dl/extractor/facebook.py | 151 +-
 hypervideo_dl/extractor/fancode.py | 187 +
 hypervideo_dl/extractor/fc2.py | 3 +-
 hypervideo_dl/extractor/filmmodu.py | 74 +
 hypervideo_dl/extractor/filmweb.py | 3 +-
 hypervideo_dl/extractor/firsttv.py | 2 +-
 hypervideo_dl/extractor/fivetv.py | 3 +-
 hypervideo_dl/extractor/flickr.py | 2 +-
 hypervideo_dl/extractor/fourtube.py | 4 +-
 hypervideo_dl/extractor/foxnews.py | 2 +-
 hypervideo_dl/extractor/francetv.py | 385 +-
 hypervideo_dl/extractor/frontendmasters.py | 2 +-
 hypervideo_dl/extractor/funimation.py | 408 ++-
 hypervideo_dl/extractor/funk.py | 3 +-
 hypervideo_dl/extractor/fxnetworks.py | 77 +
 hypervideo_dl/extractor/gab.py | 64 +
 hypervideo_dl/extractor/gaia.py | 3 +-
 hypervideo_dl/extractor/gamestar.py | 3 +-
 hypervideo_dl/extractor/gaskrank.py | 2 +-
 hypervideo_dl/extractor/gazeta.py | 3 +-
 hypervideo_dl/extractor/gdcvault.py | 2 +-
 hypervideo_dl/extractor/gedidigital.py | 57 +-
 hypervideo_dl/extractor/generic.py | 320 +-
 hypervideo_dl/extractor/gettr.py | 110 +
 hypervideo_dl/extractor/giantbomb.py | 3 +-
 hypervideo_dl/extractor/globo.py | 157 +-
 hypervideo_dl/extractor/go.py | 19 +-
 hypervideo_dl/extractor/godtube.py | 3 +-
 hypervideo_dl/extractor/googledrive.py | 4 +-
 hypervideo_dl/extractor/googlepodcasts.py | 3 +-
 hypervideo_dl/extractor/googlesearch.py | 28 +-
 hypervideo_dl/extractor/gopro.py | 110 +
 hypervideo_dl/extractor/gotostage.py | 73 +
 hypervideo_dl/extractor/gronkh.py | 43 +
 hypervideo_dl/extractor/hearthisat.py | 90 +-
 hypervideo_dl/extractor/hidive.py | 100 +-
 hypervideo_dl/extractor/hotstar.py | 303 +-
 hypervideo_dl/extractor/hrfensehen.py | 102 +
 hypervideo_dl/extractor/hrti.py | 5 +-
 hypervideo_dl/extractor/hungama.py | 58 +-
 hypervideo_dl/extractor/ichinanalive.py | 167 +
 hypervideo_dl/extractor/ign.py | 2 +-
 hypervideo_dl/extractor/imggaming.py | 5 +-
 hypervideo_dl/extractor/imgur.py | 2 +-
 hypervideo_dl/extractor/instagram.py | 35 +-
 hypervideo_dl/extractor/internetvideoarchive.py | 7 +-
 hypervideo_dl/extractor/iprima.py | 2 +-
 hypervideo_dl/extractor/iqiyi.py | 2 +-
 hypervideo_dl/extractor/itv.py | 173 +-
 hypervideo_dl/extractor/ivi.py | 33 +-
 hypervideo_dl/extractor/ivideon.py | 3 +-
 hypervideo_dl/extractor/iwara.py | 24 +
 hypervideo_dl/extractor/jeuxvideo.py | 3 +-
 hypervideo_dl/extractor/joj.py | 216 +-
 hypervideo_dl/extractor/jove.py | 3 +-
 hypervideo_dl/extractor/jwplatform.py | 11 +-
 hypervideo_dl/extractor/kakao.py | 121 +-
 hypervideo_dl/extractor/kaltura.py | 4 +-
 hypervideo_dl/extractor/kanalplay.py | 96 +
 hypervideo_dl/extractor/keezmovies.py | 4 +-
 hypervideo_dl/extractor/kinja.py | 2 +-
 hypervideo_dl/extractor/koo.py | 116 +
 hypervideo_dl/extractor/kusi.py | 3 +-
 hypervideo_dl/extractor/kuwo.py | 2 +-
 hypervideo_dl/extractor/la7.py | 174 +-
 hypervideo_dl/extractor/lbry.py | 30 +-
 hypervideo_dl/extractor/lecturio.py | 4 +-
 hypervideo_dl/extractor/leeco.py | 2 +-
 hypervideo_dl/extractor/lego.py | 3 +-
 hypervideo_dl/extractor/libsyn.py | 3 +-
 hypervideo_dl/extractor/lifenews.py | 2 +-
 hypervideo_dl/extractor/limelight.py | 11 +-
 hypervideo_dl/extractor/line.py | 10 +-
 hypervideo_dl/extractor/linkedin.py | 32 +-
 hypervideo_dl/extractor/linuxacademy.py | 21 +-
 hypervideo_dl/extractor/litv.py | 2 +-
 hypervideo_dl/extractor/livestream.py | 8 +-
 hypervideo_dl/extractor/lnkgo.py | 3 +-
 hypervideo_dl/extractor/localnews8.py | 3 +-
 hypervideo_dl/extractor/lovehomeporn.py | 3 +-
 hypervideo_dl/extractor/lrt.py | 3 +-
 hypervideo_dl/extractor/lynda.py | 6 +-
 hypervideo_dl/extractor/magentamusik360.py | 61 +
 hypervideo_dl/extractor/mailru.py | 25 +-
 hypervideo_dl/extractor/manoto.py | 138 +
 hypervideo_dl/extractor/massengeschmacktv.py | 2 +-
 hypervideo_dl/extractor/mdr.py | 4 +-
 hypervideo_dl/extractor/medaltv.py | 4 +-
 hypervideo_dl/extractor/mediaite.py | 93 +
 hypervideo_dl/extractor/mediaklikk.py | 104 +
 hypervideo_dl/extractor/mediaset.py | 116 +-
 hypervideo_dl/extractor/mediasite.py | 69 +-
 hypervideo_dl/extractor/metacafe.py | 4 +-
 hypervideo_dl/extractor/metacritic.py | 2 +-
 hypervideo_dl/extractor/mgoon.py | 3 +-
 hypervideo_dl/extractor/microsoftvirtualacademy.py | 4 +-
 hypervideo_dl/extractor/mildom.py | 258 ++
 hypervideo_dl/extractor/minoto.py | 5 +-
 hypervideo_dl/extractor/mirrativ.py | 134 +
 hypervideo_dl/extractor/mit.py | 2 +-
 hypervideo_dl/extractor/mixcloud.py | 7 +-
 hypervideo_dl/extractor/moevideo.py | 3 +-
 hypervideo_dl/extractor/mojvideo.py | 3 +-
 hypervideo_dl/extractor/morningstar.py | 3 +-
 hypervideo_dl/extractor/motherless.py | 30 +-
 hypervideo_dl/extractor/moviezine.py | 3 +-
 hypervideo_dl/extractor/msn.py | 4 +-
 hypervideo_dl/extractor/mtv.py | 188 +-
 hypervideo_dl/extractor/muenchentv.py | 2 +-
 hypervideo_dl/extractor/musescore.py | 67 +
 hypervideo_dl/extractor/mxplayer.py | 222 ++
 hypervideo_dl/extractor/mychannels.py | 3 +-
 hypervideo_dl/extractor/myspace.py | 16 +-
 hypervideo_dl/extractor/myvideoge.py | 56 +
 hypervideo_dl/extractor/n1.py | 136 +
 hypervideo_dl/extractor/naver.py | 85 +
 hypervideo_dl/extractor/nba.py | 13 +-
 hypervideo_dl/extractor/nbc.py | 161 +-
 hypervideo_dl/extractor/ndr.py | 179 +-
 hypervideo_dl/extractor/nebula.py | 238 ++
 hypervideo_dl/extractor/neteasemusic.py | 2 +-
 hypervideo_dl/extractor/netzkino.py | 50 +-
 hypervideo_dl/extractor/newgrounds.py | 217 +-
 hypervideo_dl/extractor/nexx.py | 2 +-
 hypervideo_dl/extractor/nfhsnetwork.py | 144 +
 hypervideo_dl/extractor/nhk.py | 5 +-
 hypervideo_dl/extractor/nhl.py | 3 +-
 hypervideo_dl/extractor/nick.py | 85 +-
 hypervideo_dl/extractor/niconico.py | 660 +++-
 hypervideo_dl/extractor/ninecninemedia.py | 9 +-
 hypervideo_dl/extractor/ninenow.py | 58 +-
 hypervideo_dl/extractor/nitter.py | 228 ++
 hypervideo_dl/extractor/noco.py | 235 ++
 hypervideo_dl/extractor/nova.py | 4 +-
 hypervideo_dl/extractor/novaplay.py | 63 +
 hypervideo_dl/extractor/npo.py | 7 +-
 hypervideo_dl/extractor/nrk.py | 8 +-
 hypervideo_dl/extractor/ntvde.py | 2 +-
 hypervideo_dl/extractor/nuvid.py | 86 +-
 hypervideo_dl/extractor/nytimes.py | 10 +-
 hypervideo_dl/extractor/nzherald.py | 98 +
 hypervideo_dl/extractor/odnoklassniki.py | 5 +-
 hypervideo_dl/extractor/olympics.py | 56 +
 hypervideo_dl/extractor/on24.py | 91 +
 hypervideo_dl/extractor/ondemandkorea.py | 38 +-
 hypervideo_dl/extractor/onet.py | 4 +-
 hypervideo_dl/extractor/ooyala.py | 5 +-
 hypervideo_dl/extractor/openload.py | 3 +-
 hypervideo_dl/extractor/openrec.py | 126 +
 hypervideo_dl/extractor/ora.py | 2 +-
 hypervideo_dl/extractor/orf.py | 5 +-
 hypervideo_dl/extractor/packtpub.py | 5 +-
 hypervideo_dl/extractor/palcomp3.py | 7 +-
 hypervideo_dl/extractor/pandoratv.py | 7 +-
 hypervideo_dl/extractor/paramountplus.py | 145 +
 hypervideo_dl/extractor/parliamentliveuk.py | 76 +-
 hypervideo_dl/extractor/parlview.py | 68 +
 hypervideo_dl/extractor/patreon.py | 86 +
 hypervideo_dl/extractor/pbs.py | 33 +-
 hypervideo_dl/extractor/peertube.py | 818 ++++-
 hypervideo_dl/extractor/peloton.py | 222 ++
 hypervideo_dl/extractor/performgroup.py | 3 +-
 hypervideo_dl/extractor/periscope.py | 8 +-
 hypervideo_dl/extractor/philharmoniedeparis.py | 2 +-
 hypervideo_dl/extractor/photobucket.py | 3 +-
 hypervideo_dl/extractor/piksel.py | 2 +-
 hypervideo_dl/extractor/pinterest.py | 6 +-
 hypervideo_dl/extractor/pladform.py | 4 +-
 hypervideo_dl/extractor/playfm.py | 3 +-
 hypervideo_dl/extractor/playplustv.py | 3 +-
 hypervideo_dl/extractor/playtvak.py | 2 +-
 hypervideo_dl/extractor/playwire.py | 3 +-
 hypervideo_dl/extractor/pluralsight.py | 9 +-
 hypervideo_dl/extractor/plutotv.py | 184 +
 hypervideo_dl/extractor/podomatic.py | 3 +-
 hypervideo_dl/extractor/pokemon.py | 73 +-
 hypervideo_dl/extractor/polskieradio.py | 47 +-
 hypervideo_dl/extractor/popcorntimes.py | 3 +-
 hypervideo_dl/extractor/popcorntv.py | 3 +-
 hypervideo_dl/extractor/porncom.py | 2 +-
 hypervideo_dl/extractor/pornflip.py | 82 +
 hypervideo_dl/extractor/pornhd.py | 3 +-
 hypervideo_dl/extractor/pornhub.py | 126 +-
 hypervideo_dl/extractor/pornovoisines.py | 3 +-
 hypervideo_dl/extractor/pornoxo.py | 3 +-
 hypervideo_dl/extractor/presstv.py | 3 +-
 hypervideo_dl/extractor/projectveritas.py | 55 +
 hypervideo_dl/extractor/prosiebensat1.py | 4 +-
 hypervideo_dl/extractor/pyvideo.py | 2 +-
 hypervideo_dl/extractor/qqmusic.py | 2 +-
 hypervideo_dl/extractor/radiko.py | 234 ++
 hypervideo_dl/extractor/radiocanada.py | 3 +-
 hypervideo_dl/extractor/radiofrance.py | 4 +-
 hypervideo_dl/extractor/radlive.py | 179 +
 hypervideo_dl/extractor/rai.py | 146 +-
 hypervideo_dl/extractor/raywenderlich.py | 2 +-
 hypervideo_dl/extractor/rbmaradio.py | 3 +-
 hypervideo_dl/extractor/rcs.py | 427 +++
 hypervideo_dl/extractor/rcti.py | 354 ++
 hypervideo_dl/extractor/redbulltv.py | 5 +-
 hypervideo_dl/extractor/reddit.py | 32 +-
 hypervideo_dl/extractor/redtube.py | 3 +-
 hypervideo_dl/extractor/rice.py | 2 +-
 hypervideo_dl/extractor/rmcdecouverte.py | 29 +-
 hypervideo_dl/extractor/roosterteeth.py | 26 +-
 hypervideo_dl/extractor/roxwel.py | 3 +-
 hypervideo_dl/extractor/rtbf.py | 4 +-
 hypervideo_dl/extractor/rtl2.py | 4 +-
 hypervideo_dl/extractor/rtp.py | 88 +-
 hypervideo_dl/extractor/rts.py | 2 +-
 hypervideo_dl/extractor/rtve.py | 3 +-
 hypervideo_dl/extractor/rumble.py | 47 +-
 hypervideo_dl/extractor/rutube.py | 11 +-
 hypervideo_dl/extractor/rutv.py | 8 +-
 hypervideo_dl/extractor/ruutu.py | 6 +-
 hypervideo_dl/extractor/safari.py | 11 +-
 hypervideo_dl/extractor/saitosan.py | 78 +
 hypervideo_dl/extractor/sapo.py | 2 +-
 hypervideo_dl/extractor/savefrom.py | 3 +-
 hypervideo_dl/extractor/scrippsnetworks.py | 5 +-
 hypervideo_dl/extractor/seeker.py | 2 +-
 hypervideo_dl/extractor/senateisvp.py | 2 +-
 hypervideo_dl/extractor/sendtonews.py | 4 +-
 hypervideo_dl/extractor/sevenplus.py | 48 +-
 hypervideo_dl/extractor/seznamzpravy.py | 4 +-
 hypervideo_dl/extractor/shahid.py | 6 +-
 hypervideo_dl/extractor/shemaroome.py | 104 +
 hypervideo_dl/extractor/simplecast.py | 2 +-
 hypervideo_dl/extractor/sina.py | 9 +-
 hypervideo_dl/extractor/sixplay.py | 8 +-
 hypervideo_dl/extractor/skynewsau.py | 46 +
 hypervideo_dl/extractor/slideshare.py | 3 +-
 hypervideo_dl/extractor/snotr.py | 3 +-
 hypervideo_dl/extractor/sohu.py | 4 +-
 hypervideo_dl/extractor/sonyliv.py | 72 +-
 hypervideo_dl/extractor/soundcloud.py | 279 +-
 hypervideo_dl/extractor/soundgasm.py | 2 +-
 hypervideo_dl/extractor/southpark.py | 64 +-
 hypervideo_dl/extractor/sovietscloset.py | 221 ++
 hypervideo_dl/extractor/spankbang.py | 32 +-
 hypervideo_dl/extractor/spankwire.py | 2 +-
 hypervideo_dl/extractor/spiegeltv.py | 17 +
 hypervideo_dl/extractor/sport5.py | 3 +-
 hypervideo_dl/extractor/sportdeutschland.py | 11 +-
 hypervideo_dl/extractor/springboardplatform.py | 2 +-
 hypervideo_dl/extractor/srgssr.py | 19 +-
 hypervideo_dl/extractor/stanfordoc.py | 2 +-
 hypervideo_dl/extractor/startv.py | 103 +
 hypervideo_dl/extractor/steam.py | 4 +-
 hypervideo_dl/extractor/streamable.py | 8 +-
 hypervideo_dl/extractor/streamanity.py | 51 +
 hypervideo_dl/extractor/streamcloud.py | 4 +-
 hypervideo_dl/extractor/stv.py | 3 +-
 hypervideo_dl/extractor/svt.py | 10 +-
 hypervideo_dl/extractor/tagesschau.py | 4 +-
 hypervideo_dl/extractor/tastytrade.py | 43 +
 hypervideo_dl/extractor/tbs.py | 11 +-
 hypervideo_dl/extractor/teachable.py | 4 +-
 hypervideo_dl/extractor/teachertube.py | 2 +-
 hypervideo_dl/extractor/techtalks.py | 2 +-
 hypervideo_dl/extractor/tele13.py | 2 +-
 hypervideo_dl/extractor/tele5.py | 4 +-
 hypervideo_dl/extractor/telemb.py | 4 +-
 hypervideo_dl/extractor/telemundo.py | 58 +
 hypervideo_dl/extractor/tennistv.py | 10 +-
 hypervideo_dl/extractor/tenplay.py | 88 +-
 hypervideo_dl/extractor/testurl.py | 2 +-
 hypervideo_dl/extractor/tf1.py | 3 +-
 hypervideo_dl/extractor/theplatform.py | 13 +-
 hypervideo_dl/extractor/theta.py | 87 +
 hypervideo_dl/extractor/theweatherchannel.py | 3 +-
 hypervideo_dl/extractor/thisav.py | 3 +-
 hypervideo_dl/extractor/threeqsdn.py | 24 +-
 hypervideo_dl/extractor/tiktok.py | 602 +++-
 hypervideo_dl/extractor/tinypic.py | 2 +-
 hypervideo_dl/extractor/tmz.py | 240 +-
 hypervideo_dl/extractor/tnaflix.py | 2 +-
 hypervideo_dl/extractor/toggle.py | 10 +-
 hypervideo_dl/extractor/tokentube.py | 152 +
 hypervideo_dl/extractor/toongoggles.py | 3 +-
 hypervideo_dl/extractor/toutv.py | 2 +-
 hypervideo_dl/extractor/traileraddict.py | 2 +-
 hypervideo_dl/extractor/trovo.py | 73 +-
 hypervideo_dl/extractor/trutv.py | 3 +-
 hypervideo_dl/extractor/tubitv.py | 43 +-
 hypervideo_dl/extractor/tumblr.py | 3 +-
 hypervideo_dl/extractor/turbo.py | 2 +-
 hypervideo_dl/extractor/turner.py | 7 +-
 hypervideo_dl/extractor/tv2.py | 136 +-
 hypervideo_dl/extractor/tv2hu.py | 132 +-
 hypervideo_dl/extractor/tv4.py | 31 +-
 hypervideo_dl/extractor/tv5mondeplus.py | 40 +-
 hypervideo_dl/extractor/tv5unis.py | 3 +-
 hypervideo_dl/extractor/tver.py | 3 +-
 hypervideo_dl/extractor/tvigle.py | 3 +-
 hypervideo_dl/extractor/tvland.py | 7 +-
 hypervideo_dl/extractor/tvnow.py | 172 +-
 hypervideo_dl/extractor/tvp.py | 2 +-
 hypervideo_dl/extractor/tvplay.py | 48 +-
 hypervideo_dl/extractor/twentyfourvideo.py | 3 +-
 hypervideo_dl/extractor/twentythreevideo.py | 3 +-
 hypervideo_dl/extractor/twitcasting.py | 111 +-
 hypervideo_dl/extractor/twitch.py | 10 +-
 hypervideo_dl/extractor/twitter.py | 46 +-
 hypervideo_dl/extractor/udemy.py | 2 +-
 hypervideo_dl/extractor/ukcolumn.py | 72 +
 hypervideo_dl/extractor/umg.py | 10 +-
 hypervideo_dl/extractor/unistra.py | 2 +-
 hypervideo_dl/extractor/uol.py | 1 -
 hypervideo_dl/extractor/uplynk.py | 5 +-
 hypervideo_dl/extractor/urort.py | 2 +-
 hypervideo_dl/extractor/urplay.py | 17 +-
 hypervideo_dl/extractor/usanetwork.py | 2 +-
 hypervideo_dl/extractor/ustream.py | 4 +-
 hypervideo_dl/extractor/ustudio.py | 5 +-
 hypervideo_dl/extractor/utreon.py | 85 +
 hypervideo_dl/extractor/varzesh3.py | 7 +-
 hypervideo_dl/extractor/veo.py | 74 +
 hypervideo_dl/extractor/vesti.py | 2 +-
 hypervideo_dl/extractor/vevo.py | 140 +-
 hypervideo_dl/extractor/vgtv.py | 2 +-
 hypervideo_dl/extractor/vh1.py | 27 +-
 hypervideo_dl/extractor/vice.py | 6 +-
 hypervideo_dl/extractor/viddler.py | 3 +-
 hypervideo_dl/extractor/videa.py | 54 +-
 hypervideo_dl/extractor/videomore.py | 12 +-
 hypervideo_dl/extractor/vidio.py | 234 +-
 hypervideo_dl/extractor/vidzi.py | 68 +
 hypervideo_dl/extractor/vier.py | 4 +-
 hypervideo_dl/extractor/viewlift.py | 6 +-
 hypervideo_dl/extractor/viidea.py | 2 +-
 hypervideo_dl/extractor/viki.py | 328 +-
 hypervideo_dl/extractor/vimeo.py | 368 +-
 hypervideo_dl/extractor/vine.py | 4 +-
 hypervideo_dl/extractor/viu.py | 151 +-
 hypervideo_dl/extractor/vk.py | 4 +-
 hypervideo_dl/extractor/vlive.py | 77 +-
 hypervideo_dl/extractor/voicy.py | 147 +
 hypervideo_dl/extractor/voot.py | 58 +-
 hypervideo_dl/extractor/vrt.py | 11 +-
 hypervideo_dl/extractor/vrv.py | 3 +-
 hypervideo_dl/extractor/vube.py | 10 +-
 hypervideo_dl/extractor/vupload.py | 51 +
 hypervideo_dl/extractor/vvvvid.py | 4 +-
 hypervideo_dl/extractor/vzaar.py | 2 +-
 hypervideo_dl/extractor/wakanim.py | 14 +-
 hypervideo_dl/extractor/walla.py | 2 +-
 hypervideo_dl/extractor/wat.py | 16 +-
 hypervideo_dl/extractor/watchbox.py | 3 +-
 hypervideo_dl/extractor/watchindianporn.py | 2 +-
 hypervideo_dl/extractor/wdr.py | 17 +-
 hypervideo_dl/extractor/whowatch.py | 99 +
 hypervideo_dl/extractor/wimtv.py | 163 +
 hypervideo_dl/extractor/wistia.py | 2 +-
 hypervideo_dl/extractor/xboxclips.py | 7 +-
 hypervideo_dl/extractor/xfileshare.py | 2 +-
 hypervideo_dl/extractor/xhamster.py | 9 +-
 hypervideo_dl/extractor/ximalaya.py | 2 +-
 hypervideo_dl/extractor/xnxx.py | 2 +-
 hypervideo_dl/extractor/xstream.py | 4 +-
 hypervideo_dl/extractor/xtube.py | 18 +-
 hypervideo_dl/extractor/xxxymovies.py | 3 +-
 hypervideo_dl/extractor/yahoo.py | 63 +-
 hypervideo_dl/extractor/yandexdisk.py | 3 +-
 hypervideo_dl/extractor/yandexmusic.py | 13 +-
 hypervideo_dl/extractor/yandexvideo.py | 88 +
 hypervideo_dl/extractor/youjizz.py | 3 +-
 hypervideo_dl/extractor/youku.py | 2 +-
 hypervideo_dl/extractor/youporn.py | 2 +-
 hypervideo_dl/extractor/youtube.py | 3668 ++++++++++++++------
 hypervideo_dl/extractor/zapiks.py | 2 +-
 hypervideo_dl/extractor/zaq1.py | 101 +
 hypervideo_dl/extractor/zattoo.py | 6 +-
 hypervideo_dl/extractor/zdf.py | 42 +-
 hypervideo_dl/extractor/zee5.py | 244 ++
 hypervideo_dl/extractor/zingmp3.py | 5 +-
 hypervideo_dl/extractor/zoom.py | 15 +-
 hypervideo_dl/extractor/zype.py | 7 +-
 hypervideo_dl/minicurses.py | 109 +
 hypervideo_dl/options.py | 1234 +++++--
 hypervideo_dl/postprocessor/__init__.py | 43 +-
 hypervideo_dl/postprocessor/common.py | 125 +-
 hypervideo_dl/postprocessor/embedthumbnail.py | 279 +-
 hypervideo_dl/postprocessor/exec.py | 42 +
 hypervideo_dl/postprocessor/ffmpeg.py | 732 +++-
 hypervideo_dl/postprocessor/metadataparser.py | 116 +
 hypervideo_dl/postprocessor/modify_chapters.py | 336 ++
 .../postprocessor/movefilesafterdownload.py | 54 +
 hypervideo_dl/postprocessor/sponskrub.py | 96 +
 hypervideo_dl/postprocessor/sponsorblock.py | 96 +
 hypervideo_dl/postprocessor/xattrpp.py | 13 +-
 hypervideo_dl/utils.py | 1053 +++++-
 hypervideo_dl/version.py | 2 +-
 hypervideo_dl/webvtt.py | 402 +++
 545 files changed, 33635 insertions(+), 11270 deletions(-)
 create mode 100644 hypervideo_dl/cookies.py
 create mode 100644 hypervideo_dl/downloader/mhtml.py
 create mode 100644 hypervideo_dl/downloader/niconico.py
 create mode 100644 hypervideo_dl/downloader/websocket.py
 create mode 100644 hypervideo_dl/downloader/youtube_live_chat.py
 create mode 100644 hypervideo_dl/extractor/alura.py
 create mode 100644 hypervideo_dl/extractor/animelab.py
 create mode 100644 hypervideo_dl/extractor/anvato_token_generator/__init__.py
 create mode 100644 hypervideo_dl/extractor/anvato_token_generator/common.py
 create mode 100644 hypervideo_dl/extractor/anvato_token_generator/nfl.py
 create mode 100644 hypervideo_dl/extractor/audius.py
 create mode 100644 hypervideo_dl/extractor/bannedvideo.py
 create mode 100644 hypervideo_dl/extractor/bitwave.py
 create mode 100644 hypervideo_dl/extractor/blackboardcollaborate.py
 create mode 100644 hypervideo_dl/extractor/blinkx.py
 create mode 100644 hypervideo_dl/extractor/cam4.py
 create mode 100644 hypervideo_dl/extractor/cgtn.py
 create mode 100644 hypervideo_dl/extractor/chingari.py
 create mode 100644 hypervideo_dl/extractor/ciscowebex.py
 create mode 100644 hypervideo_dl/extractor/damtomo.py
 create mode 100644 hypervideo_dl/extractor/discoveryplusindia.py
 create mode 100644 hypervideo_dl/extractor/doodstream.py
 create mode 100644 hypervideo_dl/extractor/duboku.py
 create mode 100644 hypervideo_dl/extractor/elonet.py
 create mode 100644 hypervideo_dl/extractor/epicon.py
 create mode 100644 hypervideo_dl/extractor/euscreen.py
 create mode 100644 hypervideo_dl/extractor/everyonesmixtape.py
 create mode 100644 hypervideo_dl/extractor/fancode.py
 create mode 100644 hypervideo_dl/extractor/filmmodu.py
 create mode 100644 hypervideo_dl/extractor/fxnetworks.py
 create mode 100644 hypervideo_dl/extractor/gab.py
 create mode 100644 hypervideo_dl/extractor/gettr.py
 create mode 100644 hypervideo_dl/extractor/gopro.py
 create mode 100644 hypervideo_dl/extractor/gotostage.py
 create mode 100644 hypervideo_dl/extractor/gronkh.py
 create mode 100644 hypervideo_dl/extractor/hrfensehen.py
 create mode 100644 hypervideo_dl/extractor/ichinanalive.py
 create mode 100644 hypervideo_dl/extractor/kanalplay.py
 create mode 100644 hypervideo_dl/extractor/koo.py
 create mode 100644 hypervideo_dl/extractor/magentamusik360.py
 create mode 100644 hypervideo_dl/extractor/manoto.py
 create mode 100644 hypervideo_dl/extractor/mediaite.py
 create mode 100644 hypervideo_dl/extractor/mediaklikk.py
 create mode 100644 hypervideo_dl/extractor/mildom.py
 create mode 100644 hypervideo_dl/extractor/mirrativ.py
 create mode 100644 hypervideo_dl/extractor/musescore.py
 create mode 100644 hypervideo_dl/extractor/mxplayer.py
 create mode 100644 hypervideo_dl/extractor/myvideoge.py
 create mode 100644 hypervideo_dl/extractor/n1.py
 create mode 100644 hypervideo_dl/extractor/nebula.py
 create mode 100644 hypervideo_dl/extractor/nfhsnetwork.py
 create mode 100644 hypervideo_dl/extractor/nitter.py
 create mode 100644 hypervideo_dl/extractor/noco.py
 create mode 100644 hypervideo_dl/extractor/novaplay.py
 create mode 100644 hypervideo_dl/extractor/nzherald.py
 create mode 100644 hypervideo_dl/extractor/olympics.py
 create mode 100644 hypervideo_dl/extractor/on24.py
 create mode 100644 hypervideo_dl/extractor/openrec.py
 create mode 100644 hypervideo_dl/extractor/paramountplus.py
 create mode 100644 hypervideo_dl/extractor/parlview.py
 create mode 100644 hypervideo_dl/extractor/peloton.py
 create mode 100644 hypervideo_dl/extractor/plutotv.py
 create mode 100644 hypervideo_dl/extractor/pornflip.py
 create mode 100644 hypervideo_dl/extractor/projectveritas.py
 create mode 100644 hypervideo_dl/extractor/radiko.py
 create mode 100644 hypervideo_dl/extractor/radlive.py
 create mode 100644 hypervideo_dl/extractor/rcs.py
 create mode 100644 hypervideo_dl/extractor/rcti.py
 create mode 100644 hypervideo_dl/extractor/saitosan.py
 create mode 100644 hypervideo_dl/extractor/shemaroome.py
 create mode 100644 hypervideo_dl/extractor/skynewsau.py
 create mode 100644 hypervideo_dl/extractor/sovietscloset.py
 create mode 100644 hypervideo_dl/extractor/spiegeltv.py
 create mode 100644 hypervideo_dl/extractor/startv.py
 create mode 100644 hypervideo_dl/extractor/streamanity.py
 create mode 100644 hypervideo_dl/extractor/tastytrade.py
 create mode 100644 hypervideo_dl/extractor/telemundo.py
 create mode 100644 hypervideo_dl/extractor/theta.py
 create mode 100644 hypervideo_dl/extractor/tokentube.py
 create mode 100644 hypervideo_dl/extractor/ukcolumn.py
 create mode 100644 hypervideo_dl/extractor/utreon.py
 create mode 100644 hypervideo_dl/extractor/veo.py
 create mode 100644 hypervideo_dl/extractor/vidzi.py
 create mode 100644 hypervideo_dl/extractor/voicy.py
 create mode 100644 hypervideo_dl/extractor/vupload.py
 create mode 100644 hypervideo_dl/extractor/whowatch.py
 create mode 100644 hypervideo_dl/extractor/wimtv.py
 create mode 100644 hypervideo_dl/extractor/zaq1.py
 create mode 100644 hypervideo_dl/extractor/zee5.py
 create mode 100644 hypervideo_dl/minicurses.py
 create mode 100644 hypervideo_dl/postprocessor/exec.py
 create mode 100644 hypervideo_dl/postprocessor/metadataparser.py
 create mode 100644 hypervideo_dl/postprocessor/modify_chapters.py
 create mode 100644 hypervideo_dl/postprocessor/movefilesafterdownload.py
 create mode 100644 hypervideo_dl/postprocessor/sponskrub.py
 create mode 100644 hypervideo_dl/postprocessor/sponsorblock.py
 create mode 100644 hypervideo_dl/webvtt.py

(limited to 'hypervideo_dl')
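The hunks below rework the public `YoutubeDL` API whose parameters are documented in the class docstring. For orientation, a minimal usage sketch of that API (the URL is a placeholder, and the entry points are assumed to match the yt-dlp upstream this fork tracks):

```python
# Minimal sketch of the API reworked in the diff below.
# Assumptions: hypervideo_dl exposes YoutubeDL like its upstream; the URL is hypothetical.
from hypervideo_dl import YoutubeDL

with YoutubeDL({'format': 'best'}) as ydl:  # params dict documented in the docstring below
    ydl.download(['https://example.com/watch?v=0'])  # placeholder URL
```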
diff --git a/hypervideo_dl/YoutubeDL.py b/hypervideo_dl/YoutubeDL.py
index d8621ed..5b5a0d7 100755
--- a/hypervideo_dl/YoutubeDL.py
+++ b/hypervideo_dl/YoutubeDL.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # coding: utf-8
 from __future__ import absolute_import, unicode_literals
@@ -9,6 +9,7 @@ import copy
 import datetime
 import errno
 import fileinput
+import functools
 import io
 import itertools
 import json
@@ -19,29 +20,32 @@ import platform
 import re
 import shutil
 import subprocess
-import socket
 import sys
+import tempfile
 import time
 import tokenize
 import traceback
 import random
+import unicodedata
 from string import ascii_letters
 from .compat import (
     compat_basestring,
-    compat_cookiejar,
     compat_get_terminal_size,
-    compat_http_client,
     compat_kwargs,
     compat_numeric_types,
     compat_os_name,
+    compat_pycrypto_AES,
+    compat_shlex_quote,
     compat_str,
     compat_tokenize_tokenize,
     compat_urllib_error,
     compat_urllib_request,
     compat_urllib_request_DataHandler,
+    windows_enable_vt_mode,
 )
+from .cookies import load_cookies
 from .utils import (
     age_restricted,
     args_to_str,
@@ -51,21 +55,34 @@ from .utils import (
     DEFAULT_OUTTMPL,
     determine_ext,
     determine_protocol,
+    DOT_DESKTOP_LINK_TEMPLATE,
+    DOT_URL_LINK_TEMPLATE,
+    DOT_WEBLOC_LINK_TEMPLATE,
     DownloadError,
     encode_compat_str,
     encodeFilename,
+    EntryNotInPlaylist,
     error_to_compat_str,
+    ExistingVideoReached,
     expand_path,
     ExtractorError,
+    float_or_none,
     format_bytes,
+    format_field,
     formatSeconds,
     GeoRestrictedError,
+    HEADRequest,
     int_or_none,
+    iri_to_uri,
     ISO3166Utils,
+    LazyList,
     locked_file,
+    make_dir,
     make_HTTPS_handler,
     MaxDownloadsReached,
+    network_exceptions,
     orderedSet,
+    OUTTMPL_TYPES,
     PagedList,
     parse_filesize,
     PerRequestProxyHandler,
@@ -73,7 +90,9 @@ from .utils import (
     PostProcessingError,
     preferredencoding,
     prepend_extension,
+    process_communicate_or_kill,
     register_socks_protocols,
+    RejectedVideoReached,
     render_table,
     replace_extension,
     SameFileError,
@@ -82,30 +101,53 @@
     sanitize_url,
     sanitized_Request,
     std_headers,
+    STR_FORMAT_RE_TMPL,
+    STR_FORMAT_TYPES,
     str_or_none,
+    strftime_or_none,
     subtitles_filename,
+    supports_terminal_sequences,
+    TERMINAL_SEQUENCES,
+    ThrottledDownload,
+    to_high_limit_path,
+    traverse_obj,
+    try_get,
     UnavailableVideoError,
     url_basename,
+    variadic,
     version_tuple,
     write_json_file,
     write_string,
-    YoutubeDLCookieJar,
     YoutubeDLCookieProcessor,
     YoutubeDLHandler,
     YoutubeDLRedirectHandler,
 )
 from .cache import Cache
-from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor import (
+    gen_extractor_classes,
+    get_info_extractor,
+    _LAZY_LOADER,
+    _PLUGIN_CLASSES as plugin_extractors
+)
 from .extractor.openload import PhantomJSwrapper
-from .downloader import get_suitable_downloader
+from .downloader import (
+    FFmpegFD,
+    get_suitable_downloader,
+    shorten_protocol_name
+)
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
+    get_postprocessor,
+    EmbedThumbnailPP,
+    FFmpegFixupDurationPP,
     FFmpegFixupM3u8PP,
     FFmpegFixupM4aPP,
     FFmpegFixupStretchedPP,
+    FFmpegFixupTimestampPP,
     FFmpegMergerPP,
     FFmpegPostProcessor,
-    get_postprocessor,
+    MoveFilesAfterDownloadPP,
+    _PLUGIN_CLASSES as plugin_postprocessors
 )
 from .version import __version__
@@ -150,24 +192,57 @@ class YoutubeDL(object):
     verbose:           Print additional info to stdout.
     quiet:             Do not print messages to stdout.
     no_warnings:       Do not print out anything for warnings.
-    forceurl:          Force printing final URL.
-    forcetitle:        Force printing title.
-    forceid:           Force printing ID.
-    forcethumbnail:    Force printing thumbnail URL.
-    forcedescription:  Force printing description.
-    forcefilename:     Force printing final filename.
-    forceduration:     Force printing duration.
+    forceprint:        A list of templates to force print
+    forceurl:          Force printing final URL. (Deprecated)
+    forcetitle:        Force printing title. (Deprecated)
+    forceid:           Force printing ID. (Deprecated)
+    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
+    forcedescription:  Force printing description. (Deprecated)
+    forcefilename:     Force printing final filename. (Deprecated)
+    forceduration:     Force printing duration. (Deprecated)
     forcejson:         Force printing info_dict as JSON.
     dump_single_json:  Force printing the info_dict of the whole playlist
                        (or video) as a single JSON line.
-    simulate:          Do not download the video files.
-    format:            Video format code. See options.py for more information.
-    outtmpl:           Template for output names.
+    force_write_download_archive: Force writing download archive regardless
+                       of 'skip_download' or 'simulate'.
+    simulate:          Do not download the video files. If unset (or None),
+                       simulate only if listsubtitles, listformats or list_thumbnails is used
+    format:            Video format code. see "FORMAT SELECTION" for more details.
+    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
+    ignore_no_formats_error: Ignore "No video formats" error. Usefull for
+                       extracting metadata even if the video is not actually
+                       available for download (experimental)
+    format_sort:       How to sort the video formats. see "Sorting Formats"
+                       for more details.
+    format_sort_force: Force the given format_sort. see "Sorting Formats"
+                       for more details.
+    allow_multiple_video_streams:   Allow multiple video streams to be merged
+                       into a single file
+    allow_multiple_audio_streams:   Allow multiple audio streams to be merged
+                       into a single file
+    check_formats      Whether to test if the formats are downloadable.
+                       Can be True (check all), False (check none)
+                       or None (check only if requested by extractor)
+    paths:             Dictionary of output paths. The allowed keys are 'home'
+                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
+    outtmpl:           Dictionary of templates for output names. Allowed keys
+                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
+                       For compatibility with youtube-dl, a single string can also be used
     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
     restrictfilenames: Do not allow "&" and spaces in file names
-    ignoreerrors:      Do not stop on download errors.
+    trim_file_name:    Limit length of filename (extension excluded)
+    windowsfilenames:  Force the filenames to be windows compatible
+    ignoreerrors:      Do not stop on download/postprocessing errors.
+                       Can be 'only_download' to ignore only download errors.
+                       Default is 'only_download' for CLI, but False for API
+    skip_playlist_after_errors: Number of allowed failures until the rest of
+                       the playlist is skipped
     force_generic_extractor: Force downloader to use the generic extractor
-    nooverwrites:      Prevent overwriting files.
+    overwrites:        Overwrite all video and metadata files if True,
+                       overwrite only non-video files if None
+                       and don't overwrite any file if False
+                       For compatibility with youtube-dl,
+                       "nooverwrites" may also be used instead
     playliststart:     Playlist item to start at.
     playlistend:       Playlist item to end at.
     playlist_items:    Specific indices of playlist to download.
@@ -177,18 +252,33 @@ class YoutubeDL(object):
     rejecttitle:       Reject downloads for matching titles.
     logger:            Log messages to a logging.Logger instance.
     logtostderr:       Log messages to stderr instead of stdout.
+    consoletitle:      Display progress in console window's titlebar.
     writedescription:  Write the video description to a .description file
     writeinfojson:     Write the video description to a .info.json file
+    clean_infojson:    Remove private fields from the infojson
+    getcomments:       Extract video comments. This will not be written to disk
+                       unless writeinfojson is also given
     writeannotations:  Write the video annotations to a .annotations.xml file
     writethumbnail:    Write the thumbnail image to a file
+    allow_playlist_files: Whether to write playlists' description, infojson etc
+                       also to disk when using the 'write*' options
     write_all_thumbnails:  Write all thumbnail formats to files
+    writelink:         Write an internet shortcut file, depending on the
+                       current platform (.url/.webloc/.desktop)
+    writeurllink:      Write a Windows internet shortcut file (.url)
+    writewebloclink:   Write a macOS internet shortcut file (.webloc)
+    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatically generated subtitles to a file
-    allsubtitles:      Downloads all the subtitles of the video
+    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
+                       Downloads all the subtitles of the video
                        (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   The format code for subtitles
-    subtitleslangs:    List of languages of the subtitles to download
+    subtitleslangs:    List of languages of the subtitles to download (can be regex).
+                       The list may contain "all" to refer to all the available
+                       subtitles. The language can be prefixed with a "-" to
+                       exclude it from the requested languages. Eg: ['all', '-live_chat']
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
     skip_download:     Skip the actual download of the video file
@@ -209,7 +299,14 @@ class YoutubeDL(object):
     download_archive:  File name of a file where all downloads are recorded.
                        Videos already present in the file are not downloaded
                        again.
-    cookiefile:        File name where cookies should be read from and dumped to.
+    break_on_existing: Stop the download process after attempting to download a
+                       file that is in the archive.
+    break_on_reject:   Stop the download process when encountering a video that
+                       has been filtered out.
+    cookiefile:        File name where cookies should be read from and dumped to
+    cookiesfrombrowser: A tuple containing the name of the browser and the profile
+                       name/path from where cookies are loaded.
+                       Eg: ('chrome', ) or (vivaldi, 'default')
     nocheckcertificate:Do not verify SSL certificates
     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                        At the moment, this is only supported by YouTube.
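The dictionary-valued `paths`/`outtmpl` and the tri-state `overwrites` documented above compose like this (a hedged sketch; every value is illustrative, not a default — only the key names and semantics come from the docstring):

```python
# Illustrative params only; key names and semantics are from the docstring above.
ydl_opts = {
    'paths': {'home': '~/Videos', 'temp': '/tmp/hypervideo'},  # final vs. intermediate dirs
    'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},      # dict form; 'default' key
    'overwrites': None,                       # True: all, None: non-video only, False: none
    'subtitleslangs': ['all', '-live_chat'],  # "-" prefix excludes a language
    'break_on_existing': True,                # stop once an archived video is seen
    'cookiesfrombrowser': ('chrome', ),       # (browser, optional profile)
}
```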
@@ -230,12 +327,18 @@ class YoutubeDL(object):
     postprocessors:    A list of dictionaries, each with an entry
                        * key:  The name of the postprocessor. See
                                hypervideo_dl/postprocessor/__init__.py for a list.
-                       as well as any further keyword arguments for the
-                       postprocessor.
+                       * when: When to run the postprocessor. Can be one of
+                               pre_process|before_dl|post_process|after_move.
+                               Assumed to be 'post_process' if not given
+    post_hooks:        Deprecated - Register a custom postprocessor instead
+                       A list of functions that get called as the final step
+                       for each video file, after all postprocessors have been
+                       called. The filename will be passed as the only argument.
     progress_hooks:    A list of functions that get called on download
                        progress, with a dictionary with the entries
                        * status: One of "downloading", "error", or "finished".
                                  Check this first and ignore unknown values.
+                       * info_dict: The extracted info_dict
                        If status is one of "downloading", or "finished", the
                        following properties may also be present:
@@ -256,7 +359,19 @@ class YoutubeDL(object):
                        Progress hooks are guaranteed to be called at least once
                        (with status "finished") if the download is successful.
+    postprocessor_hooks: A list of functions that get called on postprocessing
+                       progress, with a dictionary with the entries
+                       * status: One of "started", "processing", or "finished".
+                                 Check this first and ignore unknown values.
+                       * postprocessor: Name of the postprocessor
+                       * info_dict: The extracted info_dict
+
+                       Progress hooks are guaranteed to be called at least twice
+                       (with status "started" and "finished") if the processing is successful.
     merge_output_format: Extension to use when merging formats.
+    final_ext:         Expected final extension; used to detect when the file was
+                       already downloaded and converted. "merge_output_format" is
+                       replaced by this extension when given
     fixup:             Automatically correct known faults of the file.
                        One of:
                        - "never": do nothing
@@ -265,7 +380,9 @@
                          about it, warn otherwise (default)
     source_address:    Client-side IP address to bind to.
     call_home:         Boolean, true iff we are allowed to contact the
-                       hypervideo servers for debugging.
+                       hypervideo servers for debugging. (BROKEN)
+    sleep_interval_requests: Number of seconds to sleep between requests
+                       during extraction
     sleep_interval:    Number of seconds to sleep before each download when
                        used alone or a lower bound of a range for randomized
                        sleep before each download (minimum possible number
@@ -276,6 +393,7 @@
                        Must only be used along with sleep_interval.
                        Actual sleep time will be a random float from range
                        [sleep_interval; max_sleep_interval].
+    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
     listformats:       Print an overview of available video formats and exit.
     list_thumbnails:   Print a table of all thumbnails and exit.
     match_filter:      A function that gets called with the info_dict of
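The new `when` key and the hook dictionaries described above wire up as follows (a sketch; `FFmpegMetadata` is one known postprocessor key, and the hook fields are exactly those listed in the docstring):

```python
def on_progress(d):
    # 'status' is "downloading", "error" or "finished"; this patch adds 'info_dict'
    if d['status'] == 'finished':
        print('downloaded:', d['info_dict'].get('title'))

ydl_opts = {
    'postprocessors': [
        {'key': 'FFmpegMetadata', 'when': 'post_process'},  # 'when' defaults to 'post_process'
    ],
    'progress_hooks': [on_progress],
}
```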
@@ -295,48 +413,86 @@ class YoutubeDL(object):
                        geo_bypass_country
     The following options determine which downloader is picked:
-    external_downloader: Executable of the external downloader to call.
-                       None or unset for standard (built-in) downloader.
-    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
+    external_downloader: A dictionary of protocol keys and the executable of the
+                       external downloader to use for it. The allowed protocols
+                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
+                       Set the value to 'native' to use the native downloader
+    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
+                       or {'m3u8': 'ffmpeg'} instead.
+                       Use the native HLS downloader instead of ffmpeg/avconv
                        if True, otherwise use ffmpeg/avconv if False, otherwise
                        use downloader suggested by extractor if None.
+    compat_opts:       Compatibility options. See "Differences in default behavior".
+                       The following options do not work when used through the API:
+                       filename, abort-on-error, multistreams, no-live-chat, format-sort
+                       no-clean-infojson, no-playlist-metafiles, no-keep-subs.
+                       Refer __init__.py for their implementation
+    progress_template: Dictionary of templates for progress outputs.
+                       Allowed keys are 'download', 'postprocess',
+                       'download-title' (console title) and 'postprocess-title'.
+                       The template is mapped on a dictionary with keys 'progress' and 'info'
     The following parameters are not used by YoutubeDL itself, they are used by
     the downloader (see hypervideo_dl/downloader/common.py):
-    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
-    noresizebuffer, retries, continuedl, noprogress, consoletitle,
-    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
-    http_chunk_size.
+    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
+    max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
+    noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
+    external_downloader_args.
     The following options are used by the post processors:
     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
-                       otherwise prefer ffmpeg.
+                       otherwise prefer ffmpeg. (avconv support is deprecated)
     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                        to the binary or its containing directory.
-    postprocessor_args: A list of additional command-line arguments for the
-                        postprocessor.
-
-    The following options are used by the Youtube extractor:
-    youtube_include_dash_manifest: If True (default), DASH manifests and related
-                        data will be downloaded and processed by extractor.
-                        You can reduce network I/O by disabling it if you don't
-                        care about DASH.
+    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
+                       and a list of additional command-line arguments for the
+                       postprocessor/executable. The dict can also have "PP+EXE" keys
+                       which are used when the given exe is used by the given PP.
+                       Use 'default' as the name for arguments to passed to all PP
+                       For compatibility with youtube-dl, a single list of args
+                       can also be used
+
+    The following options are used by the extractors:
+    extractor_retries: Number of times to retry for known errors
+    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
+    hls_split_discontinuity: Split HLS playlists to different formats at
+                       discontinuities such as ad breaks (default: False)
+    extractor_args:    A dictionary of arguments to be passed to the extractors.
+                       See "EXTRACTOR ARGUMENTS" for details.
+                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
+    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
+                       If True (default), DASH manifests and related
+                       data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't + care about DASH. (only for youtube) + youtube_include_hls_manifest: Deprecated - Use extractor_args instead. + If True (default), HLS manifests and related + data will be downloaded and processed by extractor. + You can reduce network I/O by disabling it if you don't + care about HLS. (only for youtube) """ _NUMERIC_FIELDS = set(( 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', - 'timestamp', 'upload_year', 'upload_month', 'upload_day', + 'timestamp', 'release_timestamp', 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', 'track_number', 'disc_number', 'release_year', - 'playlist_index', )) + _format_selection_exts = { + 'audio': {'m4a', 'mp3', 'ogg', 'aac'}, + 'video': {'mp4', 'flv', 'webm', '3gp'}, + 'storyboards': {'mhtml'}, + } + params = None - _ies = [] - _pps = [] + _ies = {} + _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} + _printed_messages = set() + _first_webpage_request = True _download_retcode = None _num_downloads = None _playlist_level = 0 @@ -344,28 +500,45 @@ class YoutubeDL(object): _screen_file = None def __init__(self, params=None, auto_init=True): - """Create a FileDownloader object with the given options.""" + """Create a FileDownloader object with the given options. + @param auto_init Whether to load the default extractors and print header (if verbose). + Set to 'no_verbose_header' to not ptint the header + """ if params is None: params = {} - self._ies = [] + self._ies = {} self._ies_instances = {} - self._pps = [] + self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} + self._printed_messages = set() + self._first_webpage_request = True + self._post_hooks = [] self._progress_hooks = [] + self._postprocessor_hooks = [] self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr - self.params = { - # Default parameters - 'nocheckcertificate': False, - } - self.params.update(params) + self.params = params self.cache = Cache(self) + windows_enable_vt_mode() + # FIXME: This will break if we ever print color to stdout + self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file) + + if sys.version_info < (3, 6): + self.report_warning( + 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2]) + + if self.params.get('allow_unplayable_formats'): + self.report_warning( + f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. ' + 'This is a developer option intended for debugging. \n' + ' If you experience any issues while using this option, ' + f'{self._color_text("DO NOT", "red")} open a bug report') + def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: - self.report_warning( - '%s is deprecated. Use %s instead.' % (option, suggestion)) + self.report_warning('%s is deprecated. 
@@ -344,28 +500,45 @@
     _screen_file = None
     def __init__(self, params=None, auto_init=True):
-        """Create a FileDownloader object with the given options."""
+        """Create a FileDownloader object with the given options.
+        @param auto_init    Whether to load the default extractors and print header (if verbose).
+                            Set to 'no_verbose_header' to not ptint the header
+        """
         if params is None:
             params = {}
-        self._ies = []
+        self._ies = {}
         self._ies_instances = {}
-        self._pps = []
+        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
+        self._printed_messages = set()
+        self._first_webpage_request = True
+        self._post_hooks = []
         self._progress_hooks = []
+        self._postprocessor_hooks = []
         self._download_retcode = 0
         self._num_downloads = 0
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
         self._err_file = sys.stderr
-        self.params = {
-            # Default parameters
-            'nocheckcertificate': False,
-        }
-        self.params.update(params)
+        self.params = params
         self.cache = Cache(self)
+        windows_enable_vt_mode()
+        # FIXME: This will break if we ever print color to stdout
+        self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)
+
+        if sys.version_info < (3, 6):
+            self.report_warning(
+                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
+
+        if self.params.get('allow_unplayable_formats'):
+            self.report_warning(
+                f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
+                'This is a developer option intended for debugging. \n'
+                '         If you experience any issues while using this option, '
+                f'{self._color_text("DO NOT", "red")} open a bug report')
+
         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
-                self.report_warning(
-                    '%s is deprecated. Use %s instead.' % (option, suggestion))
+                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                 return True
             return False
@@ -373,9 +546,22 @@
         if self.params.get('geo_verification_proxy') is None:
             self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
-        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
+        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
+
+        for msg in self.params.get('warnings', []):
+            self.report_warning(msg)
+
+        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
+            # nooverwrites was unnecessarily changed to overwrites
+            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
+            # This ensures compatibility with both keys
+            self.params['overwrites'] = not self.params['nooverwrites']
+        elif self.params.get('overwrites') is None:
+            self.params.pop('overwrites', None)
+        else:
+            self.params['nooverwrites'] = not self.params['overwrites']
         if params.get('bidi_workaround', False):
             try:
@@ -414,29 +600,53 @@
                     'Set the LC_ALL environment variable to fix this.')
                 self.params['restrictfilenames'] = True
-        if isinstance(params.get('outtmpl'), bytes):
-            self.report_warning(
-                'Parameter outtmpl is bytes, but should be a unicode string. '
-                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+        self.outtmpl_dict = self.parse_outtmpl()
+
+        # Creating format selector here allows us to catch syntax errors before the extraction
+        self.format_selector = (
+            None if self.params.get('format') is None
+            else self.build_format_selector(self.params['format']))
         self._setup_opener()
         if auto_init:
-            self.print_debug_header()
+            if auto_init != 'no_verbose_header':
+                self.print_debug_header()
             self.add_default_info_extractors()
         for pp_def_raw in self.params.get('postprocessors', []):
-            pp_class = get_postprocessor(pp_def_raw['key'])
             pp_def = dict(pp_def_raw)
-            del pp_def['key']
+            when = pp_def.pop('when', 'post_process')
+            pp_class = get_postprocessor(pp_def.pop('key'))
             pp = pp_class(self, **compat_kwargs(pp_def))
-            self.add_post_processor(pp)
+            self.add_post_processor(pp, when=when)
+
+        for ph in self.params.get('post_hooks', []):
+            self.add_post_hook(ph)
         for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
         register_socks_protocols()
+        def preload_download_archive(fn):
+            """Preload the archive, if any is specified"""
+            if fn is None:
+                return False
+            self.write_debug('Loading archive file %r\n' % fn)
+            try:
+                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+                    for line in archive_file:
+                        self.archive.add(line.strip())
+            except IOError as ioe:
+                if ioe.errno != errno.ENOENT:
+                    raise
+                return False
+            return True
+
+        self.archive = set()
+        preload_download_archive(self.params.get('download_archive'))
     def warn_if_short_id(self, argv):
         # short YouTube ID starting with dash?
         idxs = [
@@ -455,11 +665,19 @@
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
-        self._ies.append(ie)
+        ie_key = ie.ie_key()
+        self._ies[ie_key] = ie
         if not isinstance(ie, type):
-            self._ies_instances[ie.ie_key()] = ie
+            self._ies_instances[ie_key] = ie
             ie.set_downloader(self)
+    def _get_info_extractor_class(self, ie_key):
+        ie = self._ies.get(ie_key)
+        if ie is None:
+            ie = get_info_extractor(ie_key)
+            self.add_info_extractor(ie)
+        return ie
+
     def get_info_extractor(self, ie_key):
         """
         Get an instance of an IE with name ie_key, it will try to get one from
@@ -479,15 +697,23 @@
         for ie in gen_extractor_classes():
             self.add_info_extractor(ie)
-    def add_post_processor(self, pp):
+    def add_post_processor(self, pp, when='post_process'):
         """Add a PostProcessor object to the end of the chain."""
-        self._pps.append(pp)
+        self._pps[when].append(pp)
         pp.set_downloader(self)
+    def add_post_hook(self, ph):
+        """Add the post hook"""
+        self._post_hooks.append(ph)
+
     def add_progress_hook(self, ph):
-        """Add the progress hook (currently only for the file downloader)"""
+        """Add the download progress hook"""
         self._progress_hooks.append(ph)
+    def add_postprocessor_hook(self, ph):
+        """Add the postprocessing progress hook"""
+        self._postprocessor_hooks.append(ph)
+
     def _bidi_workaround(self, message):
         if not hasattr(self, '_output_channel'):
             return message
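The registration methods added above also work programmatically. A sketch of a custom postprocessor plus a postprocessor hook (the `PostProcessor` import path and the `run()` return convention `(files_to_delete, info)` are assumptions based on the upstream layout this fork tracks):

```python
from hypervideo_dl import YoutubeDL
from hypervideo_dl.postprocessor.common import PostProcessor  # import path assumed

class AnnouncePP(PostProcessor):
    def run(self, info):
        self.to_screen('processed %s' % info.get('id'))
        return [], info  # (files_to_delete, updated info_dict)

ydl = YoutubeDL()
ydl.add_post_processor(AnnouncePP(), when='before_dl')    # new 'when' parameter
ydl.add_postprocessor_hook(lambda d: print(d['status']))  # "started"/"processing"/"finished"
```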
@@ -501,33 +727,29 @@
                 for _ in range(line_count))
         return res[:-len('\n')]
-    def to_screen(self, message, skip_eol=False):
-        """Print message to stdout if not in quiet mode."""
-        return self.to_stdout(message, skip_eol, check_quiet=True)
-
-    def _write_string(self, s, out=None):
-        write_string(s, out=out, encoding=self.params.get('encoding'))
+    def _write_string(self, message, out=None, only_once=False):
+        if only_once:
+            if message in self._printed_messages:
+                return
+            self._printed_messages.add(message)
+        write_string(message, out=out, encoding=self.params.get('encoding'))
-    def to_stdout(self, message, skip_eol=False, check_quiet=False):
-        """Print message to stdout if not in quiet mode."""
+    def to_stdout(self, message, skip_eol=False, quiet=False):
+        """Print message to stdout"""
         if self.params.get('logger'):
             self.params['logger'].debug(message)
-        elif not check_quiet or not self.params.get('quiet', False):
-            message = self._bidi_workaround(message)
-            terminator = ['\n', ''][skip_eol]
-            output = message + terminator
-
-            self._write_string(output, self._screen_file)
+        elif not quiet or self.params.get('verbose'):
+            self._write_string(
+                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+                self._err_file if quiet else self._screen_file)
-    def to_stderr(self, message):
-        """Print message to stderr."""
+    def to_stderr(self, message, only_once=False):
+        """Print message to stderr"""
         assert isinstance(message, compat_str)
         if self.params.get('logger'):
             self.params['logger'].error(message)
         else:
-            message = self._bidi_workaround(message)
-            output = message + '\n'
-            self._write_string(output, self._err_file)
+            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
     def to_console_title(self, message):
         if not self.params.get('consoletitle', False):
@@ -543,7 +765,7 @@
     def save_console_title(self):
         if not self.params.get('consoletitle', False):
             return
-        if self.params.get('simulate', False):
+        if self.params.get('simulate'):
             return
         if compat_os_name != 'nt' and 'TERM' in os.environ:
             # Save the title on stack
@@ -552,7 +774,7 @@
     def restore_console_title(self):
         if not self.params.get('consoletitle', False):
             return
-        if self.params.get('simulate', False):
+        if self.params.get('simulate'):
             return
         if compat_os_name != 'nt' and 'TERM' in os.environ:
             # Restore the title from stack
@@ -589,8 +811,9 @@
         else:
             tb_data = traceback.format_list(traceback.extract_stack())
             tb = ''.join(tb_data)
-        self.to_stderr(tb)
-        if not self.params.get('ignoreerrors', False):
+        if tb:
+            self.to_stderr(tb)
+        if not self.params.get('ignoreerrors'):
             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                 exc_info = sys.exc_info()[1].exc_info
             else:
@@ -598,7 +821,17 @@
             raise DownloadError(message, exc_info)
         self._download_retcode = 1
-    def report_warning(self, message):
+    def to_screen(self, message, skip_eol=False):
+        """Print message to stdout if not in quiet mode"""
+        self.to_stdout(
+            message, skip_eol, quiet=self.params.get('quiet', False))
+
+    def _color_text(self, text, color):
+        if self.params.get('no_color'):
+            return text
+        return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
+
+    def report_warning(self, message, only_once=False):
         '''
         Print the message to stderr, it will be prefixed with 'WARNING:'
         If stderr is a tty file the 'WARNING:' will be colored
@@ -608,24 +841,24 @@
         else:
             if self.params.get('no_warnings'):
                 return
-            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
-                _msg_header = '\033[0;33mWARNING:\033[0m'
-            else:
-                _msg_header = 'WARNING:'
-            warning_message = '%s %s' % (_msg_header, message)
-            self.to_stderr(warning_message)
+            self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)
     def report_error(self, message, tb=None):
         '''
         Do the same as trouble, but prefixes the message with 'ERROR:',
         colored in red if stderr is a tty file.
''' - if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': - _msg_header = '\033[0;31mERROR:\033[0m' + self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb) + + def write_debug(self, message, only_once=False): + '''Log debug message or Print message to stderr''' + if not self.params.get('verbose', False): + return + message = '[debug] %s' % message + if self.params.get('logger'): + self.params['logger'].debug(message) else: - _msg_header = 'ERROR:' - error_message = '%s %s' % (_msg_header, message) - self.trouble(error_message, tb) + self.to_stderr(message, only_once) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" @@ -634,136 +867,360 @@ class YoutubeDL(object): except UnicodeEncodeError: self.to_screen('[download] The file has already been downloaded') - def prepare_filename(self, info_dict): - """Generate the output filename.""" + def report_file_delete(self, file_name): + """Report that existing file will be deleted.""" try: - template_dict = dict(info_dict) - - template_dict['epoch'] = int(time.time()) - autonumber_size = self.params.get('autonumber_size') - if autonumber_size is None: - autonumber_size = 5 - template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads - if template_dict.get('resolution') is None: - if template_dict.get('width') and template_dict.get('height'): - template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) - elif template_dict.get('height'): - template_dict['resolution'] = '%sp' % template_dict['height'] - elif template_dict.get('width'): - template_dict['resolution'] = '%dx?' % template_dict['width'] + self.to_screen('Deleting existing file %s' % file_name) + except UnicodeEncodeError: + self.to_screen('Deleting existing file') + + def raise_no_formats(self, info, forced=False): + has_drm = info.get('__has_drm') + msg = 'This video is DRM protected' if has_drm else 'No video formats found!' + expected = self.params.get('ignore_no_formats_error') + if forced or not expected: + raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], + expected=has_drm or expected) + else: + self.report_warning(msg) + + def parse_outtmpl(self): + outtmpl_dict = self.params.get('outtmpl', {}) + if not isinstance(outtmpl_dict, dict): + outtmpl_dict = {'default': outtmpl_dict} + # Remove spaces in the default template + if self.params.get('restrictfilenames'): + sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-') + else: + sanitize = lambda x: x + outtmpl_dict.update({ + k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() + if outtmpl_dict.get(k) is None}) + for key, val in outtmpl_dict.items(): + if isinstance(val, bytes): + self.report_warning( + 'Parameter outtmpl is bytes, but should be a unicode string. 
' + 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') + return outtmpl_dict + + def get_output_path(self, dir_type='', filename=None): + paths = self.params.get('paths', {}) + assert isinstance(paths, dict) + path = os.path.join( + expand_path(paths.get('home', '').strip()), + expand_path(paths.get(dir_type, '').strip()) if dir_type else '', + filename or '') + + # Temporary fix for #4787 + # 'Treat' all problem characters by passing filename through preferredencoding + # to workaround encoding issues with subprocess on python2 @ Windows + if sys.version_info < (3, 0) and sys.platform == 'win32': + path = encodeFilename(path, True).decode(preferredencoding()) + return sanitize_path(path, force=self.params.get('windowsfilenames')) + + @staticmethod + def _outtmpl_expandpath(outtmpl): + # expand_path translates '%%' into '%' and '$$' into '$' + # correspondingly that is not what we want since we need to keep + # '%%' intact for template dict substitution step. Working around + # with boundary-alike separator hack. + sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) + outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) + + # outtmpl should be expand_path'ed before template dict substitution + # because meta fields may contain env variables we don't want to + # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and + # title "Hello $PATH", we don't want `$PATH` to be expanded. + return expand_path(outtmpl).replace(sep, '') + + @staticmethod + def escape_outtmpl(outtmpl): + ''' Escape any remaining strings like %s, %abc% etc. ''' + return re.sub( + STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'), + lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0), + outtmpl) + + @classmethod + def validate_outtmpl(cls, outtmpl): + ''' @return None or Exception object ''' + outtmpl = re.sub( + STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'), + lambda mobj: f'{mobj.group(0)[:-1]}s', + cls._outtmpl_expandpath(outtmpl)) + try: + cls.escape_outtmpl(outtmpl) % collections.defaultdict(int) + return None + except ValueError as err: + return err + + @staticmethod + def _copy_infodict(info_dict): + info_dict = dict(info_dict) + for key in ('__original_infodict', '__postprocessors'): + info_dict.pop(key, None) + return info_dict + + def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): + """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """ + info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set + + info_dict = self._copy_infodict(info_dict) + info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs + formatSeconds(info_dict['duration'], '-' if sanitize else ':') + if info_dict.get('duration', None) is not None + else None) + info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads + if info_dict.get('resolution') is None: + info_dict['resolution'] = self.format_resolution(info_dict, default=None) + + # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences + # of %(field)s to %(field)0Nd for backward compatibility + field_size_compat_map = { + 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')), + 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')), + 'autonumber': self.params.get('autonumber_size') or 5, + } + + TMPL_DICT = {} + EXTERNAL_FORMAT_RE = 
re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
+        MATH_FUNCTIONS = {
+            '+': float.__add__,
+            '-': float.__sub__,
+        }
+        # Field is of the form key1.key2...
+        # where keys (except first) can be string, int or slice
+        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
+        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
+        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+            (?P<negate>-)?
+            (?P<fields>{field})
+            (?P<maths>(?:{math_op}{math_field})*)
+            (?:>(?P<strf_format>.+?))?
+            (?P<alternate>(?<!\\),[^|)]*)?
+            (?:\|(?P<default>.*?))?
+            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+
+        def _traverse_infodict(k):
+            k = k.split('.')
+            if k[0] == '':
+                k.pop(0)
+            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
+
+        def get_value(mdict):
+            # Object traversal
+            value = _traverse_infodict(mdict['fields'])
+            # Negative
+            if mdict['negate']:
+                value = float_or_none(value)
+                if value is not None:
+                    value *= -1
+            # Do maths
+            offset_key = mdict['maths']
+            if offset_key:
+                value = float_or_none(value)
+                operator = None
+                while offset_key:
+                    item = re.match(
+                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
+                        offset_key).group(0)
+                    offset_key = offset_key[len(item):]
+                    if operator is None:
+                        operator = MATH_FUNCTIONS[item]
+                        continue
+                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
+                    offset = float_or_none(item)
+                    if offset is None:
+                        offset = float_or_none(_traverse_infodict(item))
+                    try:
+                        value = operator(value, multiplier * offset)
+                    except (TypeError, ZeroDivisionError):
+                        return None
+                    operator = None
+            # Datetime formatting
+            if mdict['strf_format']:
+                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
+
+            return value
+
+        na = self.params.get('outtmpl_na_placeholder', 'NA')
+
+        def _dumpjson_default(obj):
+            if isinstance(obj, (set, LazyList)):
+                return list(obj)
+            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
+
+        def create_key(outer_mobj):
+            if not outer_mobj.group('has_key'):
+                return outer_mobj.group(0)
+            key = outer_mobj.group('key')
+            mobj = re.match(INTERNAL_FORMAT_RE, key)
+            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
+            value, default = None, na
+            while mobj:
+                mobj = mobj.groupdict()
+                default = mobj['default'] if mobj['default'] is not None else default
+                value = get_value(mobj)
+                if value is None and mobj['alternate']:
+                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
+                else:
+                    break
+            fmt = outer_mobj.group('format')
+            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
+                fmt = '0{:d}d'.format(field_size_compat_map[key])
+
+            value = default if value is None else value
+
+            str_fmt = f'{fmt[:-1]}s'
+            if fmt[-1] == 'l':  # list
+                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
+                value, fmt = delim.join(variadic(value)), str_fmt
+            elif fmt[-1] == 'j':  # json
+                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
+            elif fmt[-1] == 'q':  # quoted
+                value, fmt = compat_shlex_quote(str(value)), str_fmt
+            elif fmt[-1] == 'B':  # bytes
+                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+                value, fmt = value.decode('utf-8', 'ignore'), 's'
+            elif fmt[-1] == 'U':  # unicode normalized
+                opts = outer_mobj.group('conversion') or ''
+                value, fmt = unicodedata.normalize(
+                    # "+" = compatibility equivalence, "#" = NFD
+                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
+                    value), str_fmt
+            elif fmt[-1] == 'c':
+                if value:
+                    value = str(value)[0]
+                else:
+                    fmt = str_fmt
+            elif fmt[-1] not in 'rs':  # numeric
+                value = float_or_none(value)
+                if value is None:
+                    value, fmt = default, 's'
+
+            if sanitize:
+                if fmt[-1] == 'r':
+                    # If value is an object, sanitize might convert it to a string
+                    # So we convert it to repr first
+                    value, fmt = repr(value), str_fmt
+                if fmt[-1] in 'csr':
+                    value = sanitize(initial_field, value)
+
+            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
+            TMPL_DICT[key] = value
+            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
+
+        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
+
+    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
+        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
+        return self.escape_outtmpl(outtmpl) % info_dict
+
+    def _prepare_filename(self, info_dict, tmpl_type='default'):
+        try:
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),
                 restricted=self.params.get('restrictfilenames'),
                 is_id=(k == 'id' or k.endswith('_id')))
-            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
-                                 for k, v in template_dict.items()
-                                 if v is not None and not isinstance(v, (list, tuple, dict)))
-            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
-
-            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-
-            # For fields playlist_index and autonumber convert all occurrences
-            # of %(field)s to %(field)0Nd for backward compatibility
-            field_size_compat_map = {
-                'playlist_index': len(str(template_dict['n_entries'])),
-                'autonumber': autonumber_size,
-            }
-            FIELD_SIZE_COMPAT_RE = r'%\((?P<field>autonumber|playlist_index)\)s'
-            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
-            if mobj:
-                outtmpl = re.sub(
-                    FIELD_SIZE_COMPAT_RE,
-                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
-                    outtmpl)
-
-            # Missing numeric fields used together with integer presentation types
-            # in format specification will break the argument substitution since
-            # string NA placeholder is returned for missing fields. We will patch
-            # output template for missing fields to meet string presentation type.
-            for numeric_field in self._NUMERIC_FIELDS:
-                if numeric_field not in template_dict:
-                    # As of [1] format syntax is:
-                    # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
-                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
-                    FORMAT_RE = r'''(?x)
-                        (?
+                if len(fn_groups) > 2:
+                    sub_ext = fn_groups[-2]
+                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
+
+            return filename
         except ValueError as err:
             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
             return None
 
-    def _match_entry(self, info_dict, incomplete):
-        """ Returns None iff the file should be downloaded """
+    def prepare_filename(self, info_dict, dir_type='', warn=False):
+        """Generate the output filename."""
+
+        filename = self._prepare_filename(info_dict, dir_type or 'default')
+        if not filename and dir_type not in ('', 'temp'):
+            return ''
+
+        if warn:
+            if not self.params.get('paths'):
+                pass
+            elif filename == '-':
+                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
+            elif os.path.isabs(filename):
+                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
+        if filename == '-' or not filename:
+            return filename
+
+        return self.get_output_path(dir_type, filename)
+
+    def _match_entry(self, info_dict, incomplete=False, silent=False):
+        """ Returns None if the file should be downloaded """
         video_title = info_dict.get('title', info_dict.get('id', 'video'))
-        if 'title' in info_dict:
-            # This can happen when we're just evaluating the playlist
-            title = info_dict['title']
-            matchtitle = self.params.get('matchtitle', False)
-            if matchtitle:
-                if not re.search(matchtitle, title, re.IGNORECASE):
-                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
-            rejecttitle = self.params.get('rejecttitle', False)
-            if rejecttitle:
-                if re.search(rejecttitle, title, re.IGNORECASE):
-                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
-        date = info_dict.get('upload_date')
-        if date is not None:
-            dateRange = self.params.get('daterange', DateRange())
-            if date not in dateRange:
-                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
-        view_count = info_dict.get('view_count')
-        if view_count is not None:
-            min_views = self.params.get('min_views')
-            if min_views is not None and view_count < min_views:
-                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
-            max_views = self.params.get('max_views')
-            if max_views is not None and view_count > max_views:
-                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
-        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
-            return 'Skipping "%s" because it is age restricted' % video_title
-        if self.in_download_archive(info_dict):
-            return '%s has already been recorded in archive' % video_title
-        if not incomplete:
+
+        def check_filter():
+            if 'title' in info_dict:
+                # This can happen when we're just evaluating the playlist
+                title = info_dict['title']
+                matchtitle = self.params.get('matchtitle', False)
+                if matchtitle:
+                    if not re.search(matchtitle, title, re.IGNORECASE):
+                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+                rejecttitle = self.params.get('rejecttitle', False)
+                if rejecttitle:
+                    if re.search(rejecttitle, title, re.IGNORECASE):
+                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+            date = info_dict.get('upload_date')
+            if date is not None:
+                dateRange = self.params.get('daterange', DateRange())
+                if date not in dateRange:
+                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+
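# Illustration (not part of the patch): the upload-date gate in check_filter()
# above, reduced to a self-contained sketch. in_date_range() is a hypothetical
# helper; hypervideo itself performs this check through its DateRange utility.
from datetime import date

def in_date_range(upload_date, start, end):
    # upload_date is the YYYYMMDD string carried in info_dict['upload_date']
    d = date(int(upload_date[:4]), int(upload_date[4:6]), int(upload_date[6:8]))
    return start <= d <= end

print(in_date_range('20211018', date(2021, 1, 1), date(2021, 12, 31)))  # True
print(in_date_range('20201018', date(2021, 1, 1), date(2021, 12, 31)))  # False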
view_count = info_dict.get('view_count') + if view_count is not None: + min_views = self.params.get('min_views') + if min_views is not None and view_count < min_views: + return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views) + max_views = self.params.get('max_views') + if max_views is not None and view_count > max_views: + return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) + if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): + return 'Skipping "%s" because it is age restricted' % video_title + match_filter = self.params.get('match_filter') if match_filter is not None: - ret = match_filter(info_dict) + try: + ret = match_filter(info_dict, incomplete=incomplete) + except TypeError: + # For backward compatibility + ret = None if incomplete else match_filter(info_dict) if ret is not None: return ret + return None - return None + if self.in_download_archive(info_dict): + reason = '%s has already been recorded in the archive' % video_title + break_opt, break_err = 'break_on_existing', ExistingVideoReached + else: + reason = check_filter() + break_opt, break_err = 'break_on_reject', RejectedVideoReached + if reason is not None: + if not silent: + self.to_screen('[download] ' + reason) + if self.params.get(break_opt, False): + raise break_err() + return reason @staticmethod def add_extra_info(info_dict, extra_info): @@ -771,7 +1228,7 @@ class YoutubeDL(object): for key, value in extra_info.items(): info_dict.setdefault(key, value) - def extract_info(self, url, download=True, ie_key=None, extra_info={}, + def extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False): """ Return a list with a dictionary for each video extracted. @@ -788,28 +1245,36 @@ class YoutubeDL(object): force_generic_extractor -- force using the generic extractor """ + if extra_info is None: + extra_info = {} + if not ie_key and force_generic_extractor: ie_key = 'Generic' if ie_key: - ies = [self.get_info_extractor(ie_key)] + ies = {ie_key: self._get_info_extractor_class(ie_key)} else: ies = self._ies - for ie in ies: + for ie_key, ie in ies.items(): if not ie.suitable(url): continue - ie = self.get_info_extractor(ie.ie_key()) if not ie.working(): self.report_warning('The program functionality for this site has been marked as broken, ' 'and will probably not work.') - return self.__extract_info(url, ie, download, extra_info, process) + temp_id = ie.get_temp_id(url) + if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}): + self.to_screen("[%s] %s: has already been recorded in archive" % ( + ie_key, temp_id)) + break + return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process) else: self.report_error('no suitable InfoExtractor for URL %s' % url) def __handle_extraction_exceptions(func): + @functools.wraps(func) def wrapper(self, *args, **kwargs): try: return func(self, *args, **kwargs) @@ -822,10 +1287,14 @@ class YoutubeDL(object): self.report_error(msg) except ExtractorError as e: # An error we somewhat expected self.report_error(compat_str(e), e.format_traceback()) - except MaxDownloadsReached: + except ThrottledDownload: + self.to_stderr('\r') + self.report_warning('The download speed is below throttle limit. 
Re-extracting data') + return wrapper(self, *args, **kwargs) + except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError): raise except Exception as e: - if self.params.get('ignoreerrors', False): + if self.params.get('ignoreerrors'): self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) else: raise @@ -842,6 +1311,8 @@ class YoutubeDL(object): '_type': 'compat_list', 'entries': ie_result, } + if extra_info.get('original_url'): + ie_result.setdefault('original_url', extra_info['original_url']) self.add_default_extra_info(ie_result, ie, url) if process: return self.process_ie_result(ie_result, download, extra_info) @@ -849,14 +1320,19 @@ class YoutubeDL(object): return ie_result def add_default_extra_info(self, ie_result, ie, url): - self.add_extra_info(ie_result, { - 'extractor': ie.IE_NAME, - 'webpage_url': url, - 'webpage_url_basename': url_basename(url), - 'extractor_key': ie.ie_key(), - }) - - def process_ie_result(self, ie_result, download=True, extra_info={}): + if url is not None: + self.add_extra_info(ie_result, { + 'webpage_url': url, + 'original_url': url, + 'webpage_url_basename': url_basename(url), + }) + if ie is not None: + self.add_extra_info(ie_result, { + 'extractor': ie.IE_NAME, + 'extractor_key': ie.ie_key(), + }) + + def process_ie_result(self, ie_result, download=True, extra_info=None): """ Take the result of the ie(may be modified) and resolve all unresolved references (URLs, playlist items). @@ -864,28 +1340,54 @@ class YoutubeDL(object): It will also download the videos if 'download'. Returns the resolved ie_result. """ + if extra_info is None: + extra_info = {} result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): ie_result['url'] = sanitize_url(ie_result['url']) + if ie_result.get('original_url'): + extra_info.setdefault('original_url', ie_result['original_url']) + extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or extract_flat is True): - self.__forced_printings( - ie_result, self.prepare_filename(ie_result), - incomplete=True) + info_copy = ie_result.copy() + ie = try_get(ie_result.get('ie_key'), self.get_info_extractor) + if ie and not ie_result.get('id'): + info_copy['id'] = ie.get_temp_id(ie_result['url']) + self.add_default_extra_info(info_copy, ie, ie_result['url']) + self.add_extra_info(info_copy, extra_info) + self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True) + if self.params.get('force_write_download_archive', False): + self.record_download_archive(info_copy) return ie_result if result_type == 'video': self.add_extra_info(ie_result, extra_info) - return self.process_video_result(ie_result, download=download) + ie_result = self.process_video_result(ie_result, download=download) + additional_urls = (ie_result or {}).get('additional_urls') + if additional_urls: + # TODO: Improve MetadataParserPP to allow setting a list + if isinstance(additional_urls, compat_str): + additional_urls = [additional_urls] + self.to_screen( + '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls))) + self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls)) + ie_result['additional_entries'] = [ + self.extract_info( + url, download, extra_info, + force_generic_extractor=self.params.get('force_generic_extractor')) + for url in additional_urls + ] + return ie_result elif result_type == 'url': # We have to add extra_info to the 
results because it may be # contained in a playlist - return self.extract_info(ie_result['url'], - download, - ie_key=ie_result.get('ie_key'), - extra_info=extra_info) + return self.extract_info( + ie_result['url'], download, + ie_key=ie_result.get('ie_key'), + extra_info=extra_info) elif result_type == 'url_transparent': # Use the information from the embedding page info = self.extract_info( @@ -929,6 +1431,7 @@ class YoutubeDL(object): self._playlist_level += 1 self._playlist_urls.add(webpage_url) + self._sanitize_thumbnails(ie_result) try: return self.__process_playlist(ie_result, download) finally: @@ -941,15 +1444,12 @@ class YoutubeDL(object): 'It needs to be updated.' % ie_result.get('extractor')) def _fixup(r): - self.add_extra_info( - r, - { - 'extractor': ie_result['extractor'], - 'webpage_url': ie_result['webpage_url'], - 'webpage_url_basename': url_basename(ie_result['webpage_url']), - 'extractor_key': ie_result['extractor_key'], - } - ) + self.add_extra_info(r, { + 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], + 'webpage_url_basename': url_basename(ie_result['webpage_url']), + 'extractor_key': ie_result['extractor_key'], + }) return r ie_result['entries'] = [ self.process_ie_result(_fixup(r), download, extra_info) @@ -959,15 +1459,28 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) + def _ensure_dir_exists(self, path): + return make_dir(path, self.report_error) + def __process_playlist(self, ie_result, download): # We process each entry in the playlist playlist = ie_result.get('title') or ie_result.get('id') - self.to_screen('[download] Downloading playlist: %s' % playlist) + if 'entries' not in ie_result: + raise EntryNotInPlaylist() + incomplete_entries = bool(ie_result.get('requested_entries')) + if incomplete_entries: + def fill_missing_entries(entries, indexes): + ret = [None] * max(*indexes) + for i, entry in zip(indexes, entries): + ret[i - 1] = entry + return ret + ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries']) + playlist_results = [] - playliststart = self.params.get('playliststart', 1) - 1 + playliststart = self.params.get('playliststart', 1) playlistend = self.params.get('playlistend') # For backwards compatibility, interpret -1 as whole list if playlistend == -1: @@ -987,59 +1500,92 @@ class YoutubeDL(object): playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) ie_entries = ie_result['entries'] - - def make_playlistitems_entries(list_ie_entries): - num_entries = len(list_ie_entries) - return [ - list_ie_entries[i - 1] for i in playlistitems - if -num_entries <= i - 1 < num_entries] - - def report_download(num_entries): - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, num_entries)) + msg = ( + 'Downloading %d videos' if not isinstance(ie_entries, list) + else 'Collected %d videos; downloading %%d of them' % len(ie_entries)) if isinstance(ie_entries, list): - n_all_entries = len(ie_entries) - if playlistitems: - entries = make_playlistitems_entries(ie_entries) - else: - entries = ie_entries[playliststart:playlistend] - n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Collected %d video ids (downloading %d of them)' % - (ie_result['extractor'], playlist, n_all_entries, n_entries)) - elif isinstance(ie_entries, PagedList): - if playlistitems: - entries = [] - for item in playlistitems: - entries.extend(ie_entries.getslice( - item - 1, item - )) - else: - entries = 
ie_entries.getslice( - playliststart, playlistend) - n_entries = len(entries) - report_download(n_entries) - else: # iterable - if playlistitems: - entries = make_playlistitems_entries(list(itertools.islice( - ie_entries, 0, max(playlistitems)))) - else: - entries = list(itertools.islice( - ie_entries, playliststart, playlistend)) - n_entries = len(entries) - report_download(n_entries) + def get_entry(i): + return ie_entries[i - 1] + else: + if not isinstance(ie_entries, PagedList): + ie_entries = LazyList(ie_entries) + + def get_entry(i): + return YoutubeDL.__handle_extraction_exceptions( + lambda self, i: ie_entries[i - 1] + )(self, i) + + entries = [] + items = playlistitems if playlistitems is not None else itertools.count(playliststart) + for i in items: + if i == 0: + continue + if playlistitems is None and playlistend is not None and playlistend < i: + break + entry = None + try: + entry = get_entry(i) + if entry is None: + raise EntryNotInPlaylist() + except (IndexError, EntryNotInPlaylist): + if incomplete_entries: + raise EntryNotInPlaylist() + elif not playlistitems: + break + entries.append(entry) + try: + if entry is not None: + self._match_entry(entry, incomplete=True, silent=True) + except (ExistingVideoReached, RejectedVideoReached): + break + ie_result['entries'] = entries + + # Save playlist_index before re-ordering + entries = [ + ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry) + for i, entry in enumerate(entries, 1) + if entry is not None] + n_entries = len(entries) + + if not playlistitems and (playliststart or playlistend): + playlistitems = list(range(playliststart, playliststart + n_entries)) + ie_result['requested_entries'] = playlistitems + + if self.params.get('allow_playlist_files', True): + ie_copy = { + 'playlist': playlist, + 'playlist_id': ie_result.get('id'), + 'playlist_title': ie_result.get('title'), + 'playlist_uploader': ie_result.get('uploader'), + 'playlist_uploader_id': ie_result.get('uploader_id'), + 'playlist_index': 0, + } + ie_copy.update(dict(ie_result)) + + if self._write_info_json('playlist', ie_result, + self.prepare_filename(ie_copy, 'pl_infojson')) is None: + return + if self._write_description('playlist', ie_result, + self.prepare_filename(ie_copy, 'pl_description')) is None: + return + # TODO: This should be passed to ThumbnailsConvertor if necessary + self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail')) if self.params.get('playlistreverse', False): entries = entries[::-1] - if self.params.get('playlistrandom', False): random.shuffle(entries) x_forwarded_for = ie_result.get('__x_forwarded_for_ip') - for i, entry in enumerate(entries, 1): + self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries)) + failures = 0 + max_failures = self.params.get('skip_playlist_after_errors') or float('inf') + for i, entry_tuple in enumerate(entries, 1): + playlist_index, entry = entry_tuple + if 'playlist-index' in self.params.get('compat_opts', []): + playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) # This __x_forwarded_for_ip thing is a bit ugly but requires # minimal changes @@ -1047,24 +1593,30 @@ class YoutubeDL(object): entry['__x_forwarded_for_ip'] = x_forwarded_for extra = { 'n_entries': n_entries, + '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries), + 'playlist_index': playlist_index, + 
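# Illustration (not part of the patch): the '_last_playlist_index' value added
# to the per-entry extra dict below feeds field_size_compat_map in
# prepare_outtmpl, which widens %(playlist_index)s into a zero-padded
# %(playlist_index)0Nd. A hypothetical sketch of the resulting padding:
last_playlist_index = 120           # index of the final playlist entry
width = len(str(last_playlist_index))
for index in (1, 7, 42):
    print('%0*d' % (width, index))  # 001, 007, 042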
'playlist_autonumber': i,
                     'playlist': playlist,
                     'playlist_id': ie_result.get('id'),
                     'playlist_title': ie_result.get('title'),
                     'playlist_uploader': ie_result.get('uploader'),
                     'playlist_uploader_id': ie_result.get('uploader_id'),
-                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                     'extractor': ie_result['extractor'],
                     'webpage_url': ie_result['webpage_url'],
                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
                     'extractor_key': ie_result['extractor_key'],
                 }
 
-                reason = self._match_entry(entry, incomplete=True)
-                if reason is not None:
-                    self.to_screen('[download] ' + reason)
+                if self._match_entry(entry, incomplete=True) is not None:
                     continue
 
                 entry_result = self.__process_iterable_entry(entry, download, extra)
+                if not entry_result:
+                    failures += 1
+                    if failures >= max_failures:
+                        self.report_error(
+                            'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
+                        break
                 # TODO: skip failed (empty) entries?
                 playlist_results.append(entry_result)
             ie_result['entries'] = playlist_results
@@ -1088,12 +1640,11 @@ class YoutubeDL(object):
             '!=': operator.ne,
         }
         operator_rex = re.compile(r'''(?x)\s*
-            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
-            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
-            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
-            $
+            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
+            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
-        m = operator_rex.search(filter_spec)
+        m = operator_rex.fullmatch(filter_spec)
         if m:
             try:
                 comparison_value = int(m.group('value'))
@@ -1114,13 +1665,12 @@ class YoutubeDL(object):
             '$=': lambda attr, value: attr.endswith(value),
             '*=': lambda attr, value: value in attr,
         }
-        str_operator_rex = re.compile(r'''(?x)
-            \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
-            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
-            \s*(?P<value>[a-zA-Z0-9._-]+)
-            \s*$
+        str_operator_rex = re.compile(r'''(?x)\s*
+            (?P<key>[a-zA-Z0-9._-]+)\s*
+            (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[a-zA-Z0-9._-]+)\s*
             ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
-        m = str_operator_rex.search(filter_spec)
+        m = str_operator_rex.fullmatch(filter_spec)
         if m:
             comparison_value = m.group('value')
             str_op = STR_OPERATORS[m.group('op')]
@@ -1130,7 +1680,7 @@ class YoutubeDL(object):
                 op = str_op
 
         if not m:
-            raise ValueError('Invalid filter specification %r' % filter_spec)
+            raise SyntaxError('Invalid filter specification %r' % filter_spec)
 
         def _filter(f):
             actual_value = f.get(m.group('key'))
@@ -1145,23 +1695,22 @@ class YoutubeDL(object):
             merger = FFmpegMergerPP(self)
             return merger.available and merger.can_merge()
 
-        def prefer_best():
-            if self.params.get('simulate', False):
-                return False
-            if not download:
-                return False
-            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
-                return True
-            if info_dict.get('is_live'):
-                return True
-            if not can_merge():
-                return True
-            return False
-
-        req_format_list = ['bestvideo+bestaudio', 'best']
-        if prefer_best():
-            req_format_list.reverse()
-        return '/'.join(req_format_list)
+        prefer_best = (
+            not self.params.get('simulate')
+            and download
+            and (
+                not can_merge()
+                or info_dict.get('is_live', False)
+                or self.outtmpl_dict['default'] == '-'))
+        compat = (
+            prefer_best
+            or self.params.get('allow_multiple_audio_streams', False)
+            or 'format-spec' in self.params.get('compat_opts', []))
+
+        return (
+            'best/bestvideo+bestaudio' if prefer_best
+            else 'bestvideo*+bestaudio/best' if not compat
+            else 'bestvideo+bestaudio/best')
 
     def build_format_selector(self, format_spec):
         def syntax_error(note, start):
@@ -1176,6 +1725,11 @@ class YoutubeDL(object):
         GROUP = 'GROUP'
         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
 
+        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
+                                  'video': self.params.get('allow_multiple_video_streams', False)}
+
+        check_formats = self.params.get('check_formats')
+
         def _parse_filter(tokens):
             filter_parts = []
             for type, string, start, _, _ in tokens:
@@ -1258,13 +1812,13 @@ class YoutubeDL(object):
                     group = _parse_format_selection(tokens, inside_group=True)
                     current_selector = FormatSelector(GROUP, group, [])
                 elif string == '+':
-                    if inside_merge:
+                    if not current_selector:
                         raise syntax_error('Unexpected "+"', start)
-                    video_selector = current_selector
-                    audio_selector = _parse_format_selection(tokens, inside_merge=True)
-                    if not video_selector or not audio_selector:
-                        raise syntax_error('"+" must be between two format selectors', start)
-                    current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
+                    selector_1 = current_selector
+                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
+                    if not selector_2:
+                        raise syntax_error('Expected a selector', start)
+                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                 else:
                     raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
             elif type == tokenize.ENDMARKER:
@@ -1273,18 +1827,116 @@ class YoutubeDL(object):
                 selectors.append(current_selector)
             return selectors
 
+        def _merge(formats_pair):
+            format_1, format_2 = formats_pair
+
+            formats_info = []
+            formats_info.extend(format_1.get('requested_formats', (format_1,)))
+            formats_info.extend(format_2.get('requested_formats', (format_2,)))
+
+            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
+                get_no_more = {'video': False,
'audio': False} + for (i, fmt_info) in enumerate(formats_info): + if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none': + formats_info.pop(i) + continue + for aud_vid in ['audio', 'video']: + if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none': + if get_no_more[aud_vid]: + formats_info.pop(i) + break + get_no_more[aud_vid] = True + + if len(formats_info) == 1: + return formats_info[0] + + video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none'] + audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none'] + + the_only_video = video_fmts[0] if len(video_fmts) == 1 else None + the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None + + output_ext = self.params.get('merge_output_format') + if not output_ext: + if the_only_video: + output_ext = the_only_video['ext'] + elif the_only_audio and not video_fmts: + output_ext = the_only_audio['ext'] + else: + output_ext = 'mkv' + + filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) + + new_dict = { + 'requested_formats': formats_info, + 'format': '+'.join(filtered('format')), + 'format_id': '+'.join(filtered('format_id')), + 'ext': output_ext, + 'protocol': '+'.join(map(determine_protocol, formats_info)), + 'language': '+'.join(orderedSet(filtered('language'))), + 'format_note': '+'.join(orderedSet(filtered('format_note'))), + 'filesize_approx': sum(filtered('filesize', 'filesize_approx')), + 'tbr': sum(filtered('tbr', 'vbr', 'abr')), + } + + if the_only_video: + new_dict.update({ + 'width': the_only_video.get('width'), + 'height': the_only_video.get('height'), + 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video), + 'fps': the_only_video.get('fps'), + 'vcodec': the_only_video.get('vcodec'), + 'vbr': the_only_video.get('vbr'), + 'stretched_ratio': the_only_video.get('stretched_ratio'), + }) + + if the_only_audio: + new_dict.update({ + 'acodec': the_only_audio.get('acodec'), + 'abr': the_only_audio.get('abr'), + 'asr': the_only_audio.get('asr'), + }) + + return new_dict + + def _check_formats(formats): + if not check_formats: + yield from formats + return + for f in formats: + self.to_screen('[info] Testing format %s' % f['format_id']) + temp_file = tempfile.NamedTemporaryFile( + suffix='.tmp', delete=False, + dir=self.get_output_path('temp') or None) + temp_file.close() + try: + success, _ = self.dl(temp_file.name, f, test=True) + except (DownloadError, IOError, OSError, ValueError) + network_exceptions: + success = False + finally: + if os.path.exists(temp_file.name): + try: + os.remove(temp_file.name) + except OSError: + self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) + if success: + yield f + else: + self.to_screen('[info] Unable to download format %s. Skipping...' 
% f['format_id']) + def _build_selector_function(selector): - if isinstance(selector, list): + if isinstance(selector, list): # , fs = [_build_selector_function(s) for s in selector] def selector_function(ctx): for f in fs: - for format in f(ctx): - yield format + yield from f(ctx) return selector_function - elif selector.type == GROUP: + + elif selector.type == GROUP: # () selector_function = _build_selector_function(selector.selector) - elif selector.type == PICKFIRST: + + elif selector.type == PICKFIRST: # / fs = [_build_selector_function(s) for s in selector.selector] def selector_function(ctx): @@ -1293,105 +1945,79 @@ class YoutubeDL(object): if picked_formats: return picked_formats return [] - elif selector.type == SINGLE: - format_spec = selector.selector - def selector_function(ctx): - formats = list(ctx['formats']) - if not formats: - return - if format_spec == 'all': - for f in formats: - yield f - elif format_spec in ['best', 'worst', None]: - format_idx = 0 if format_spec == 'worst' else -1 - audiovideo_formats = [ - f for f in formats - if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] - if audiovideo_formats: - yield audiovideo_formats[format_idx] - # for extractors with incomplete formats (audio only (soundcloud) - # or video only (imgur)) we will fallback to best/worst - # {video,audio}-only format - elif ctx['incomplete_formats']: - yield formats[format_idx] - elif format_spec == 'bestaudio': - audio_formats = [ - f for f in formats - if f.get('vcodec') == 'none'] - if audio_formats: - yield audio_formats[-1] - elif format_spec == 'worstaudio': - audio_formats = [ - f for f in formats - if f.get('vcodec') == 'none'] - if audio_formats: - yield audio_formats[0] - elif format_spec == 'bestvideo': - video_formats = [ - f for f in formats - if f.get('acodec') == 'none'] - if video_formats: - yield video_formats[-1] - elif format_spec == 'worstvideo': - video_formats = [ - f for f in formats - if f.get('acodec') == 'none'] - if video_formats: - yield video_formats[0] - else: - extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] - if format_spec in extensions: - filter_f = lambda f: f['ext'] == format_spec - else: - filter_f = lambda f: f['format_id'] == format_spec - matches = list(filter(filter_f, formats)) - if matches: - yield matches[-1] - elif selector.type == MERGE: - def _merge(formats_info): - format_1, format_2 = [f['format_id'] for f in formats_info] - # The first format must contain the video and the - # second the audio - if formats_info[0].get('vcodec') == 'none': - self.report_error('The first format must ' - 'contain the video, try using ' - '"-f %s+%s"' % (format_2, format_1)) - return - # Formats must be opposite (video+audio) - if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none': - self.report_error( - 'Both formats %s and %s are video-only, you must specify "-f video+audio"' - % (format_1, format_2)) - return - output_ext = ( - formats_info[0]['ext'] - if self.params.get('merge_output_format') is None - else self.params['merge_output_format']) - return { - 'requested_formats': formats_info, - 'format': '%s+%s' % (formats_info[0].get('format'), - formats_info[1].get('format')), - 'format_id': '%s+%s' % (formats_info[0].get('format_id'), - formats_info[1].get('format_id')), - 'width': formats_info[0].get('width'), - 'height': formats_info[0].get('height'), - 'resolution': formats_info[0].get('resolution'), - 'fps': formats_info[0].get('fps'), - 'vcodec': formats_info[0].get('vcodec'), - 
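# Illustration (not part of the patch): a simplified, hypothetical sketch of
# the dict assembled by the new _merge() earlier in this hunk for a
# video+audio pair; the field names follow the code, the format values are
# invented for the example.
video = {'format_id': '137', 'ext': 'mp4', 'vcodec': 'avc1', 'acodec': 'none', 'tbr': 4400.0}
audio = {'format_id': '140', 'ext': 'm4a', 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'tbr': 128.0}
merged = {
    'requested_formats': [video, audio],
    'format_id': '+'.join(f['format_id'] for f in (video, audio)),  # '137+140'
    'ext': video['ext'],  # the single video stream decides the container
    'tbr': sum(f['tbr'] for f in (video, audio)),  # combined bitrate estimate
}
print(merged['format_id'], merged['ext'], merged['tbr'])  # 137+140 mp4 4528.0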
'vbr': formats_info[0].get('vbr'),
-                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
-                    'acodec': formats_info[1].get('acodec'),
-                    'abr': formats_info[1].get('abr'),
-                    'ext': output_ext,
-                }
-            video_selector, audio_selector = map(_build_selector_function, selector.selector)
+        elif selector.type == MERGE:  # +
+            selector_1, selector_2 = map(_build_selector_function, selector.selector)
 
             def selector_function(ctx):
                 for pair in itertools.product(
-                        video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                     yield _merge(pair)
 
+        elif selector.type == SINGLE:  # atom
+            format_spec = selector.selector or 'best'
+
+            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
+            if format_spec == 'all':
+                def selector_function(ctx):
+                    yield from _check_formats(ctx['formats'])
+            elif format_spec == 'mergeall':
+                def selector_function(ctx):
+                    formats = list(_check_formats(ctx['formats']))
+                    if not formats:
+                        return
+                    merged_format = formats[-1]
+                    for f in formats[-2::-1]:
+                        merged_format = _merge((merged_format, f))
+                    yield merged_format
+
+            else:
+                format_fallback, format_reverse, format_idx = False, True, 1
+                mobj = re.match(
+                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
+                    format_spec)
+                if mobj is not None:
+                    format_idx = int_or_none(mobj.group('n'), default=1)
+                    format_reverse = mobj.group('bw')[0] == 'b'
+                    format_type = (mobj.group('type') or [None])[0]
+                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
+                    format_modified = mobj.group('mod') is not None
+
+                    format_fallback = not format_type and not format_modified  # for b, w
+                    _filter_f = (
+                        (lambda f: f.get('%scodec' % format_type) != 'none')
+                        if format_type and format_modified  # bv*, ba*, wv*, wa*
+                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
+                        if format_type  # bv, ba, wv, wa
+                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+                        if not format_modified  # b, w
+                        else lambda f: True)  # b*, w*
+                    filter_f = lambda f: _filter_f(f) and (
+                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
+                else:
+                    if format_spec in self._format_selection_exts['audio']:
+                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
+                    elif format_spec in self._format_selection_exts['video']:
+                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+                    elif format_spec in self._format_selection_exts['storyboards']:
+                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
+                    else:
+                        filter_f = lambda f: f.get('format_id') == format_spec  # id
+
+                def selector_function(ctx):
+                    formats = list(ctx['formats'])
+                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
+                    if format_fallback and ctx['incomplete_formats'] and not matches:
+                        # for extractors with incomplete formats (audio only (soundcloud)
+                        # or video only (imgur)) best/worst will fallback to
+                        # best/worst {video,audio}-only format
+                        matches = formats
+                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
+                    try:
+                        yield matches[format_idx - 1]
+                    except IndexError:
+                        return
+
         filters = [self._build_format_filter(f) for f in selector.filters]
 
         def final_selector(ctx):
@@ -1453,13 +2079,51 @@ class YoutubeDL(object):
         self.cookiejar.add_cookie_header(pr)
         return pr.get_header('Cookie')
 
+    def _sanitize_thumbnails(self, info_dict):
+        thumbnails =
info_dict.get('thumbnails') + if thumbnails is None: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] + if thumbnails: + thumbnails.sort(key=lambda t: ( + t.get('preference') if t.get('preference') is not None else -1, + t.get('width') if t.get('width') is not None else -1, + t.get('height') if t.get('height') is not None else -1, + t.get('id') if t.get('id') is not None else '', + t.get('url'))) + + def thumbnail_tester(): + def test_thumbnail(t): + self.to_screen(f'[info] Testing thumbnail {t["id"]}') + try: + self.urlopen(HEADRequest(t['url'])) + except network_exceptions as err: + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') + return False + return True + return test_thumbnail + + for i, t in enumerate(thumbnails): + if t.get('id') is None: + t['id'] = '%d' % i + if t.get('width') and t.get('height'): + t['resolution'] = '%dx%d' % (t['width'], t['height']) + t['url'] = sanitize_url(t['url']) + + if self.params.get('check_formats'): + info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() + else: + info_dict['thumbnails'] = thumbnails + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' if 'id' not in info_dict: raise ExtractorError('Missing "id" field in extractor result') if 'title' not in info_dict: - raise ExtractorError('Missing "title" field in extractor result') + raise ExtractorError('Missing "title" field in extractor result', + video_id=info_dict['id'], ie=info_dict['extractor']) def report_force_conversion(field, field_not, conversion): self.report_warning( @@ -1489,37 +2153,21 @@ class YoutubeDL(object): info_dict['playlist'] = None info_dict['playlist_index'] = None - thumbnails = info_dict.get('thumbnails') - if thumbnails is None: - thumbnail = info_dict.get('thumbnail') - if thumbnail: - info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] - if thumbnails: - thumbnails.sort(key=lambda t: ( - t.get('preference') if t.get('preference') is not None else -1, - t.get('width') if t.get('width') is not None else -1, - t.get('height') if t.get('height') is not None else -1, - t.get('id') if t.get('id') is not None else '', t.get('url'))) - for i, t in enumerate(thumbnails): - t['url'] = sanitize_url(t['url']) - if t.get('width') and t.get('height'): - t['resolution'] = '%dx%d' % (t['width'], t['height']) - if t.get('id') is None: - t['id'] = '%d' % i - - if self.params.get('list_thumbnails'): - self.list_thumbnails(info_dict) - return + self._sanitize_thumbnails(info_dict) thumbnail = info_dict.get('thumbnail') + thumbnails = info_dict.get('thumbnails') if thumbnail: info_dict['thumbnail'] = sanitize_url(thumbnail) elif thumbnails: info_dict['thumbnail'] = thumbnails[-1]['url'] - if 'display_id' not in info_dict and 'id' in info_dict: + if info_dict.get('display_id') is None and 'id' in info_dict: info_dict['display_id'] = info_dict['id'] + if info_dict.get('duration') is not None: + info_dict['duration_string'] = formatSeconds(info_dict['duration']) + for ts_key, date_key in ( ('timestamp', 'upload_date'), ('release_timestamp', 'release_date'), @@ -1533,6 +2181,23 @@ class YoutubeDL(object): except (ValueError, OverflowError, OSError): pass + live_keys = ('is_live', 'was_live') + live_status = info_dict.get('live_status') + if live_status is None: + for key in live_keys: + if info_dict.get(key) is False: + continue + if info_dict.get(key): + 
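# Illustration (not part of the patch): the live_status inference implemented
# just below, restated as a standalone function (infer_live_status is a
# hypothetical name for this sketch).
def infer_live_status(info):
    for key in ('is_live', 'was_live'):
        if info.get(key):
            return key
    if all(info.get(k) is False for k in ('is_live', 'was_live')):
        return 'not_live'
    return None  # unknown

print(infer_live_status({'is_live': True}))                       # is_live
print(infer_live_status({'is_live': False, 'was_live': True}))    # was_live
print(infer_live_status({'is_live': False, 'was_live': False}))   # not_live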
live_status = key
+                    break
+        if all(info_dict.get(key) is False for key in live_keys):
+            live_status = 'not_live'
+        if live_status:
+            info_dict['live_status'] = live_status
+        for key in live_keys:
+            if info_dict.get(key) is None:
+                info_dict[key] = (live_status == key)
+
         # Auto generate title fields corresponding to the *_number fields when missing
         # in order to always have clean titles. This is very common for TV series.
         for field in ('chapter', 'season', 'episode'):
@@ -1552,13 +2217,6 @@ class YoutubeDL(object):
         automatic_captions = info_dict.get('automatic_captions')
         subtitles = info_dict.get('subtitles')
 
-        if self.params.get('listsubtitles', False):
-            if 'automatic_captions' in info_dict:
-                self.list_subtitles(
-                    info_dict['id'], automatic_captions, 'automatic captions')
-            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
-            return
-
         info_dict['requested_subtitles'] = self.process_subtitles(
             info_dict['id'], subtitles, automatic_captions)
@@ -1569,8 +2227,12 @@ class YoutubeDL(object):
         else:
             formats = info_dict['formats']
 
+        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
+        if not self.params.get('allow_unplayable_formats'):
+            formats = [f for f in formats if not f.get('has_drm')]
+
         if not formats:
-            raise ExtractorError('No video formats found!')
+            self.raise_no_formats(info_dict)
 
         def is_wellformed(f):
             url = f.get('url')
@@ -1604,25 +2266,32 @@ class YoutubeDL(object):
                 formats_dict[format_id].append(format)
 
         # Make sure all formats have unique format_id
+        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
         for format_id, ambiguous_formats in formats_dict.items():
-            if len(ambiguous_formats) > 1:
-                for i, format in enumerate(ambiguous_formats):
+            ambiguous_id = len(ambiguous_formats) > 1
+            for i, format in enumerate(ambiguous_formats):
+                if ambiguous_id:
                     format['format_id'] = '%s-%d' % (format_id, i)
+                if format.get('ext') is None:
+                    format['ext'] = determine_ext(format['url']).lower()
+                # Ensure there is no conflict between id and ext in format selection
+                # See https://github.com/hypervideo/hypervideo/issues/1282
+                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
+                    format['format_id'] = 'f%s' % format['format_id']
 
         for i, format in enumerate(formats):
             if format.get('format') is None:
                 format['format'] = '{id} - {res}{note}'.format(
                     id=format['format_id'],
                     res=self.format_resolution(format),
-                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
+                    note=format_field(format, 'format_note', ' (%s)'),
                 )
-            # Automatically determine file extension if missing
-            if format.get('ext') is None:
-                format['ext'] = determine_ext(format['url']).lower()
-            # Automatically determine protocol if missing (useful for format
-            # selection purposes)
             if format.get('protocol') is None:
                 format['protocol'] = determine_protocol(format)
+            if format.get('resolution') is None:
+                format['resolution'] = self.format_resolution(format, default=None)
+            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
+                format['dynamic_range'] = 'SDR'
             # Add HTTP headers, so that external programs can use them from the
             # json output
             full_format_info = info_dict.copy()
@@ -1634,23 +2303,39 @@ class YoutubeDL(object):
 
         # TODO Central sorting goes here
 
-        if formats[0] is not info_dict:
+        if not formats or formats[0] is not info_dict:
             # only set the 'formats' fields if the original info_dict lists them
             # otherwise we end up with a circular reference, the first (and unique)
             # element in the 'formats' field
in info_dict is info_dict itself, # which can't be exported to json info_dict['formats'] = formats + + info_dict, _ = self.pre_process(info_dict) + + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) if self.params.get('listformats'): - self.list_formats(info_dict) + if not info_dict.get('formats') and not info_dict.get('url'): + self.to_screen('%s has no formats' % info_dict['id']) + else: + self.list_formats(info_dict) + if self.params.get('listsubtitles'): + if 'automatic_captions' in info_dict: + self.list_subtitles( + info_dict['id'], automatic_captions, 'automatic captions') + self.list_subtitles(info_dict['id'], subtitles, 'subtitles') + list_only = self.params.get('simulate') is None and ( + self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')) + if list_only: + # Without this printing, -F --print-json will not work + self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True) return - req_format = self.params.get('format') - if req_format is None: + format_selector = self.format_selector + if format_selector is None: req_format = self._default_format_spec(info_dict, download=download) - if self.params.get('verbose'): - self._write_string('[debug] Default format spec: %s\n' % req_format) - - format_selector = self.build_format_selector(req_format) + self.write_debug('Default format spec: %s' % req_format) + format_selector = self.build_format_selector(req_format) # While in format selection we may need to have an access to the original # format set in order to calculate some metrics or do some processing. @@ -1680,18 +2365,27 @@ class YoutubeDL(object): formats_to_download = list(format_selector(ctx)) if not formats_to_download: - raise ExtractorError('requested format not available', - expected=True) - - if download: - if len(formats_to_download) > 1: - self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) - for format in formats_to_download: + if not self.params.get('ignore_no_formats_error'): + raise ExtractorError('Requested format is not available', expected=True, + video_id=info_dict['id'], ie=info_dict['extractor']) + else: + self.report_warning('Requested format is not available') + # Process what we can, even without any available formats. + self.process_info(dict(info_dict)) + elif download: + self.to_screen( + '[info] %s: Downloading %d format(s): %s' % ( + info_dict['id'], len(formats_to_download), + ", ".join([f['format_id'] for f in formats_to_download]))) + for fmt in formats_to_download: new_info = dict(info_dict) - new_info.update(format) + # Save a reference to the original info_dict so that it can be modified in process_info if needed + new_info['__original_infodict'] = info_dict + new_info.update(fmt) self.process_info(new_info) # We update the info dict with the best quality format (backwards compatibility) - info_dict.update(formats_to_download[-1]) + if formats_to_download: + info_dict.update(formats_to_download[-1]) return info_dict def process_subtitles(self, video_id, normal_subtitles, automatic_captions): @@ -1709,15 +2403,34 @@ class YoutubeDL(object): available_subs): return None + all_sub_langs = available_subs.keys() if self.params.get('allsubtitles', False): - requested_langs = available_subs.keys() + requested_langs = all_sub_langs + elif self.params.get('subtitleslangs', False): + # A list is used so that the order of languages will be the same as + # given in subtitleslangs. 
See https://github.com/hypervideo/hypervideo/issues/1041 + requested_langs = [] + for lang_re in self.params.get('subtitleslangs'): + if lang_re == 'all': + requested_langs.extend(all_sub_langs) + continue + discard = lang_re[0] == '-' + if discard: + lang_re = lang_re[1:] + current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs) + if discard: + for lang in current_langs: + while lang in requested_langs: + requested_langs.remove(lang) + else: + requested_langs.extend(current_langs) + requested_langs = orderedSet(requested_langs) + elif 'en' in available_subs: + requested_langs = ['en'] else: - if self.params.get('subtitleslangs', False): - requested_langs = self.params.get('subtitleslangs') - elif 'en' in available_subs: - requested_langs = ['en'] - else: - requested_langs = [list(available_subs.keys())[0]] + requested_langs = [list(all_sub_langs)[0]] + if requested_langs: + self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs)) formats_query = self.params.get('subtitlesformat', 'best') formats_preference = formats_query.split('/') if formats_query else [] @@ -1744,34 +2457,80 @@ class YoutubeDL(object): return subs def __forced_printings(self, info_dict, filename, incomplete): - def print_mandatory(field): + def print_mandatory(field, actual_field=None): + if actual_field is None: + actual_field = field if (self.params.get('force%s' % field, False) - and (not incomplete or info_dict.get(field) is not None)): - self.to_stdout(info_dict[field]) + and (not incomplete or info_dict.get(actual_field) is not None)): + self.to_stdout(info_dict[actual_field]) def print_optional(field): if (self.params.get('force%s' % field, False) and info_dict.get(field) is not None): self.to_stdout(info_dict[field]) + info_dict = info_dict.copy() + if filename is not None: + info_dict['filename'] = filename + if info_dict.get('requested_formats') is not None: + # For RTMP URLs, also include the playpath + info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats']) + elif 'url' in info_dict: + info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '') + + if self.params.get('forceprint') or self.params.get('forcejson'): + self.post_extract(info_dict) + for tmpl in self.params.get('forceprint', []): + mobj = re.match(r'\w+(=?)$', tmpl) + if mobj and mobj.group(1): + tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s' + elif mobj: + tmpl = '%({})s'.format(tmpl) + self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict)) + print_mandatory('title') print_mandatory('id') - if self.params.get('forceurl', False) and not incomplete: - if info_dict.get('requested_formats') is not None: - for f in info_dict['requested_formats']: - self.to_stdout(f['url'] + f.get('play_path', '')) - else: - # For RTMP URLs, also include the playpath - self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) + print_mandatory('url', 'urls') print_optional('thumbnail') print_optional('description') - if self.params.get('forcefilename', False) and filename is not None: - self.to_stdout(filename) - if self.params.get('forceduration', False) and info_dict.get('duration') is not None: + print_optional('filename') + if self.params.get('forceduration') and info_dict.get('duration') is not None: self.to_stdout(formatSeconds(info_dict['duration'])) print_mandatory('format') - if self.params.get('forcejson', False): - self.to_stdout(json.dumps(info_dict)) + + if self.params.get('forcejson'): + self.to_stdout(json.dumps(self.sanitize_info(info_dict))) + + def dl(self, 
name, info, subtitle=False, test=False): + if not info.get('url'): + self.raise_no_formats(info, True) + + if test: + verbose = self.params.get('verbose') + params = { + 'test': True, + 'quiet': self.params.get('quiet') or not verbose, + 'verbose': verbose, + 'noprogress': not verbose, + 'nopart': True, + 'skip_unavailable_fragments': False, + 'keep_fragments': False, + 'overwrites': True, + '_no_ytdl_file': True, + } + else: + params = self.params + fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) + if not test: + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']]) + self.write_debug('Invoking downloader on "%s"' % urls) + + new_info = copy.deepcopy(self._copy_infodict(info)) + if new_info.get('http_headers') is None: + new_info['http_headers'] = self._calc_headers(new_info) + return fd.download(name, new_info, subtitle) def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -1786,61 +2545,66 @@ class YoutubeDL(object): # TODO: backward compatibility, to be removed info_dict['fulltitle'] = info_dict['title'] - if 'format' not in info_dict: + if 'format' not in info_dict and 'ext' in info_dict: info_dict['format'] = info_dict['ext'] - reason = self._match_entry(info_dict, incomplete=False) - if reason is not None: - self.to_screen('[download] ' + reason) + if self._match_entry(info_dict) is not None: return + self.post_extract(info_dict) self._num_downloads += 1 - info_dict['_filename'] = filename = self.prepare_filename(info_dict) + # info_dict['_filename'] needs to be set for backward compatibility + info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) + temp_filename = self.prepare_filename(info_dict, 'temp') + files_to_move = {} # Forced printings - self.__forced_printings(info_dict, filename, incomplete=False) + self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) - # Do nothing else if in simulate mode - if self.params.get('simulate', False): + if self.params.get('simulate'): + if self.params.get('force_write_download_archive', False): + self.record_download_archive(info_dict) + # Do nothing else if in simulate mode return - if filename is None: + if full_filename is None: + return + if not self._ensure_dir_exists(encodeFilename(full_filename)): + return + if not self._ensure_dir_exists(encodeFilename(temp_filename)): return - def ensure_dir_exists(path): - try: - dn = os.path.dirname(path) - if dn and not os.path.exists(dn): - os.makedirs(dn) - return True - except (OSError, IOError) as err: - if isinstance(err, OSError) and err.errno == errno.EEXIST: - return True - self.report_error('unable to create directory ' + error_to_compat_str(err)) - return False + if self._write_description('video', info_dict, + self.prepare_filename(info_dict, 'description')) is None: + return - if not ensure_dir_exists(sanitize_path(encodeFilename(filename))): + sub_files = self._write_subtitles(info_dict, temp_filename) + if sub_files is None: return + files_to_move.update(dict(sub_files)) - if self.params.get('writedescription', False): - descfn = replace_extension(filename, 'description', info_dict.get('ext')) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)): - self.to_screen('[info] Video description is already present') - elif info_dict.get('description') is None: - self.report_warning('There\'s no description to write.') - else: - try: 
- self.to_screen('[info] Writing video description to: ' + descfn) - with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: - descfile.write(info_dict['description']) - except (OSError, IOError): - self.report_error('Cannot write description file ' + descfn) - return + thumb_files = self._write_thumbnails( + 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail')) + if thumb_files is None: + return + files_to_move.update(dict(thumb_files)) + + infofn = self.prepare_filename(info_dict, 'infojson') + _infojson_written = self._write_info_json('video', info_dict, infofn) + if _infojson_written: + info_dict['__infojson_filename'] = infofn + elif _infojson_written is None: + return + # Note: Annotations are deprecated + annofn = None if self.params.get('writeannotations', False): - annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext')) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)): + annofn = self.prepare_filename(info_dict, 'annotation') + if annofn: + if not self._ensure_dir_exists(encodeFilename(annofn)): + return + if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): self.to_screen('[info] Video annotations are already present') elif not info_dict.get('annotations'): self.report_warning('There are no annotations to write.') @@ -1855,126 +2619,213 @@ class YoutubeDL(object): self.report_error('Cannot write annotations file: ' + annofn) return - subtitles_are_requested = any([self.params.get('writesubtitles', False), - self.params.get('writeautomaticsub')]) + # Write internet shortcut files + url_link = webloc_link = desktop_link = False + if self.params.get('writelink', False): + if sys.platform == "darwin": # macOS. 
+ webloc_link = True + elif sys.platform.startswith("linux"): + desktop_link = True + else: # if sys.platform in ['win32', 'cygwin']: + url_link = True + if self.params.get('writeurllink', False): + url_link = True + if self.params.get('writewebloclink', False): + webloc_link = True + if self.params.get('writedesktoplink', False): + desktop_link = True + + if url_link or webloc_link or desktop_link: + if 'webpage_url' not in info_dict: + self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') + return + ascii_url = iri_to_uri(info_dict['webpage_url']) - if subtitles_are_requested and info_dict.get('requested_subtitles'): - # subtitles download errors are already managed as troubles in relevant IE - # that way it will silently go on when used with unsupporting IE - subtitles = info_dict['requested_subtitles'] - ie = self.get_info_extractor(info_dict['extractor_key']) - for sub_lang, sub_info in subtitles.items(): - sub_format = sub_info['ext'] - sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): - self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) - else: - self.to_screen('[info] Writing video subtitles to: ' + sub_filename) - if sub_info.get('data') is not None: - try: - # Use newline='' to prevent conversion of newline characters - # See https://github.com/ytdl-org/youtube-dl/issues/10268 - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: - subfile.write(sub_info['data']) - except (OSError, IOError): - self.report_error('Cannot write subtitles file ' + sub_filename) - return - else: - try: - sub_data = ie._request_webpage( - sub_info['url'], info_dict['id'], note=False).read() - with io.open(encodeFilename(sub_filename), 'wb') as subfile: - subfile.write(sub_data) - except (ExtractorError, IOError, OSError, ValueError) as err: - self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, error_to_compat_str(err))) - continue - - if self.params.get('writeinfojson', False): - infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)): - self.to_screen('[info] Video description metadata is already present') + def _write_link_file(extension, template, newline, embed_filename): + linkfn = replace_extension(full_filename, extension, info_dict.get('ext')) + if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): + self.to_screen('[info] Internet shortcut is already present') else: - self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) try: - write_json_file(self.filter_requested_info(info_dict), infofn) + self.to_screen('[info] Writing internet shortcut to: ' + linkfn) + with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile: + template_vars = {'url': ascii_url} + if embed_filename: + template_vars['filename'] = linkfn[:-(len(extension) + 1)] + linkfile.write(template % template_vars) except (OSError, IOError): - self.report_error('Cannot write metadata to JSON file ' + infofn) - return + self.report_error('Cannot write internet shortcut ' + linkfn) + return False + return True - self._write_thumbnails(info_dict, filename) + if url_link: + if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, 
'\r\n', embed_filename=False): + return + if webloc_link: + if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False): + return + if desktop_link: + if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True): + return + + try: + info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) + except PostProcessingError as err: + self.report_error('Preprocessing: %s' % str(err)) + return - if not self.params.get('skip_download', False): + must_record_download_archive = False + if self.params.get('skip_download', False): + info_dict['filepath'] = temp_filename + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + info_dict['__files_to_move'] = files_to_move + info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict) + else: + # Download + info_dict.setdefault('__postprocessors', []) try: - def dl(name, info): - fd = get_suitable_downloader(info, self.params)(self, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - if self.params.get('verbose'): - self.to_screen('[debug] Invoking downloader on %r' % info.get('url')) - return fd.download(name, info) + def existing_file(*filepaths): + ext = info_dict.get('ext') + final_ext = self.params.get('final_ext', ext) + existing_files = [] + for file in orderedSet(filepaths): + if final_ext != ext: + converted = replace_extension(file, final_ext, ext) + if os.path.exists(encodeFilename(converted)): + existing_files.append(converted) + if os.path.exists(encodeFilename(file)): + existing_files.append(file) + + if not existing_files or self.params.get('overwrites', False): + for file in orderedSet(existing_files): + self.report_file_delete(file) + os.remove(encodeFilename(file)) + return None + + info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:] + return existing_files[0] + + success = True if info_dict.get('requested_formats') is not None: - downloaded = [] - success = True - merger = FFmpegMergerPP(self) - if not merger.available: - postprocessors = [] - self.report_warning('You have requested multiple ' - 'formats but ffmpeg or avconv are not installed.' - ' The formats won\'t be merged.') - else: - postprocessors = [merger] def compatible_formats(formats): - video, audio = formats + # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them. 
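+ # For illustration: {'mp4', 'm4a'} is a subset of the first COMPATIBLE_EXTS group below, so an mp4 video paired with an m4a audio merges as-is; {'mp4', 'webm'} is a subset of neither group, so the merge falls back to the mkv container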
+ video_formats = [format for format in formats if format.get('vcodec') != 'none'] + audio_formats = [format for format in formats if format.get('acodec') != 'none'] + if len(video_formats) > 2 or len(audio_formats) > 2: + return False + # Check extension - video_ext, audio_ext = video.get('ext'), audio.get('ext') - if video_ext and audio_ext: - COMPATIBLE_EXTS = ( - ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'), - ('webm') - ) - for exts in COMPATIBLE_EXTS: - if video_ext in exts and audio_ext in exts: - return True + exts = set(format.get('ext') for format in formats) + COMPATIBLE_EXTS = ( + set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')), + set(('webm',)), + ) + for ext_sets in COMPATIBLE_EXTS: + if ext_sets.issuperset(exts): + return True # TODO: Check acodec/vcodec return False - filename_real_ext = os.path.splitext(filename)[1][1:] - filename_wo_ext = ( - os.path.splitext(filename)[0] - if filename_real_ext == info_dict['ext'] - else filename) requested_formats = info_dict['requested_formats'] - if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): - info_dict['ext'] = 'mkv' - self.report_warning( - 'Requested formats are incompatible for merge and will be merged into mkv.') + old_ext = info_dict['ext'] + if self.params.get('merge_output_format') is None: + if not compatible_formats(requested_formats): + info_dict['ext'] = 'mkv' + self.report_warning( + 'Requested formats are incompatible for merge and will be merged into mkv') + if (info_dict['ext'] == 'webm' + and info_dict.get('thumbnails') + # check with type instead of pp_key, __name__, or isinstance + # since we dont want any custom PPs to trigger this + and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): + info_dict['ext'] = 'mkv' + self.report_warning( + 'webm doesn\'t support embedding a thumbnail, mkv will be used') + new_ext = info_dict['ext'] + + def correct_ext(filename, ext=new_ext): + if filename == '-': + return filename + filename_real_ext = os.path.splitext(filename)[1][1:] + filename_wo_ext = ( + os.path.splitext(filename)[0] + if filename_real_ext in (old_ext, new_ext) + else filename) + return '%s.%s' % (filename_wo_ext, ext) + # Ensure filename always has a correct extension for successful merge - filename = '%s.%s' % (filename_wo_ext, info_dict['ext']) - if os.path.exists(encodeFilename(filename)): - self.to_screen( - '[download] %s has already been downloaded and ' - 'merged' % filename) + full_filename = correct_ext(full_filename) + temp_filename = correct_ext(temp_filename) + dl_filename = existing_file(full_filename, temp_filename) + info_dict['__real_download'] = False + + if dl_filename is not None: + self.report_file_already_downloaded(dl_filename) + elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'): + info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) + success, real_download = self.dl(temp_filename, info_dict) + info_dict['__real_download'] = real_download else: + downloaded = [] + merger = FFmpegMergerPP(self) + if self.params.get('allow_unplayable_formats'): + self.report_warning( + 'You have requested merging of multiple formats ' + 'while also allowing unplayable formats to be downloaded. ' + 'The formats won\'t be merged to prevent data corruption.') + elif not merger.available: + self.report_warning( + 'You have requested merging of multiple formats but ffmpeg is not installed. 
' + 'The formats won\'t be merged.') + + if temp_filename == '-': + reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict) + else 'but the formats are incompatible for simultaneous download' if merger.available + else 'but ffmpeg is not installed') + self.report_warning( + f'You have requested downloading multiple formats to stdout {reason}. ' + 'The formats will be streamed one after the other') + fname = temp_filename for f in requested_formats: new_info = dict(info_dict) + del new_info['requested_formats'] new_info.update(f) - fname = prepend_extension( - self.prepare_filename(new_info), - 'f%s' % f['format_id'], new_info['ext']) - if not ensure_dir_exists(fname): - return - downloaded.append(fname) - partial_success = dl(fname, new_info) + if temp_filename != '-': + fname = prepend_extension( + correct_ext(temp_filename, new_info['ext']), + 'f%s' % f['format_id'], new_info['ext']) + if not self._ensure_dir_exists(fname): + return + f['filepath'] = fname + downloaded.append(fname) + partial_success, real_download = self.dl(fname, new_info) + info_dict['__real_download'] = info_dict['__real_download'] or real_download success = success and partial_success - info_dict['__postprocessors'] = postprocessors - info_dict['__files_to_merge'] = downloaded + if merger.available and not self.params.get('allow_unplayable_formats'): + info_dict['__postprocessors'].append(merger) + info_dict['__files_to_merge'] = downloaded + # Even if there were no downloads, it is being merged only now + info_dict['__real_download'] = True + else: + for file in downloaded: + files_to_move[file] = None else: # Just a single file - success = dl(filename, info_dict) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + dl_filename = existing_file(full_filename, temp_filename) + if dl_filename is None or dl_filename == temp_filename: + # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part. + # So we should try to resume the download + success, real_download = self.dl(temp_filename, info_dict) + info_dict['__real_download'] = real_download + else: + self.report_file_already_downloaded(dl_filename) + + dl_filename = dl_filename or temp_filename + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + + except network_exceptions as err: self.report_error('unable to download video data: %s' % error_to_compat_str(err)) return except (OSError, IOError) as err: @@ -1983,79 +2834,77 @@ class YoutubeDL(object): self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return - if success and filename != '-': - # Fixup content - fixup_policy = self.params.get('fixup') - if fixup_policy is None: - fixup_policy = 'detect_or_warn' - - INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.' - - stretched_ratio = info_dict.get('stretched_ratio') - if stretched_ratio is not None and stretched_ratio != 1: - if fixup_policy == 'warn': - self.report_warning('%s: Non-uniform pixel ratio (%s)' % ( - info_dict['id'], stretched_ratio)) - elif fixup_policy == 'detect_or_warn': - stretched_pp = FFmpegFixupStretchedPP(self) - if stretched_pp.available: - info_dict.setdefault('__postprocessors', []) - info_dict['__postprocessors'].append(stretched_pp) - else: - self.report_warning( - '%s: Non-uniform pixel ratio (%s). 
%s' - % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE)) - else: - assert fixup_policy in ('ignore', 'never') - - if (info_dict.get('requested_formats') is None - and info_dict.get('container') == 'm4a_dash'): - if fixup_policy == 'warn': - self.report_warning( - '%s: writing DASH m4a. ' - 'Only some players support this container.' - % info_dict['id']) - elif fixup_policy == 'detect_or_warn': - fixup_pp = FFmpegFixupM4aPP(self) - if fixup_pp.available: - info_dict.setdefault('__postprocessors', []) - info_dict['__postprocessors'].append(fixup_pp) - else: - self.report_warning( - '%s: writing DASH m4a. ' - 'Only some players support this container. %s' - % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) - else: - assert fixup_policy in ('ignore', 'never') - - if (info_dict.get('protocol') == 'm3u8_native' - or info_dict.get('protocol') == 'm3u8' - and self.params.get('hls_prefer_native')): - if fixup_policy == 'warn': - self.report_warning('%s: malformed AAC bitstream detected.' % ( - info_dict['id'])) - elif fixup_policy == 'detect_or_warn': - fixup_pp = FFmpegFixupM3u8PP(self) - if fixup_pp.available: - info_dict.setdefault('__postprocessors', []) - info_dict['__postprocessors'].append(fixup_pp) - else: - self.report_warning( - '%s: malformed AAC bitstream detected. %s' - % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) - else: - assert fixup_policy in ('ignore', 'never') + if success and full_filename != '-': + + def fixup(): + do_fixup = True + fixup_policy = self.params.get('fixup') + vid = info_dict['id'] + if fixup_policy in ('ignore', 'never'): + return + elif fixup_policy == 'warn': + do_fixup = False + elif fixup_policy != 'force': + assert fixup_policy in ('detect_or_warn', None) + if not info_dict.get('__real_download'): + do_fixup = False + + def ffmpeg_fixup(cndn, msg, cls): + if not cndn: + return + if not do_fixup: + self.report_warning(f'{vid}: {msg}') + return + pp = cls(self) + if pp.available: + info_dict['__postprocessors'].append(pp) + else: + self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically') + + stretched_ratio = info_dict.get('stretched_ratio') + ffmpeg_fixup( + stretched_ratio not in (1, None), + f'Non-uniform pixel ratio {stretched_ratio}', + FFmpegFixupStretchedPP) + + ffmpeg_fixup( + (info_dict.get('requested_formats') is None + and info_dict.get('container') == 'm4a_dash' + and info_dict.get('ext') == 'm4a'), + 'writing DASH m4a. 
Only some players support this container', + FFmpegFixupM4aPP) + + downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None + downloader = downloader.__name__ if downloader else None + ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD', + 'malformed AAC bitstream detected', FFmpegFixupM3u8PP) + ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP) + ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP) + + fixup() try: - self.post_process(filename, info_dict) - except (PostProcessingError) as err: - self.report_error('postprocessing: %s' % str(err)) + info_dict = self.post_process(dl_filename, info_dict, files_to_move) + except PostProcessingError as err: + self.report_error('Postprocessing: %s' % str(err)) return - self.record_download_archive(info_dict) + try: + for ph in self._post_hooks: + ph(info_dict['filepath']) + except Exception as err: + self.report_error('post hooks: %s' % str(err)) + return + must_record_download_archive = True + + if must_record_download_archive or self.params.get('force_write_download_archive', False): + self.record_download_archive(info_dict) + max_downloads = self.params.get('max_downloads') + if max_downloads is not None and self._num_downloads >= int(max_downloads): + raise MaxDownloadsReached() def download(self, url_list): """Download a given list of URLs.""" - outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) + outtmpl = self.outtmpl_dict['default'] if (len(url_list) > 1 and outtmpl != '-' and '%' not in outtmpl @@ -2070,11 +2919,18 @@ class YoutubeDL(object): except UnavailableVideoError: self.report_error('unable to download video') except MaxDownloadsReached: - self.to_screen('[info] Maximum number of downloaded files reached.') + self.to_screen('[info] Maximum number of downloads reached') + raise + except ExistingVideoReached: + self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing') + raise + except RejectedVideoReached: + self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject') raise else: if self.params.get('dump_single_json', False): - self.to_stdout(json.dumps(res)) + self.post_extract(res) + self.to_stdout(json.dumps(self.sanitize_info(res))) return self._download_retcode @@ -2083,10 +2939,10 @@ class YoutubeDL(object): [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load - info = self.filter_requested_info(json.loads('\n'.join(f))) + info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True)) try: self.process_ie_result(info, download=True) - except DownloadError: + except (DownloadError, EntryNotInPlaylist, ThrottledDownload): webpage_url = info.get('webpage_url') if webpage_url is not None: self.report_warning('The info failed to download, trying with "%s"' % webpage_url) @@ -2096,32 +2952,102 @@ class YoutubeDL(object): return self._download_retcode @staticmethod - def filter_requested_info(info_dict): - return dict( - (k, v) for k, v in info_dict.items() - if k not in ['requested_formats', 'requested_subtitles']) + def sanitize_info(info_dict, remove_private_keys=False): + ''' Sanitize the infodict for converting to json ''' + if info_dict is None: + return info_dict + info_dict.setdefault('epoch', int(time.time())) + remove_keys = 
{'__original_infodict'} # Always remove this since this may contain a copy of the entire dict + keep_keys = ['_type'] # Always keep this to facilitate load-info-json + if remove_private_keys: + remove_keys |= { + 'requested_formats', 'requested_subtitles', 'requested_entries', + 'filepath', 'entries', 'original_url', 'playlist_autonumber', + } + empty_values = (None, {}, [], set(), tuple()) + reject = lambda k, v: k not in keep_keys and ( + k.startswith('_') or k in remove_keys or v in empty_values) + else: + reject = lambda k, v: k in remove_keys + filter_fn = lambda obj: ( + list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set)) + else obj if not isinstance(obj, dict) + else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))) + return filter_fn(info_dict) + + @staticmethod + def filter_requested_info(info_dict, actually_filter=True): + ''' Alias of sanitize_info for backward compatibility ''' + return YoutubeDL.sanitize_info(info_dict, actually_filter) + + def run_pp(self, pp, infodict): + files_to_delete = [] + if '__files_to_move' not in infodict: + infodict['__files_to_move'] = {} + try: + files_to_delete, infodict = pp.run(infodict) + except PostProcessingError as e: + # Must be True and not 'only_download' + if self.params.get('ignoreerrors') is True: + self.report_error(e) + return infodict + raise + + if not files_to_delete: + return infodict + if self.params.get('keepvideo', False): + for f in files_to_delete: + infodict['__files_to_move'].setdefault(f, '') + else: + for old_filename in set(files_to_delete): + self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) + try: + os.remove(encodeFilename(old_filename)) + except (IOError, OSError): + self.report_warning('Unable to remove downloaded original file') + if old_filename in infodict['__files_to_move']: + del infodict['__files_to_move'][old_filename] + return infodict + + @staticmethod + def post_extract(info_dict): + def actual_post_extract(info_dict): + if info_dict.get('_type') in ('playlist', 'multi_video'): + for video_dict in info_dict.get('entries', {}): + actual_post_extract(video_dict or {}) + return + + post_extractor = info_dict.get('__post_extractor') or (lambda: {}) + extra = post_extractor().items() + info_dict.update(extra) + info_dict.pop('__post_extractor', None) + + original_infodict = info_dict.get('__original_infodict') or {} + original_infodict.update(extra) + original_infodict.pop('__post_extractor', None) - def post_process(self, filename, ie_info): + actual_post_extract(info_dict or {}) + + def pre_process(self, ie_info, key='pre_process', files_to_move=None): + info = dict(ie_info) + info['__files_to_move'] = files_to_move or {} + for pp in self._pps[key]: + info = self.run_pp(pp, info) + return info, info.pop('__files_to_move', None) + + def post_process(self, filename, ie_info, files_to_move=None): """Run all the postprocessors on the given file.""" info = dict(ie_info) info['filepath'] = filename - pps_chain = [] - if ie_info.get('__postprocessors') is not None: - pps_chain.extend(ie_info['__postprocessors']) - pps_chain.extend(self._pps) - for pp in pps_chain: - files_to_delete = [] - try: - files_to_delete, info = pp.run(info) - except PostProcessingError as e: - self.report_error(e.msg) - if files_to_delete and not self.params.get('keepvideo', False): - for old_filename in files_to_delete: - self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) - try: - os.remove(encodeFilename(old_filename)) - except (IOError, 
OSError): - self.report_warning('Unable to remove downloaded original file') + info['__files_to_move'] = files_to_move or {} + + for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']: + info = self.run_pp(pp, info) + info = self.run_pp(MoveFilesAfterDownloadPP(self), info) + del info['__files_to_move'] + for pp in self._pps['after_move']: + info = self.run_pp(pp, info) + return info def _make_archive_id(self, info_dict): video_id = info_dict.get('id') @@ -2135,13 +3061,13 @@ class YoutubeDL(object): if not url: return # Try to find matching extractor for the URL and take its ie_key - for ie in self._ies: + for ie_key, ie in self._ies.items(): if ie.suitable(url): - extractor = ie.ie_key() + extractor = ie_key break else: return - return extractor.lower() + ' ' + video_id + return '%s %s' % (extractor.lower(), video_id) def in_download_archive(self, info_dict): fn = self.params.get('download_archive') @@ -2152,15 +3078,7 @@ class YoutubeDL(object): if not vid_id: return False # Incomplete video information - try: - with locked_file(fn, 'r', encoding='utf-8') as archive_file: - for line in archive_file: - if line.strip() == vid_id: - return True - except IOError as ioe: - if ioe.errno != errno.ENOENT: - raise - return False + return vid_id in self.archive def record_download_archive(self, info_dict): fn = self.params.get('download_archive') @@ -2170,23 +3088,26 @@ class YoutubeDL(object): assert vid_id with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + '\n') + self.archive.add(vid_id) @staticmethod def format_resolution(format, default='unknown'): - if format.get('vcodec') == 'none': + is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none' + if format.get('vcodec') == 'none' and format.get('acodec') != 'none': return 'audio only' if format.get('resolution') is not None: return format['resolution'] - if format.get('height') is not None: - if format.get('width') is not None: - res = '%sx%s' % (format['width'], format['height']) - else: - res = '%sp' % format['height'] - elif format.get('width') is not None: + if format.get('width') and format.get('height'): + res = '%dx%d' % (format['width'], format['height']) + elif format.get('height'): + res = '%sp' % format['height'] + elif format.get('width'): res = '%dx?' 
% format['width'] + elif is_images: + return 'images' else: - res = default - return res + return default + return f'{res} images' if is_images else res def _format_note(self, fdict): res = '' @@ -2246,27 +3167,61 @@ class YoutubeDL(object): def list_formats(self, info_dict): formats = info_dict.get('formats', [info_dict]) - table = [ - [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] - for f in formats - if f.get('preference') is None or f['preference'] >= -1000] - if len(formats) > 1: - table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' - - header_line = ['format code', 'extension', 'resolution', 'note'] + new_format = ( + 'list-formats' not in self.params.get('compat_opts', []) + and self.params.get('listformats_table', True) is not False) + if new_format: + table = [ + [ + format_field(f, 'format_id'), + format_field(f, 'ext'), + self.format_resolution(f), + format_field(f, 'fps', '%d'), + format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), + '|', + format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes), + format_field(f, 'tbr', '%4dk'), + shorten_protocol_name(f.get('protocol', '').replace("native", "n")), + '|', + format_field(f, 'vcodec', default='unknown').replace('none', ''), + format_field(f, 'vbr', '%4dk'), + format_field(f, 'acodec', default='unknown').replace('none', ''), + format_field(f, 'abr', '%3dk'), + format_field(f, 'asr', '%5dHz'), + ', '.join(filter(None, ( + 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '', + format_field(f, 'language', '[%s]'), + format_field(f, 'format_note'), + format_field(f, 'container', ignore=(None, f.get('ext'))), + ))), + ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] + header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO', + '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO'] + else: + table = [ + [ + format_field(f, 'format_id'), + format_field(f, 'ext'), + self.format_resolution(f), + self._format_note(f)] + for f in formats + if f.get('preference') is None or f['preference'] >= -1000] + header_line = ['format code', 'extension', 'resolution', 'note'] + self.to_screen( - '[info] Available formats for %s:\n%s' % - (info_dict['id'], render_table(header_line, table))) + '[info] Available formats for %s:' % info_dict['id']) + self.to_stdout(render_table( + header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format)) def list_thumbnails(self, info_dict): - thumbnails = info_dict.get('thumbnails') + thumbnails = list(info_dict.get('thumbnails')) if not thumbnails: self.to_screen('[info] No thumbnails present for %s' % info_dict['id']) return self.to_screen( '[info] Thumbnails for %s:' % info_dict['id']) - self.to_screen(render_table( + self.to_stdout(render_table( ['ID', 'width', 'height', 'URL'], [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) @@ -2276,10 +3231,17 @@ class YoutubeDL(object): return self.to_screen( 'Available %s for %s:' % (name, video_id)) - self.to_screen(render_table( - ['Language', 'formats'], - [[lang, ', '.join(f['ext'] for f in reversed(formats))] - for lang, formats in subtitles.items()])) + + def _row(lang, formats): + exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))) + if len(set(names)) == 1: + names = [] if names[0] == 'unknown' else names[:1] + return [lang, ', '.join(names), ', 
'.join(exts)] + + self.to_stdout(render_table( + ['Language', 'Name', 'Formats'], + [_row(lang, formats) for lang, formats in subtitles.items()], + hideEmpty=True)) def urlopen(self, req): """ Start an HTTP download """ @@ -2290,34 +3252,40 @@ class YoutubeDL(object): def print_debug_header(self): if not self.params.get('verbose'): return - - if type('') is not compat_str: - # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326) - self.report_warning( - 'Your Python is broken! Update to a newer and supported version') - - stdout_encoding = getattr( - sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__) + get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__) encoding_str = ( - '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( + '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % ( locale.getpreferredencoding(), sys.getfilesystemencoding(), - stdout_encoding, + get_encoding(self._screen_file), get_encoding(self._err_file), self.get_encoding())) - write_string(encoding_str, encoding=None) - self._write_string('[debug] hypervideo version ' + __version__ + '\n') + logger = self.params.get('logger') + if logger: + write_debug = lambda msg: logger.debug(f'[debug] {msg}') + write_debug(encoding_str) + else: + write_debug = lambda msg: self._write_string(f'[debug] {msg}') + write_string(encoding_str, encoding=None) + + write_debug('hypervideo version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})')) if _LAZY_LOADER: - self._write_string('[debug] Lazy loading extractors enabled' + '\n') + write_debug('Lazy loading extractors enabled\n') + if plugin_extractors or plugin_postprocessors: + write_debug('Plugins: %s\n' % [ + '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) + if self.params.get('compat_opts'): + write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.path.dirname(os.path.abspath(__file__))) - out, err = sp.communicate() + out, err = process_communicate_or_kill(sp) out = out.decode().strip() if re.match('[0-9a-f]+', out): - self._write_string('[debug] Git HEAD: ' + out + '\n') + write_debug('Git HEAD: %s\n' % out) except Exception: try: sys.exc_clear() @@ -2330,31 +3298,46 @@ class YoutubeDL(object): return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] return impl_name - self._write_string('[debug] Python version %s (%s) - %s\n' % ( - platform.python_version(), python_implementation(), + write_debug('Python version %s (%s %s) - %s\n' % ( + platform.python_version(), + python_implementation(), + platform.architecture()[0], platform_name())) exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( - '%s %s' % (exe, v) - for exe, v in sorted(exe_versions.items()) - if v - ) - if not exe_str: - exe_str = 'none' - self._write_string('[debug] exe versions: %s\n' % exe_str) + f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v + ) or 'none' + write_debug('exe versions: %s\n' % exe_str) + + from .downloader.websocket import has_websockets + from .postprocessor.embedthumbnail import has_mutagen + from .cookies import SQLITE_AVAILABLE, 
KEYRING_AVAILABLE + + lib_str = ', '.join(sorted(filter(None, ( + compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0], + has_websockets and 'websockets', + has_mutagen and 'mutagen', + SQLITE_AVAILABLE and 'sqlite', + KEYRING_AVAILABLE and 'keyring', + )))) or 'none' + write_debug('Optional libraries: %s\n' % lib_str) + write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % ( + supports_terminal_sequences(self._screen_file), + supports_terminal_sequences(self._err_file))) proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) - self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') + write_debug('Proxy map: ' + compat_str(proxy_map) + '\n') if self.params.get('call_home', False): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') - self._write_string('[debug] Public IP address: %s\n' % ipaddr) + write_debug('Public IP address: %s\n' % ipaddr) + return latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode('utf-8') if version_tuple(latest_version) > version_tuple(__version__): @@ -2365,18 +3348,13 @@ class YoutubeDL(object): def _setup_opener(self): timeout_val = self.params.get('socket_timeout') - self._socket_timeout = 600 if timeout_val is None else float(timeout_val) + self._socket_timeout = 20 if timeout_val is None else float(timeout_val) + opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser') opts_cookiefile = self.params.get('cookiefile') opts_proxy = self.params.get('proxy') - if opts_cookiefile is None: - self.cookiejar = compat_cookiejar.CookieJar() - else: - opts_cookiefile = expand_path(opts_cookiefile) - self.cookiejar = YoutubeDLCookieJar(opts_cookiefile) - if os.access(opts_cookiefile, os.R_OK): - self.cookiejar.load(ignore_discard=True, ignore_expires=True) + self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self) cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) if opts_proxy is not None: @@ -2432,38 +3410,133 @@ class YoutubeDL(object): encoding = preferredencoding() return encoding - def _write_thumbnails(self, info_dict, filename): - if self.params.get('writethumbnail', False): - thumbnails = info_dict.get('thumbnails') - if thumbnails: - thumbnails = [thumbnails[-1]] - elif self.params.get('write_all_thumbnails', False): - thumbnails = info_dict.get('thumbnails') + def _write_info_json(self, label, ie_result, infofn): + ''' Write infojson and returns True = written, False = skip, None = error ''' + if not self.params.get('writeinfojson'): + return False + elif not infofn: + self.write_debug(f'Skipping writing {label} infojson') + return False + elif not self._ensure_dir_exists(infofn): + return None + elif not self.params.get('overwrites', True) and os.path.exists(infofn): + self.to_screen(f'[info] {label.title()} metadata is already present') else: - return - - if not thumbnails: - # No thumbnails present, so return immediately - return + self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') + try: + write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) + except (OSError, IOError): + self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') + return None + return True + + def _write_description(self, label, ie_result, descfn): + ''' Write description and returns True = written, False = skip, None = error ''' + if not self.params.get('writedescription'): + return False + elif not descfn: + 
self.write_debug(f'Skipping writing {label} description') + return False + elif not self._ensure_dir_exists(descfn): + return None + elif not self.params.get('overwrites', True) and os.path.exists(descfn): + self.to_screen(f'[info] {label.title()} description is already present') + elif ie_result.get('description') is None: + self.report_warning(f'There\'s no {label} description to write') + return False + else: + try: + self.to_screen(f'[info] Writing {label} description to: {descfn}') + with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: + descfile.write(ie_result['description']) + except (OSError, IOError): + self.report_error(f'Cannot write {label} description file {descfn}') + return None + return True + + def _write_subtitles(self, info_dict, filename): + ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' + ret = [] + subtitles = info_dict.get('requested_subtitles') + if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + # subtitles download errors are already managed as troubles in relevant IE + # that way it will silently go on when used with unsupporting IE + return ret + + sub_filename_base = self.prepare_filename(info_dict, 'subtitle') + if not sub_filename_base: + self.to_screen('[info] Skipping writing video subtitles') + return ret + for sub_lang, sub_info in subtitles.items(): + sub_format = sub_info['ext'] + sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) + sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext')) + if not self.params.get('overwrites', True) and os.path.exists(sub_filename): + self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present') + sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + continue - for t in thumbnails: - thumb_ext = determine_ext(t['url'], 'jpg') - suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' - thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' - t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext')) + self.to_screen(f'[info] Writing video subtitles to: {sub_filename}') + if sub_info.get('data') is not None: + try: + # Use newline='' to prevent conversion of newline characters + # See https://github.com/ytdl-org/youtube-dl/issues/10268 + with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: + subfile.write(sub_info['data']) + sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + continue + except (OSError, IOError): + self.report_error(f'Cannot write video subtitles file {sub_filename}') + return None - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): - self.to_screen('[%s] %s: Thumbnail %sis already present' % - (info_dict['extractor'], info_dict['id'], thumb_display_id)) + try: + sub_copy = sub_info.copy() + sub_copy.setdefault('http_headers', info_dict.get('http_headers')) + self.dl(sub_filename, sub_copy, subtitle=True) + sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}') + continue + return ret + + def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): + ''' Write thumbnails to 
file and return list of (thumb_filename, final_thumb_filename) ''' + write_all = self.params.get('write_all_thumbnails', False) + thumbnails, ret = [], [] + if write_all or self.params.get('writethumbnail', False): + thumbnails = info_dict.get('thumbnails') or [] + multiple = write_all and len(thumbnails) > 1 + + if thumb_filename_base is None: + thumb_filename_base = filename + if thumbnails and not thumb_filename_base: + self.write_debug(f'Skipping writing {label} thumbnail') + return ret + + for t in thumbnails[::-1]: + thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') + thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '') + thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) + thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) + + if not self.params.get('overwrites', True) and os.path.exists(thumb_filename): + ret.append((thumb_filename, thumb_filename_final)) + t['filepath'] = thumb_filename + self.to_screen(f'[info] {thumb_display_id.title()} is already present') else: - self.to_screen('[%s] %s: Downloading thumbnail %s...' % - (info_dict['extractor'], info_dict['id'], thumb_display_id)) + self.to_screen(f'[info] Downloading {thumb_display_id} ...') try: uf = self.urlopen(t['url']) + self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') with open(encodeFilename(thumb_filename), 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) - self.to_screen('[%s] %s: Writing thumbnail %sto: %s' % - (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_warning('Unable to download thumbnail "%s": %s' % - (t['url'], error_to_compat_str(err))) + ret.append((thumb_filename, thumb_filename_final)) + t['filepath'] = thumb_filename + except network_exceptions as err: + self.report_warning(f'Unable to download {thumb_display_id}: {err}') + if ret and not write_all: + break + return ret diff --git a/hypervideo_dl/__init__.py b/hypervideo_dl/__init__.py index 70c53fc..d8b7de5 100644 --- a/hypervideo_dl/__init__.py +++ b/hypervideo_dl/__init__.py @@ -1,46 +1,59 @@ -#!/usr/bin/env python +#!/usr/bin/python # coding: utf-8 -from __future__ import unicode_literals - __license__ = 'CC0-1.0' import codecs import io +import itertools import os import random +import re import sys - from .options import ( parseOpts, ) from .compat import ( compat_getpass, - compat_shlex_split, + compat_shlex_quote, workaround_optparse_bug9161, ) +from .cookies import SUPPORTED_BROWSERS from .utils import ( DateRange, decodeOption, - DEFAULT_OUTTMPL, DownloadError, + error_to_compat_str, + ExistingVideoReached, expand_path, match_filter_func, MaxDownloadsReached, + parse_duration, preferredencoding, read_batch_urls, + RejectedVideoReached, + render_table, SameFileError, setproctitle, std_headers, write_string, - render_table, ) from .downloader import ( FileDownloader, ) from .extractor import gen_extractors, list_extractors +from .extractor.common import InfoExtractor from .extractor.adobepass import MSO_INFO +from .postprocessor import ( + FFmpegExtractAudioPP, + FFmpegSubtitlesConvertorPP, + FFmpegThumbnailsConvertorPP, + FFmpegVideoConvertorPP, + FFmpegVideoRemuxerPP, + MetadataFromFieldPP, + MetadataParserPP, +) from .YoutubeDL import YoutubeDL @@ -55,6 +68,7 @@ def _real_main(argv=None): setproctitle('hypervideo') parser, opts, args = parseOpts(argv) + 
warnings = [] # Set user agent if opts.user_agent is not None: @@ -65,14 +79,7 @@ def _real_main(argv=None): std_headers['Referer'] = opts.referer # Custom HTTP headers - if opts.headers is not None: - for h in opts.headers: - if ':' not in h: - parser.error('wrong header formatting, it should be key:value, not "%s"' % h) - key, value = h.split(':', 1) - if opts.verbose: - write_string('[debug] Adding header from command line option %s:%s\n' % (key, value)) - std_headers[key] = value + std_headers.update(opts.headers) # Dump user agent if opts.dump_user_agent: @@ -100,14 +107,14 @@ def _real_main(argv=None): if opts.list_extractors: for ie in list_extractors(opts.age_limit): - write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout) + write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout) matchedUrls = [url for url in all_urls if ie.suitable(url)] for mu in matchedUrls: write_string(' ' + mu + '\n', out=sys.stdout) sys.exit(0) if opts.list_extractor_descriptions: for ie in list_extractors(opts.age_limit): - if not ie._WORKING: + if not ie.working(): continue desc = getattr(ie, 'IE_DESC', ie.IE_NAME) if desc is False: @@ -130,16 +137,12 @@ def _real_main(argv=None): parser.error('account username missing\n') if opts.ap_password is not None and opts.ap_username is None: parser.error('TV Provider account username missing\n') - if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): - parser.error('using output template conflicts with using title, video ID or auto number') if opts.autonumber_size is not None: if opts.autonumber_size <= 0: parser.error('auto number size must be positive') if opts.autonumber_start is not None: if opts.autonumber_start < 0: parser.error('auto number start must be positive or 0') - if opts.usetitle and opts.useid: - parser.error('using title conflicts with using video ID') if opts.username is not None and opts.password is None: opts.password = compat_getpass('Type account password and press [Return]: ') if opts.ap_username is not None and opts.ap_password is None: @@ -149,6 +152,11 @@ def _real_main(argv=None): if numeric_limit is None: parser.error('invalid rate limit specified') opts.ratelimit = numeric_limit + if opts.throttledratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.throttledratelimit) + if numeric_limit is None: + parser.error('invalid rate limit specified') + opts.throttledratelimit = numeric_limit if opts.min_filesize is not None: numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) if numeric_limit is None: @@ -171,22 +179,34 @@ def _real_main(argv=None): parser.error('max sleep interval must be greater than or equal to min sleep interval') else: opts.max_sleep_interval = opts.sleep_interval + if opts.sleep_interval_subtitles is not None: + if opts.sleep_interval_subtitles < 0: + parser.error('subtitles sleep interval must be positive or 0') + if opts.sleep_interval_requests is not None: + if opts.sleep_interval_requests < 0: + parser.error('requests sleep interval must be positive or 0') if opts.ap_mso and opts.ap_mso not in MSO_INFO: parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') + if opts.overwrites: # --yes-overwrites implies --no-continue + opts.continue_dl = False + if opts.concurrent_fragment_downloads <= 0: + raise ValueError('Concurrent fragments must be positive') - def parse_retries(retries): + def parse_retries(retries, name=''): if 
retries in ('inf', 'infinite'): parsed_retries = float('inf') else: try: parsed_retries = int(retries) except (TypeError, ValueError): - parser.error('invalid retry count specified') + parser.error('invalid %sretry count specified' % name) return parsed_retries if opts.retries is not None: opts.retries = parse_retries(opts.retries) if opts.fragment_retries is not None: - opts.fragment_retries = parse_retries(opts.fragment_retries) + opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ') + if opts.extractor_retries is not None: + opts.extractor_retries = parse_retries(opts.extractor_retries, 'extractor ') if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -202,56 +222,239 @@ def _real_main(argv=None): if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: raise ValueError('Playlist end must be greater than playlist start') if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: + if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') if not opts.audioquality.isdigit(): parser.error('invalid audio quality specified') if opts.recodevideo is not None: - if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']: - parser.error('invalid video recode format specified') + opts.recodevideo = opts.recodevideo.replace(' ', '') + if not re.match(FFmpegVideoConvertorPP.FORMAT_RE, opts.recodevideo): + parser.error('invalid video recode format specified') + if opts.remuxvideo is not None: + opts.remuxvideo = opts.remuxvideo.replace(' ', '') + if not re.match(FFmpegVideoRemuxerPP.FORMAT_RE, opts.remuxvideo): + parser.error('invalid video remux format specified') if opts.convertsubtitles is not None: - if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']: + if opts.convertsubtitles not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS: parser.error('invalid subtitle format specified') + if opts.convertthumbnails is not None: + if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS: + parser.error('invalid thumbnail format specified') + + if opts.cookiesfrombrowser is not None: + opts.cookiesfrombrowser = [ + part.strip() or None for part in opts.cookiesfrombrowser.split(':', 1)] + if opts.cookiesfrombrowser[0].lower() not in SUPPORTED_BROWSERS: + parser.error('unsupported browser specified for cookies') if opts.date is not None: date = DateRange.day(opts.date) else: date = DateRange(opts.dateafter, opts.datebefore) - # Do not download videos when there are audio-only formats + compat_opts = opts.compat_opts + + def _unused_compat_opt(name): + if name not in compat_opts: + return False + compat_opts.discard(name) + compat_opts.update(['*%s' % name]) + return True + + def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): + attr = getattr(opts, opt_name) + if compat_name in compat_opts: + if attr is None: + setattr(opts, opt_name, not default) + return True + else: + if remove_compat: + _unused_compat_opt(compat_name) + return False + elif attr is None: + setattr(opts, opt_name, default) + return None + + set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') + set_default_compat('no-playlist-metafiles', 'allow_playlist_files') + set_default_compat('no-clean-infojson', 'clean_infojson') + if 
'format-sort' in compat_opts: + opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) + _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) + _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) + if _video_multistreams_set is False and _audio_multistreams_set is False: + _unused_compat_opt('multistreams') + outtmpl_default = opts.outtmpl.get('default') + if 'filename' in compat_opts: + if outtmpl_default is None: + outtmpl_default = '%(title)s-%(id)s.%(ext)s' + opts.outtmpl.update({'default': outtmpl_default}) + else: + _unused_compat_opt('filename') + + def validate_outtmpl(tmpl, msg): + err = YoutubeDL.validate_outtmpl(tmpl) + if err: + parser.error('invalid %s %r: %s' % (msg, tmpl, error_to_compat_str(err))) + + for k, tmpl in opts.outtmpl.items(): + validate_outtmpl(tmpl, f'{k} output template') + opts.forceprint = opts.forceprint or [] + for tmpl in opts.forceprint or []: + validate_outtmpl(tmpl, 'print template') + validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') + for k, tmpl in opts.progress_template.items(): + k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress' + validate_outtmpl(tmpl, f'{k} template') + if opts.extractaudio and not opts.keepvideo and opts.format is None: opts.format = 'bestaudio/best' - # --all-sub automatically sets --write-sub if --write-auto-sub is not given - # this was the old behaviour if only --all-sub was given. - if opts.allsubtitles and not opts.writeautomaticsub: - opts.writesubtitles = True - - outtmpl = ((opts.outtmpl is not None and opts.outtmpl) - or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') - or (opts.useid and '%(id)s.%(ext)s') - or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') - or DEFAULT_OUTTMPL) - if not os.path.splitext(outtmpl)[1] and opts.extractaudio: + if outtmpl_default is not None and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio: parser.error('Cannot download a video and extract audio into the same' ' file! 
Use "{0}.%(ext)s" instead of "{0}" as the output' - ' template'.format(outtmpl)) + ' template'.format(outtmpl_default)) + + for f in opts.format_sort: + if re.match(InfoExtractor.FormatSort.regex, f) is None: + parser.error('invalid format sort string "%s" specified' % f) + + def metadataparser_actions(f): + if isinstance(f, str): + cmd = '--parse-metadata %s' % compat_shlex_quote(f) + try: + actions = [MetadataFromFieldPP.to_action(f)] + except Exception as err: + parser.error(f'{cmd} is invalid; {err}') + else: + cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) + actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) - any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json + for action in actions: + try: + MetadataParserPP.validate_action(*action) + except Exception as err: + parser.error(f'{cmd} is invalid; {err}') + yield action + + if opts.parse_metadata is None: + opts.parse_metadata = [] + if opts.metafromtitle is not None: + opts.parse_metadata.append('title:%s' % opts.metafromtitle) + opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata))) + + any_getting = opts.forceprint or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json any_printing = opts.print_json download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive + # If JSON is not printed anywhere, but comments are requested, save it to file + printing_json = opts.dumpjson or opts.print_json or opts.dump_single_json + if opts.getcomments and not printing_json: + opts.writeinfojson = True + + if opts.no_sponsorblock: + opts.sponsorblock_mark = set() + opts.sponsorblock_remove = set() + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + + if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + opts.addchapters = True + opts.remove_chapters = opts.remove_chapters or [] + + def report_conflict(arg1, arg2): + warnings.append('%s is ignored since %s was given' % (arg2, arg1)) + + if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: + if opts.sponskrub: + if opts.remove_chapters: + report_conflict('--remove-chapters', '--sponskrub') + if opts.sponsorblock_mark: + report_conflict('--sponsorblock-mark', '--sponskrub') + if opts.sponsorblock_remove: + report_conflict('--sponsorblock-remove', '--sponskrub') + opts.sponskrub = False + if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False: + report_conflict('--split-chapter', '--sponskrub-cut') + opts.sponskrub_cut = False + + if opts.remuxvideo and opts.recodevideo: + report_conflict('--recode-video', '--remux-video') + opts.remuxvideo = False + + if opts.allow_unplayable_formats: + if opts.extractaudio: + report_conflict('--allow-unplayable-formats', '--extract-audio') + opts.extractaudio = False + if opts.remuxvideo: + report_conflict('--allow-unplayable-formats', '--remux-video') + opts.remuxvideo = False + if opts.recodevideo: + report_conflict('--allow-unplayable-formats', '--recode-video') + opts.recodevideo = False + if opts.addmetadata: + report_conflict('--allow-unplayable-formats', '--add-metadata') + opts.addmetadata = False + if opts.embedsubtitles: + 
+
+    if opts.allow_unplayable_formats:
+        if opts.extractaudio:
+            report_conflict('--allow-unplayable-formats', '--extract-audio')
+            opts.extractaudio = False
+        if opts.remuxvideo:
+            report_conflict('--allow-unplayable-formats', '--remux-video')
+            opts.remuxvideo = False
+        if opts.recodevideo:
+            report_conflict('--allow-unplayable-formats', '--recode-video')
+            opts.recodevideo = False
+        if opts.addmetadata:
+            report_conflict('--allow-unplayable-formats', '--add-metadata')
+            opts.addmetadata = False
+        if opts.embedsubtitles:
+            report_conflict('--allow-unplayable-formats', '--embed-subs')
+            opts.embedsubtitles = False
+        if opts.embedthumbnail:
+            report_conflict('--allow-unplayable-formats', '--embed-thumbnail')
+            opts.embedthumbnail = False
+        if opts.xattrs:
+            report_conflict('--allow-unplayable-formats', '--xattrs')
+            opts.xattrs = False
+        if opts.fixup and opts.fixup.lower() not in ('never', 'ignore'):
+            report_conflict('--allow-unplayable-formats', '--fixup')
+        opts.fixup = 'never'
+        if opts.remove_chapters:
+            report_conflict('--allow-unplayable-formats', '--remove-chapters')
+            opts.remove_chapters = []
+        if opts.sponsorblock_remove:
+            report_conflict('--allow-unplayable-formats', '--sponsorblock-remove')
+            opts.sponsorblock_remove = set()
+        if opts.sponskrub:
+            report_conflict('--allow-unplayable-formats', '--sponskrub')
+            opts.sponskrub = False
+
     # PostProcessors
-    postprocessors = []
-    if opts.metafromtitle:
+    postprocessors = list(opts.add_postprocessors)
+    if sponsorblock_query:
         postprocessors.append({
-            'key': 'MetadataFromTitle',
-            'titleformat': opts.metafromtitle
+            'key': 'SponsorBlock',
+            'categories': sponsorblock_query,
+            'api': opts.sponsorblock_api,
+            # Run this immediately after extraction is complete
+            'when': 'pre_process'
+        })
+    if opts.parse_metadata:
+        postprocessors.append({
+            'key': 'MetadataParser',
+            'actions': opts.parse_metadata,
+            # Run this immediately after extraction is complete
+            'when': 'pre_process'
+        })
+    if opts.convertsubtitles:
+        postprocessors.append({
+            'key': 'FFmpegSubtitlesConvertor',
+            'format': opts.convertsubtitles,
+            # Run this before the actual video download
+            'when': 'before_dl'
+        })
+    if opts.convertthumbnails:
+        postprocessors.append({
+            'key': 'FFmpegThumbnailsConvertor',
+            'format': opts.convertthumbnails,
+            # Run this before the actual video download
+            'when': 'before_dl'
+        })
+    # Must be after all other before_dl
+    if opts.exec_before_dl_cmd:
+        postprocessors.append({
+            'key': 'Exec',
+            'exec_cmd': opts.exec_before_dl_cmd,
+            'when': 'before_dl'
         })
     if opts.extractaudio:
         postprocessors.append({
@@ -260,61 +463,129 @@ def _real_main(argv=None):
             'preferredquality': opts.audioquality,
             'nopostoverwrites': opts.nopostoverwrites,
         })
+    if opts.remuxvideo:
+        postprocessors.append({
+            'key': 'FFmpegVideoRemuxer',
+            'preferedformat': opts.remuxvideo,
+        })
     if opts.recodevideo:
         postprocessors.append({
             'key': 'FFmpegVideoConvertor',
             'preferedformat': opts.recodevideo,
         })
+    # If ModifyChapters is going to remove chapters, subtitles must already be in the container.
+    if opts.embedsubtitles:
+        already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts
+        postprocessors.append({
+            'key': 'FFmpegEmbedSubtitle',
+            # already_have_subtitle = True prevents the file from being deleted after embedding
+            'already_have_subtitle': already_have_subtitle
+        })
+        if not opts.writeautomaticsub and 'no-keep-subs' not in compat_opts:
+            opts.writesubtitles = True
+    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+    # this was the old behaviour if only --all-sub was given.
+    if opts.allsubtitles and not opts.writeautomaticsub:
+        opts.writesubtitles = True
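Each entry appended above may carry a 'when' key naming its pipeline stage; entries without one run at the default post-processing stage. A toy dispatcher showing the resulting ordering (the default stage name and the bucket order are assumptions for illustration):

    # Toy dispatcher showing how 'when' buckets order the post-processors
    # (assumed default stage: post_process; illustrative only).
    postprocessors = [
        {'key': 'SponsorBlock', 'when': 'pre_process'},
        {'key': 'FFmpegSubtitlesConvertor', 'when': 'before_dl'},
        {'key': 'FFmpegExtractAudio'},                # no 'when' -> default stage
        {'key': 'Exec', 'when': 'after_move'},
    ]
    for stage in ('pre_process', 'before_dl', 'post_process', 'after_move'):
        for pp in postprocessors:
            if pp.get('when', 'post_process') == stage:
                print(stage, '->', pp['key'])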
+    # ModifyChapters must run before FFmpegMetadataPP
+    remove_chapters_patterns, remove_ranges = [], []
+    for regex in opts.remove_chapters:
+        if regex.startswith('*'):
+            dur = list(map(parse_duration, regex[1:].split('-')))
+            if len(dur) == 2 and all(t is not None for t in dur):
+                remove_ranges.append(tuple(dur))
+                continue
+            parser.error(f'invalid --remove-chapters time range {regex!r}. Must be of the form *start-end')
+        try:
+            remove_chapters_patterns.append(re.compile(regex))
+        except re.error as err:
+            parser.error(f'invalid --remove-chapters regex {regex!r} - {err}')
+    if opts.remove_chapters or sponsorblock_query:
+        postprocessors.append({
+            'key': 'ModifyChapters',
+            'remove_chapters_patterns': remove_chapters_patterns,
+            'remove_sponsor_segments': opts.sponsorblock_remove,
+            'remove_ranges': remove_ranges,
+            'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
+            'force_keyframes': opts.force_keyframes_at_cuts
+        })
     # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
     # FFmpegExtractAudioPP as containers before conversion may not support
     # metadata (3gp, webm, etc.)
-    # And this post-processor should be placed before other metadata
-    # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
-    # extra metadata. By default ffmpeg preserves metadata applicable for both
+    # By default ffmpeg preserves metadata applicable for both
     # source and target containers. From this point the container won't change,
     # so metadata can be added here.
-    if opts.addmetadata:
-        postprocessors.append({'key': 'FFmpegMetadata'})
-    if opts.convertsubtitles:
+    if opts.addmetadata or opts.addchapters:
         postprocessors.append({
-            'key': 'FFmpegSubtitlesConvertor',
-            'format': opts.convertsubtitles,
+            'key': 'FFmpegMetadata',
+            'add_chapters': opts.addchapters,
+            'add_metadata': opts.addmetadata,
         })
-    if opts.embedsubtitles:
+    # Note: Deprecated
+    # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
+    # but must be below EmbedSubtitle and FFmpegMetadata
+    # See https://github.com/hypervideo/hypervideo/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29
+    # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
+    if opts.sponskrub is not False:
         postprocessors.append({
-            'key': 'FFmpegEmbedSubtitle',
+            'key': 'SponSkrub',
+            'path': opts.sponskrub_path,
+            'args': opts.sponskrub_args,
+            'cut': opts.sponskrub_cut,
+            'force': opts.sponskrub_force,
+            'ignoreerror': opts.sponskrub is None,
         })
     if opts.embedthumbnail:
         already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
         postprocessors.append({
             'key': 'EmbedThumbnail',
+            # already_have_thumbnail = True prevents the file from being deleted after embedding
             'already_have_thumbnail': already_have_thumbnail
         })
         if not already_have_thumbnail:
             opts.writethumbnail = True
-    # XAttrMetadataPP should be run after post-processors that may change file
-    # contents
+            opts.outtmpl['pl_thumbnail'] = ''
+    if opts.split_chapters:
+        postprocessors.append({
+            'key': 'FFmpegSplitChapters',
+            'force_keyframes': opts.force_keyframes_at_cuts,
+        })
+    # XAttrMetadataPP should be run after post-processors that may change file contents
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})
-    # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
-    # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
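The --remove-chapters loop above accepts either a regex matched against chapter titles or a literal time range written as *start-end. A minimal standalone version of that dispatch, with a simplified stand-in for utils.parse_duration (the real parser accepts more spellings):

    import re

    def parse_duration(s):
        # Simplified stand-in for utils.parse_duration: accepts SS, MM:SS, HH:MM:SS.
        parts = s.split(':')
        if not all(p.replace('.', '', 1).isdigit() for p in parts):
            return None
        return sum(float(p) * 60 ** i for i, p in enumerate(reversed(parts)))

    def classify(value):
        if value.startswith('*'):
            dur = [parse_duration(t) for t in value[1:].split('-')]
            if len(dur) == 2 and all(t is not None for t in dur):
                return ('range', tuple(dur))
            raise ValueError(f'invalid time range {value!r}; must be *start-end')
        return ('pattern', re.compile(value))

    print(classify('*10:00-15:30'))   # ('range', (600.0, 930.0))
    print(classify('(?i)sponsor'))    # ('pattern', re.compile('(?i)sponsor'))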
+    # Exec must be the last PP
     if opts.exec_cmd:
         postprocessors.append({
-            'key': 'ExecAfterDownload',
+            'key': 'Exec',
             'exec_cmd': opts.exec_cmd,
+            # Run this only after the files have been moved to their final locations
+            'when': 'after_move'
         })
 
-    external_downloader_args = None
-    if opts.external_downloader_args:
-        external_downloader_args = compat_shlex_split(opts.external_downloader_args)
-    postprocessor_args = None
-    if opts.postprocessor_args:
-        postprocessor_args = compat_shlex_split(opts.postprocessor_args)
+
+    def report_args_compat(arg, name):
+        warnings.append('%s given without specifying name. The arguments will be given to all %s' % (arg, name))
+
+    if 'default' in opts.external_downloader_args:
+        report_args_compat('--downloader-args', 'external downloaders')
+
+    if 'default-compat' in opts.postprocessor_args and 'default' not in opts.postprocessor_args:
+        report_args_compat('--post-processor-args', 'post-processors')
+        opts.postprocessor_args.setdefault('sponskrub', [])
+        opts.postprocessor_args['default'] = opts.postprocessor_args['default-compat']
+
+    final_ext = (
+        opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS
+        else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS
+        else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best')
+        else None)
+
     match_filter = (
         None if opts.match_filter is None
         else match_filter_func(opts.match_filter))
 
     ydl_opts = {
         'usenetrc': opts.usenetrc,
+        'netrc_location': opts.netrc_location,
         'username': opts.username,
         'password': opts.password,
         'twofactor': opts.twofactor,
@@ -332,45 +603,68 @@ def _real_main(argv=None):
         'forceduration': opts.getduration,
         'forcefilename': opts.getfilename,
         'forceformat': opts.getformat,
+        'forceprint': opts.forceprint,
         'forcejson': opts.dumpjson or opts.print_json,
         'dump_single_json': opts.dump_single_json,
-        'simulate': opts.simulate or any_getting,
+        'force_write_download_archive': opts.force_write_download_archive,
+        'simulate': (any_getting or None) if opts.simulate is None else opts.simulate,
         'skip_download': opts.skip_download,
         'format': opts.format,
+        'allow_unplayable_formats': opts.allow_unplayable_formats,
+        'ignore_no_formats_error': opts.ignore_no_formats_error,
+        'format_sort': opts.format_sort,
+        'format_sort_force': opts.format_sort_force,
+        'allow_multiple_video_streams': opts.allow_multiple_video_streams,
+        'allow_multiple_audio_streams': opts.allow_multiple_audio_streams,
+        'check_formats': opts.check_formats,
         'listformats': opts.listformats,
-        'outtmpl': outtmpl,
+        'listformats_table': opts.listformats_table,
+        'outtmpl': opts.outtmpl,
         'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
+        'paths': opts.paths,
         'autonumber_size': opts.autonumber_size,
         'autonumber_start': opts.autonumber_start,
         'restrictfilenames': opts.restrictfilenames,
+        'windowsfilenames': opts.windowsfilenames,
         'ignoreerrors': opts.ignoreerrors,
         'force_generic_extractor': opts.force_generic_extractor,
         'ratelimit': opts.ratelimit,
-        'nooverwrites': opts.nooverwrites,
+        'throttledratelimit': opts.throttledratelimit,
+        'overwrites': opts.overwrites,
         'retries': opts.retries,
         'fragment_retries': opts.fragment_retries,
+        'extractor_retries': opts.extractor_retries,
         'skip_unavailable_fragments': opts.skip_unavailable_fragments,
         'keep_fragments': opts.keep_fragments,
+        'concurrent_fragment_downloads': opts.concurrent_fragment_downloads,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'http_chunk_size': opts.http_chunk_size,
         'continuedl': opts.continue_dl,
-        'noprogress': opts.noprogress,
+        'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
         'progress_with_newline': opts.progress_with_newline,
+        'progress_template': opts.progress_template,
         'playliststart': opts.playliststart,
         'playlistend': opts.playlistend,
         'playlistreverse': opts.playlist_reverse,
         'playlistrandom': opts.playlist_random,
         'noplaylist': opts.noplaylist,
-        'logtostderr': opts.outtmpl == '-',
+        'logtostderr': outtmpl_default == '-',
         'consoletitle': opts.consoletitle,
         'nopart': opts.nopart,
         'updatetime': opts.updatetime,
         'writedescription': opts.writedescription,
         'writeannotations': opts.writeannotations,
         'writeinfojson': opts.writeinfojson,
+        'allow_playlist_files': opts.allow_playlist_files,
+        'clean_infojson': opts.clean_infojson,
+        'getcomments': opts.getcomments,
         'writethumbnail': opts.writethumbnail,
         'write_all_thumbnails': opts.write_all_thumbnails,
+        'writelink': opts.writelink,
+        'writeurllink': opts.writeurllink,
+        'writewebloclink': opts.writewebloclink,
+        'writedesktoplink': opts.writedesktoplink,
         'writesubtitles': opts.writesubtitles,
         'writeautomaticsub': opts.writeautomaticsub,
         'allsubtitles': opts.allsubtitles,
@@ -381,6 +675,7 @@ def _real_main(argv=None):
         'rejecttitle': decodeOption(opts.rejecttitle),
         'max_downloads': opts.max_downloads,
         'prefer_free_formats': opts.prefer_free_formats,
+        'trim_file_name': opts.trim_file_name,
         'verbose': opts.verbose,
         'dump_intermediate_pages': opts.dump_intermediate_pages,
         'write_pages': opts.write_pages,
@@ -395,7 +690,11 @@ def _real_main(argv=None):
         'youtube_print_sig_code': opts.youtube_print_sig_code,
         'age_limit': opts.age_limit,
         'download_archive': download_archive_fn,
+        'break_on_existing': opts.break_on_existing,
+        'break_on_reject': opts.break_on_reject,
+        'skip_playlist_after_errors': opts.skip_playlist_after_errors,
         'cookiefile': opts.cookiefile,
+        'cookiesfrombrowser': opts.cookiesfrombrowser,
         'nocheckcertificate': opts.no_check_certificate,
         'prefer_insecure': opts.prefer_insecure,
         'proxy': opts.proxy,
@@ -405,17 +704,23 @@ def _real_main(argv=None):
         'prefer_ffmpeg': opts.prefer_ffmpeg,
         'include_ads': opts.include_ads,
         'default_search': opts.default_search,
+        'dynamic_mpd': opts.dynamic_mpd,
+        'extractor_args': opts.extractor_args,
         'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
+        'youtube_include_hls_manifest': opts.youtube_include_hls_manifest,
         'encoding': opts.encoding,
         'extract_flat': opts.extract_flat,
         'mark_watched': opts.mark_watched,
         'merge_output_format': opts.merge_output_format,
+        'final_ext': final_ext,
         'postprocessors': postprocessors,
         'fixup': opts.fixup,
         'source_address': opts.source_address,
         'call_home': opts.call_home,
+        'sleep_interval_requests': opts.sleep_interval_requests,
         'sleep_interval': opts.sleep_interval,
         'max_sleep_interval': opts.max_sleep_interval,
+        'sleep_interval_subtitles': opts.sleep_interval_subtitles,
         'external_downloader': opts.external_downloader,
         'list_thumbnails': opts.list_thumbnails,
         'playlist_items': opts.playlist_items,
@@ -425,28 +730,27 @@ def _real_main(argv=None):
         'ffmpeg_location': opts.ffmpeg_location,
         'hls_prefer_native': opts.hls_prefer_native,
         'hls_use_mpegts': opts.hls_use_mpegts,
-        'external_downloader_args': external_downloader_args,
-        'postprocessor_args': postprocessor_args,
+        'hls_split_discontinuity': opts.hls_split_discontinuity,
+        'external_downloader_args': opts.external_downloader_args,
+        'postprocessor_args': opts.postprocessor_args,
         'cn_verification_proxy': opts.cn_verification_proxy,
         'geo_verification_proxy': opts.geo_verification_proxy,
-        'config_location': opts.config_location,
         'geo_bypass': opts.geo_bypass,
         'geo_bypass_country': opts.geo_bypass_country,
         'geo_bypass_ip_block': opts.geo_bypass_ip_block,
-        # just for deprecation check
-        'autonumber': opts.autonumber if opts.autonumber is True else None,
-        'usetitle': opts.usetitle if opts.usetitle is True else None,
+        'warnings': warnings,
+        'compat_opts': compat_opts,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
+        actual_use = len(all_urls) or opts.load_info_filename
+
         # Remove cache dir
         if opts.rm_cachedir:
             ydl.cache.remove()
 
         # Maybe do nothing
-        if (len(all_urls) < 1) and (opts.load_info_filename is None):
-
+        if not actual_use:
             ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
             parser.error(
                 'You must provide at least one URL.\n'
@@ -457,8 +761,8 @@ def _real_main(argv=None):
                 retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
             else:
                 retcode = ydl.download(all_urls)
-        except MaxDownloadsReached:
-            ydl.to_screen('--max-download limit reached, aborting.')
+        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
+            ydl.to_screen('Aborting remaining downloads')
             retcode = 101
 
     sys.exit(retcode)
@@ -473,6 +777,11 @@ def main(argv=None):
         sys.exit('ERROR: fixed output name but more than one file to download')
     except KeyboardInterrupt:
         sys.exit('\nERROR: Interrupted by user')
+    except BrokenPipeError as err:
+        # https://docs.python.org/3/library/signal.html#note-on-sigpipe
+        devnull = os.open(os.devnull, os.O_WRONLY)
+        os.dup2(devnull, sys.stdout.fileno())
+        sys.exit(f'\nERROR: {err}')
 
 
 __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
diff --git a/hypervideo_dl/__main__.py b/hypervideo_dl/__main__.py
index e3b35e2..49765e4 100755
--- a/hypervideo_dl/__main__.py
+++ b/hypervideo_dl/__main__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 from __future__ import unicode_literals
 
 # Execute with
diff --git a/hypervideo_dl/aes.py b/hypervideo_dl/aes.py
index 461bb6d..60cdeb7 100644
--- a/hypervideo_dl/aes.py
+++ b/hypervideo_dl/aes.py
@@ -2,36 +2,68 @@ from __future__ import unicode_literals
 
 from math import ceil
 
-from .compat import compat_b64decode
+from .compat import compat_b64decode, compat_pycrypto_AES
 from .utils import bytes_to_intlist, intlist_to_bytes
 
+
+if compat_pycrypto_AES:
+    def aes_cbc_decrypt_bytes(data, key, iv):
+        """ Decrypt bytes with AES-CBC using pycryptodome """
+        return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data)
+
+    def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
+        """ Decrypt bytes with AES-GCM using pycryptodome """
+        return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
+
+else:
+    def aes_cbc_decrypt_bytes(data, key, iv):
+        """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
+        return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv))))
+
+    def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
+        """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
+        return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
+
+
 BLOCK_SIZE_BYTES = 16
 
 
-def aes_ctr_decrypt(data, key, counter):
+def aes_ctr_decrypt(data, key, iv):
     """
     Decrypt with aes in counter mode
 
     @param {int[]} data        cipher
     @param {int[]} key         16/24/32-Byte cipher key
-    @param {instance} counter  Instance whose next_value function (@returns {int[]} 16-Byte block)
-                               returns the next counter block
+    @param {int[]} iv          16-Byte initialization vector
     @returns {int[]}           decrypted data
     """
+    return aes_ctr_encrypt(data, key, iv)
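Since CTR mode XORs the data with an encrypted counter stream, decryption and encryption are the same operation, which is why aes_ctr_decrypt above now simply delegates to aes_ctr_encrypt. A quick round-trip sketch over the new int-list API (toy key and IV, not for real use):

    # Round-trip sketch for the CTR helpers (illustrative only).
    from hypervideo_dl.aes import aes_ctr_decrypt, aes_ctr_encrypt

    key = list(range(16))            # toy 16-byte key
    iv = [0] * 16                    # toy 16-byte initialization vector
    data = [ord(c) for c in 'hello world']

    ct = aes_ctr_encrypt(data, key, iv)
    assert aes_ctr_decrypt(ct, key, iv) == data   # same keystream both ways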
+
+
+def aes_ctr_encrypt(data, key, iv):
+    """
+    Encrypt with aes in counter mode
+
+    @param {int[]} data        cleartext
+    @param {int[]} key         16/24/32-Byte cipher key
+    @param {int[]} iv          16-Byte initialization vector
+    @returns {int[]}           encrypted data
+    """
     expanded_key = key_expansion(key)
     block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+    counter = iter_vector(iv)
 
-    decrypted_data = []
+    encrypted_data = []
     for i in range(block_count):
-        counter_block = counter.next_value()
+        counter_block = next(counter)
         block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
         block += [0] * (BLOCK_SIZE_BYTES - len(block))
 
         cipher_counter_block = aes_encrypt(counter_block, expanded_key)
-        decrypted_data += xor(block, cipher_counter_block)
-    decrypted_data = decrypted_data[:len(data)]
+        encrypted_data += xor(block, cipher_counter_block)
+    encrypted_data = encrypted_data[:len(data)]
 
-    return decrypted_data
+    return encrypted_data
 
 
 def aes_cbc_decrypt(data, key, iv):
@@ -88,39 +120,47 @@ def aes_cbc_encrypt(data, key, iv):
     return encrypted_data
 
 
-def key_expansion(data):
+def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
     """
-    Generate key schedule
+    Decrypt with aes in GCM mode and check authenticity using tag
 
-    @param {int[]} data      16/24/32-Byte cipher key
-    @returns {int[]} 176/208/240-Byte expanded key
+    @param {int[]} data      cipher
+    @param {int[]} key       16-Byte cipher key
+    @param {int[]} tag       authentication tag
+    @param {int[]} nonce     IV (recommended 12-Byte)
+    @returns {int[]}         decrypted data
     """
-    data = data[:]  # copy
-    rcon_iteration = 1
-    key_size_bytes = len(data)
-    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
 
-    while len(data) < expanded_key_size_bytes:
-        temp = data[-4:]
-        temp = key_schedule_core(temp, rcon_iteration)
-        rcon_iteration += 1
-        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+    # XXX: check aes, gcm param
 
-    for _ in range(3):
-        temp = data[-4:]
-        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+    hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
 
-    if key_size_bytes == 32:
-        temp = data[-4:]
-        temp = sub_bytes(temp)
-        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+    if len(nonce) == 12:
+        j0 = nonce + [0, 0, 0, 1]
+    else:
+        fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
+        ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
+        j0 = ghash(hash_subkey, ghash_in)
 
-    for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
-        temp = data[-4:]
-        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-    data = data[:expanded_key_size_bytes]
+    # TODO: add nonce support to aes_ctr_decrypt
 
-    return data
+    # nonce_ctr = j0[:12]
+    iv_ctr = inc(j0)
+
+    decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
+    pad_len = len(data) // 16 * 16
+    s_tag = ghash(
+        hash_subkey,
+        data
+        + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len)              # pad
+        + bytes_to_intlist((0 * 8).to_bytes(8, 'big')                 # length of associated data
+                           + ((len(data) * 8).to_bytes(8, 'big')))    # length of data
+    )
+
+    if tag != aes_ctr_encrypt(s_tag, key, j0):
+        raise ValueError("Mismatching authentication tag")
+
+    return decrypted_data
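The routine above follows NIST SP 800-38D: derive the hash subkey H = AES_K(0^128), build the pre-counter block j0 from the nonce, CTR-decrypt starting at inc(j0), then recompute the tag with GHASH over the padded ciphertext and a length block. A sketch showing that a wrong tag is rejected (all-zero toy inputs; a genuine tag for this data would differ with overwhelming probability):

    # Sketch: a bogus tag must make the GCM helper raise (authenticity check).
    from hypervideo_dl.aes import aes_gcm_decrypt_and_verify

    key = [0] * 16
    nonce = [0] * 12
    bogus_tag = [0] * 16
    try:
        aes_gcm_decrypt_and_verify([0] * 16, key, bogus_tag, nonce)
    except ValueError as err:
        print(err)   # Mismatching authentication tag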
@@ -138,7 +178,7 @@ def aes_encrypt(data, expanded_key):
         data = sub_bytes(data)
         data = shift_rows(data)
         if i != rounds:
-            data = mix_columns(data)
+            data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX))
         data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
 
     return data
@@ -157,7 +197,7 @@ def aes_decrypt(data, expanded_key):
     for i in range(rounds, 0, -1):
         data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
         if i != rounds:
-            data = mix_columns_inv(data)
+            data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
         data = shift_rows_inv(data)
         data = sub_bytes_inv(data)
     data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
@@ -189,15 +229,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
     nonce = data[:NONCE_LENGTH_BYTES]
     cipher = data[NONCE_LENGTH_BYTES:]
 
-    class Counter(object):
-        __value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
-
-        def next_value(self):
-            temp = self.__value
-            self.__value = inc(self.__value)
-            return temp
-
-    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
+    decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
     plaintext = intlist_to_bytes(decrypted_data)
 
     return plaintext
@@ -278,6 +310,47 @@ RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7,
                       0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
 
 
+def key_expansion(data):
+    """
+    Generate key schedule
+
+    @param {int[]} data      16/24/32-Byte cipher key
+    @returns {int[]}         176/208/240-Byte expanded key
+    """
+    data = data[:]  # copy
+    rcon_iteration = 1
+    key_size_bytes = len(data)
+    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+
+    while len(data) < expanded_key_size_bytes:
+        temp = data[-4:]
+        temp = key_schedule_core(temp, rcon_iteration)
+        rcon_iteration += 1
+        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+    for _ in range(3):
+        temp = data[-4:]
+        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+    if key_size_bytes == 32:
+        temp = data[-4:]
+        temp = sub_bytes(temp)
+        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+    for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
+        temp = data[-4:]
+        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+    data = data[:expanded_key_size_bytes]
+
+    return data
+
+
+def iter_vector(iv):
+    while True:
+        yield iv
+        iv = inc(iv)
+
+
 def sub_bytes(data):
     return [SBOX[x] for x in data]
 
@@ -302,48 +375,36 @@ def xor(data1, data2):
     return [x ^ y for x, y in zip(data1, data2)]
 
 
-def rijndael_mul(a, b):
-    if(a == 0 or b == 0):
-        return 0
-    return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
+def iter_mix_columns(data, matrix):
+    for i in (0, 4, 8, 12):
+        for row in matrix:
+            mixed = 0
+            for j in range(4):
+                # xor is (+) and (-)
+                mixed ^= (0 if data[i:i + 4][j] == 0 or row[j] == 0 else
+                          RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[data[i + j]] + RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF])
+            yield mixed
 
 
-def mix_column(data, matrix):
-    data_mixed = []
-    for row in range(4):
-        mixed = 0
-        for column in range(4):
-            # xor is (+) and (-)
-            mixed ^= rijndael_mul(data[column], matrix[row][column])
-        data_mixed.append(mixed)
-    return data_mixed
-
-
-def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
-    data_mixed = []
-    for i in range(4):
-        column = data[i * 4: (i + 1) * 4]
-        data_mixed += mix_column(column, matrix)
-    return data_mixed
+def shift_rows(data):
+    return [data[((column + row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]
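The rewritten shift_rows treats the 16 bytes as a 4x4 state stored column-major (index = column * 4 + row) and rotates row r left by r cells. A quick check with a block whose values equal their indices:

    # shift_rows on a column-major 4x4 state: row r rotates left by r cells.
    def shift_rows(data):
        return [data[((column + row) & 0b11) * 4 + row]
                for column in range(4) for row in range(4)]

    state = list(range(16))           # state[c*4 + r] == c*4 + r
    print(shift_rows(state))
    # -> [0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11]
    # row 0 (indices 0,4,8,12) is unchanged; row 1 (1,5,9,13) becomes 5,9,13,1; etc.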
-def mix_columns_inv(data):
-    return mix_columns(data, MIX_COLUMN_MATRIX_INV)
+def shift_rows_inv(data):
+    return [data[((column - row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]
 
 
-def shift_rows(data):
+def shift_block(data):
     data_shifted = []
-    for column in range(4):
-        for row in range(4):
-            data_shifted.append(data[((column + row) & 0b11) * 4 + row])
-    return data_shifted
+    bit = 0
+    for n in data:
+        if bit:
+            n |= 0x100
+        bit = n & 1
+        n >>= 1
+        data_shifted.append(n)
 
-def shift_rows_inv(data):
-    data_shifted = []
-    for column in range(4):
-        for row in range(4):
-            data_shifted.append(data[((column - row) & 0b11) * 4 + row])
     return data_shifted
 
 
@@ -358,4 +419,50 @@ def inc(data):
     return data
 
 
-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
+def block_product(block_x, block_y):
+    # NIST SP 800-38D, Algorithm 1
+
+    if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
+        raise ValueError("Length of blocks needs to be %d bytes" % BLOCK_SIZE_BYTES)
+
+    block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
+    block_v = block_y[:]
+    block_z = [0] * BLOCK_SIZE_BYTES
+
+    for i in block_x:
+        for bit in range(7, -1, -1):
+            if i & (1 << bit):
+                block_z = xor(block_z, block_v)
+
+            do_xor = block_v[-1] & 1
+            block_v = shift_block(block_v)
+            if do_xor:
+                block_v = xor(block_v, block_r)
+
+    return block_z
+
+
+def ghash(subkey, data):
+    # NIST SP 800-38D, Algorithm 2
+
+    if len(data) % BLOCK_SIZE_BYTES:
+        raise ValueError("Length of data should be a multiple of %d bytes" % BLOCK_SIZE_BYTES)
+
+    last_y = [0] * BLOCK_SIZE_BYTES
+    for i in range(0, len(data), BLOCK_SIZE_BYTES):
+        block = data[i : i + BLOCK_SIZE_BYTES]  # noqa: E203
+        last_y = block_product(xor(last_y, block), subkey)
+
+    return last_y
+
+
+__all__ = [
+    'aes_ctr_decrypt',
+    'aes_cbc_decrypt',
+    'aes_cbc_decrypt_bytes',
+    'aes_decrypt_text',
+    'aes_encrypt',
+    'aes_gcm_decrypt_and_verify',
+    'aes_gcm_decrypt_and_verify_bytes',
+    'key_expansion'
+]
diff --git a/hypervideo_dl/cache.py b/hypervideo_dl/cache.py
index 81cd297..24acb1b 100644
--- a/hypervideo_dl/cache.py
+++ b/hypervideo_dl/cache.py
@@ -50,6 +50,7 @@ class Cache(object):
             except OSError as ose:
                 if ose.errno != errno.EEXIST:
                     raise
+            self._ydl.write_debug(f'Saving {section}.{key} to cache')
             write_json_file(data, fn)
         except Exception:
             tb = traceback.format_exc()
@@ -66,6 +67,7 @@ class Cache(object):
         try:
             try:
                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+                    self._ydl.write_debug(f'Loading {section}.{key} from cache')
                     return json.load(cachef)
             except ValueError:
                 try:
diff --git a/hypervideo_dl/compat.py b/hypervideo_dl/compat.py
index 97ab37a..5e0e5d8 100644
--- a/hypervideo_dl/compat.py
+++ b/hypervideo_dl/compat.py
@@ -1,2526 +1,42 @@
 # coding: utf-8
-from __future__ import unicode_literals
+import asyncio
 
 import base64
-import binascii
-import collections
 import ctypes
-import email
 import getpass
-import io
+import html
+import html.parser
+import http
+import http.client
+import http.cookiejar
+import http.cookies
+import http.server
 import itertools
 import optparse
 import os
-import platform
 import re
 import shlex
 import shutil
 import socket
 import struct
-import subprocess
 import sys
-import xml.etree.ElementTree
+import tokenize
+import urllib
+import xml.etree.ElementTree as etree
+from subprocess import DEVNULL
 
-try:
-    import urllib.request as compat_urllib_request
-except ImportError:  # Python 2
-    import urllib2 as compat_urllib_request
-
-try:
-    import urllib.error as
compat_urllib_error -except ImportError: # Python 2 - import urllib2 as compat_urllib_error - -try: - import urllib.parse as compat_urllib_parse -except ImportError: # Python 2 - import urllib as compat_urllib_parse - -try: - from urllib.parse import urlparse as compat_urllib_parse_urlparse -except ImportError: # Python 2 - from urlparse import urlparse as compat_urllib_parse_urlparse - -try: - import urllib.parse as compat_urlparse -except ImportError: # Python 2 - import urlparse as compat_urlparse - -try: - import urllib.response as compat_urllib_response -except ImportError: # Python 2 - import urllib as compat_urllib_response - -try: - import http.cookiejar as compat_cookiejar -except ImportError: # Python 2 - import cookielib as compat_cookiejar - -if sys.version_info[0] == 2: - class compat_cookiejar_Cookie(compat_cookiejar.Cookie): - def __init__(self, version, name, value, *args, **kwargs): - if isinstance(name, compat_str): - name = name.encode() - if isinstance(value, compat_str): - value = value.encode() - compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs) -else: - compat_cookiejar_Cookie = compat_cookiejar.Cookie - -try: - import http.cookies as compat_cookies -except ImportError: # Python 2 - import Cookie as compat_cookies - -if sys.version_info[0] == 2: - class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie): - def load(self, rawdata): - if isinstance(rawdata, compat_str): - rawdata = str(rawdata) - return super(compat_cookies_SimpleCookie, self).load(rawdata) -else: - compat_cookies_SimpleCookie = compat_cookies.SimpleCookie - -try: - import html.entities as compat_html_entities -except ImportError: # Python 2 - import htmlentitydefs as compat_html_entities - -try: # Python >= 3.3 - compat_html_entities_html5 = compat_html_entities.html5 -except AttributeError: - # Copied from CPython 3.5.1 html/entities.py - compat_html_entities_html5 = { - 'Aacute': '\xc1', - 'aacute': '\xe1', - 'Aacute;': '\xc1', - 'aacute;': '\xe1', - 'Abreve;': '\u0102', - 'abreve;': '\u0103', - 'ac;': '\u223e', - 'acd;': '\u223f', - 'acE;': '\u223e\u0333', - 'Acirc': '\xc2', - 'acirc': '\xe2', - 'Acirc;': '\xc2', - 'acirc;': '\xe2', - 'acute': '\xb4', - 'acute;': '\xb4', - 'Acy;': '\u0410', - 'acy;': '\u0430', - 'AElig': '\xc6', - 'aelig': '\xe6', - 'AElig;': '\xc6', - 'aelig;': '\xe6', - 'af;': '\u2061', - 'Afr;': '\U0001d504', - 'afr;': '\U0001d51e', - 'Agrave': '\xc0', - 'agrave': '\xe0', - 'Agrave;': '\xc0', - 'agrave;': '\xe0', - 'alefsym;': '\u2135', - 'aleph;': '\u2135', - 'Alpha;': '\u0391', - 'alpha;': '\u03b1', - 'Amacr;': '\u0100', - 'amacr;': '\u0101', - 'amalg;': '\u2a3f', - 'AMP': '&', - 'amp': '&', - 'AMP;': '&', - 'amp;': '&', - 'And;': '\u2a53', - 'and;': '\u2227', - 'andand;': '\u2a55', - 'andd;': '\u2a5c', - 'andslope;': '\u2a58', - 'andv;': '\u2a5a', - 'ang;': '\u2220', - 'ange;': '\u29a4', - 'angle;': '\u2220', - 'angmsd;': '\u2221', - 'angmsdaa;': '\u29a8', - 'angmsdab;': '\u29a9', - 'angmsdac;': '\u29aa', - 'angmsdad;': '\u29ab', - 'angmsdae;': '\u29ac', - 'angmsdaf;': '\u29ad', - 'angmsdag;': '\u29ae', - 'angmsdah;': '\u29af', - 'angrt;': '\u221f', - 'angrtvb;': '\u22be', - 'angrtvbd;': '\u299d', - 'angsph;': '\u2222', - 'angst;': '\xc5', - 'angzarr;': '\u237c', - 'Aogon;': '\u0104', - 'aogon;': '\u0105', - 'Aopf;': '\U0001d538', - 'aopf;': '\U0001d552', - 'ap;': '\u2248', - 'apacir;': '\u2a6f', - 'apE;': '\u2a70', - 'ape;': '\u224a', - 'apid;': '\u224b', - 'apos;': "'", - 'ApplyFunction;': '\u2061', - 'approx;': '\u2248', - 'approxeq;': 
'\u224a', - 'Aring': '\xc5', - 'aring': '\xe5', - 'Aring;': '\xc5', - 'aring;': '\xe5', - 'Ascr;': '\U0001d49c', - 'ascr;': '\U0001d4b6', - 'Assign;': '\u2254', - 'ast;': '*', - 'asymp;': '\u2248', - 'asympeq;': '\u224d', - 'Atilde': '\xc3', - 'atilde': '\xe3', - 'Atilde;': '\xc3', - 'atilde;': '\xe3', - 'Auml': '\xc4', - 'auml': '\xe4', - 'Auml;': '\xc4', - 'auml;': '\xe4', - 'awconint;': '\u2233', - 'awint;': '\u2a11', - 'backcong;': '\u224c', - 'backepsilon;': '\u03f6', - 'backprime;': '\u2035', - 'backsim;': '\u223d', - 'backsimeq;': '\u22cd', - 'Backslash;': '\u2216', - 'Barv;': '\u2ae7', - 'barvee;': '\u22bd', - 'Barwed;': '\u2306', - 'barwed;': '\u2305', - 'barwedge;': '\u2305', - 'bbrk;': '\u23b5', - 'bbrktbrk;': '\u23b6', - 'bcong;': '\u224c', - 'Bcy;': '\u0411', - 'bcy;': '\u0431', - 'bdquo;': '\u201e', - 'becaus;': '\u2235', - 'Because;': '\u2235', - 'because;': '\u2235', - 'bemptyv;': '\u29b0', - 'bepsi;': '\u03f6', - 'bernou;': '\u212c', - 'Bernoullis;': '\u212c', - 'Beta;': '\u0392', - 'beta;': '\u03b2', - 'beth;': '\u2136', - 'between;': '\u226c', - 'Bfr;': '\U0001d505', - 'bfr;': '\U0001d51f', - 'bigcap;': '\u22c2', - 'bigcirc;': '\u25ef', - 'bigcup;': '\u22c3', - 'bigodot;': '\u2a00', - 'bigoplus;': '\u2a01', - 'bigotimes;': '\u2a02', - 'bigsqcup;': '\u2a06', - 'bigstar;': '\u2605', - 'bigtriangledown;': '\u25bd', - 'bigtriangleup;': '\u25b3', - 'biguplus;': '\u2a04', - 'bigvee;': '\u22c1', - 'bigwedge;': '\u22c0', - 'bkarow;': '\u290d', - 'blacklozenge;': '\u29eb', - 'blacksquare;': '\u25aa', - 'blacktriangle;': '\u25b4', - 'blacktriangledown;': '\u25be', - 'blacktriangleleft;': '\u25c2', - 'blacktriangleright;': '\u25b8', - 'blank;': '\u2423', - 'blk12;': '\u2592', - 'blk14;': '\u2591', - 'blk34;': '\u2593', - 'block;': '\u2588', - 'bne;': '=\u20e5', - 'bnequiv;': '\u2261\u20e5', - 'bNot;': '\u2aed', - 'bnot;': '\u2310', - 'Bopf;': '\U0001d539', - 'bopf;': '\U0001d553', - 'bot;': '\u22a5', - 'bottom;': '\u22a5', - 'bowtie;': '\u22c8', - 'boxbox;': '\u29c9', - 'boxDL;': '\u2557', - 'boxDl;': '\u2556', - 'boxdL;': '\u2555', - 'boxdl;': '\u2510', - 'boxDR;': '\u2554', - 'boxDr;': '\u2553', - 'boxdR;': '\u2552', - 'boxdr;': '\u250c', - 'boxH;': '\u2550', - 'boxh;': '\u2500', - 'boxHD;': '\u2566', - 'boxHd;': '\u2564', - 'boxhD;': '\u2565', - 'boxhd;': '\u252c', - 'boxHU;': '\u2569', - 'boxHu;': '\u2567', - 'boxhU;': '\u2568', - 'boxhu;': '\u2534', - 'boxminus;': '\u229f', - 'boxplus;': '\u229e', - 'boxtimes;': '\u22a0', - 'boxUL;': '\u255d', - 'boxUl;': '\u255c', - 'boxuL;': '\u255b', - 'boxul;': '\u2518', - 'boxUR;': '\u255a', - 'boxUr;': '\u2559', - 'boxuR;': '\u2558', - 'boxur;': '\u2514', - 'boxV;': '\u2551', - 'boxv;': '\u2502', - 'boxVH;': '\u256c', - 'boxVh;': '\u256b', - 'boxvH;': '\u256a', - 'boxvh;': '\u253c', - 'boxVL;': '\u2563', - 'boxVl;': '\u2562', - 'boxvL;': '\u2561', - 'boxvl;': '\u2524', - 'boxVR;': '\u2560', - 'boxVr;': '\u255f', - 'boxvR;': '\u255e', - 'boxvr;': '\u251c', - 'bprime;': '\u2035', - 'Breve;': '\u02d8', - 'breve;': '\u02d8', - 'brvbar': '\xa6', - 'brvbar;': '\xa6', - 'Bscr;': '\u212c', - 'bscr;': '\U0001d4b7', - 'bsemi;': '\u204f', - 'bsim;': '\u223d', - 'bsime;': '\u22cd', - 'bsol;': '\\', - 'bsolb;': '\u29c5', - 'bsolhsub;': '\u27c8', - 'bull;': '\u2022', - 'bullet;': '\u2022', - 'bump;': '\u224e', - 'bumpE;': '\u2aae', - 'bumpe;': '\u224f', - 'Bumpeq;': '\u224e', - 'bumpeq;': '\u224f', - 'Cacute;': '\u0106', - 'cacute;': '\u0107', - 'Cap;': '\u22d2', - 'cap;': '\u2229', - 'capand;': '\u2a44', - 'capbrcup;': '\u2a49', - 'capcap;': 
'\u2a4b', - 'capcup;': '\u2a47', - 'capdot;': '\u2a40', - 'CapitalDifferentialD;': '\u2145', - 'caps;': '\u2229\ufe00', - 'caret;': '\u2041', - 'caron;': '\u02c7', - 'Cayleys;': '\u212d', - 'ccaps;': '\u2a4d', - 'Ccaron;': '\u010c', - 'ccaron;': '\u010d', - 'Ccedil': '\xc7', - 'ccedil': '\xe7', - 'Ccedil;': '\xc7', - 'ccedil;': '\xe7', - 'Ccirc;': '\u0108', - 'ccirc;': '\u0109', - 'Cconint;': '\u2230', - 'ccups;': '\u2a4c', - 'ccupssm;': '\u2a50', - 'Cdot;': '\u010a', - 'cdot;': '\u010b', - 'cedil': '\xb8', - 'cedil;': '\xb8', - 'Cedilla;': '\xb8', - 'cemptyv;': '\u29b2', - 'cent': '\xa2', - 'cent;': '\xa2', - 'CenterDot;': '\xb7', - 'centerdot;': '\xb7', - 'Cfr;': '\u212d', - 'cfr;': '\U0001d520', - 'CHcy;': '\u0427', - 'chcy;': '\u0447', - 'check;': '\u2713', - 'checkmark;': '\u2713', - 'Chi;': '\u03a7', - 'chi;': '\u03c7', - 'cir;': '\u25cb', - 'circ;': '\u02c6', - 'circeq;': '\u2257', - 'circlearrowleft;': '\u21ba', - 'circlearrowright;': '\u21bb', - 'circledast;': '\u229b', - 'circledcirc;': '\u229a', - 'circleddash;': '\u229d', - 'CircleDot;': '\u2299', - 'circledR;': '\xae', - 'circledS;': '\u24c8', - 'CircleMinus;': '\u2296', - 'CirclePlus;': '\u2295', - 'CircleTimes;': '\u2297', - 'cirE;': '\u29c3', - 'cire;': '\u2257', - 'cirfnint;': '\u2a10', - 'cirmid;': '\u2aef', - 'cirscir;': '\u29c2', - 'ClockwiseContourIntegral;': '\u2232', - 'CloseCurlyDoubleQuote;': '\u201d', - 'CloseCurlyQuote;': '\u2019', - 'clubs;': '\u2663', - 'clubsuit;': '\u2663', - 'Colon;': '\u2237', - 'colon;': ':', - 'Colone;': '\u2a74', - 'colone;': '\u2254', - 'coloneq;': '\u2254', - 'comma;': ',', - 'commat;': '@', - 'comp;': '\u2201', - 'compfn;': '\u2218', - 'complement;': '\u2201', - 'complexes;': '\u2102', - 'cong;': '\u2245', - 'congdot;': '\u2a6d', - 'Congruent;': '\u2261', - 'Conint;': '\u222f', - 'conint;': '\u222e', - 'ContourIntegral;': '\u222e', - 'Copf;': '\u2102', - 'copf;': '\U0001d554', - 'coprod;': '\u2210', - 'Coproduct;': '\u2210', - 'COPY': '\xa9', - 'copy': '\xa9', - 'COPY;': '\xa9', - 'copy;': '\xa9', - 'copysr;': '\u2117', - 'CounterClockwiseContourIntegral;': '\u2233', - 'crarr;': '\u21b5', - 'Cross;': '\u2a2f', - 'cross;': '\u2717', - 'Cscr;': '\U0001d49e', - 'cscr;': '\U0001d4b8', - 'csub;': '\u2acf', - 'csube;': '\u2ad1', - 'csup;': '\u2ad0', - 'csupe;': '\u2ad2', - 'ctdot;': '\u22ef', - 'cudarrl;': '\u2938', - 'cudarrr;': '\u2935', - 'cuepr;': '\u22de', - 'cuesc;': '\u22df', - 'cularr;': '\u21b6', - 'cularrp;': '\u293d', - 'Cup;': '\u22d3', - 'cup;': '\u222a', - 'cupbrcap;': '\u2a48', - 'CupCap;': '\u224d', - 'cupcap;': '\u2a46', - 'cupcup;': '\u2a4a', - 'cupdot;': '\u228d', - 'cupor;': '\u2a45', - 'cups;': '\u222a\ufe00', - 'curarr;': '\u21b7', - 'curarrm;': '\u293c', - 'curlyeqprec;': '\u22de', - 'curlyeqsucc;': '\u22df', - 'curlyvee;': '\u22ce', - 'curlywedge;': '\u22cf', - 'curren': '\xa4', - 'curren;': '\xa4', - 'curvearrowleft;': '\u21b6', - 'curvearrowright;': '\u21b7', - 'cuvee;': '\u22ce', - 'cuwed;': '\u22cf', - 'cwconint;': '\u2232', - 'cwint;': '\u2231', - 'cylcty;': '\u232d', - 'Dagger;': '\u2021', - 'dagger;': '\u2020', - 'daleth;': '\u2138', - 'Darr;': '\u21a1', - 'dArr;': '\u21d3', - 'darr;': '\u2193', - 'dash;': '\u2010', - 'Dashv;': '\u2ae4', - 'dashv;': '\u22a3', - 'dbkarow;': '\u290f', - 'dblac;': '\u02dd', - 'Dcaron;': '\u010e', - 'dcaron;': '\u010f', - 'Dcy;': '\u0414', - 'dcy;': '\u0434', - 'DD;': '\u2145', - 'dd;': '\u2146', - 'ddagger;': '\u2021', - 'ddarr;': '\u21ca', - 'DDotrahd;': '\u2911', - 'ddotseq;': '\u2a77', - 'deg': '\xb0', - 'deg;': '\xb0', - 
'Del;': '\u2207', - 'Delta;': '\u0394', - 'delta;': '\u03b4', - 'demptyv;': '\u29b1', - 'dfisht;': '\u297f', - 'Dfr;': '\U0001d507', - 'dfr;': '\U0001d521', - 'dHar;': '\u2965', - 'dharl;': '\u21c3', - 'dharr;': '\u21c2', - 'DiacriticalAcute;': '\xb4', - 'DiacriticalDot;': '\u02d9', - 'DiacriticalDoubleAcute;': '\u02dd', - 'DiacriticalGrave;': '`', - 'DiacriticalTilde;': '\u02dc', - 'diam;': '\u22c4', - 'Diamond;': '\u22c4', - 'diamond;': '\u22c4', - 'diamondsuit;': '\u2666', - 'diams;': '\u2666', - 'die;': '\xa8', - 'DifferentialD;': '\u2146', - 'digamma;': '\u03dd', - 'disin;': '\u22f2', - 'div;': '\xf7', - 'divide': '\xf7', - 'divide;': '\xf7', - 'divideontimes;': '\u22c7', - 'divonx;': '\u22c7', - 'DJcy;': '\u0402', - 'djcy;': '\u0452', - 'dlcorn;': '\u231e', - 'dlcrop;': '\u230d', - 'dollar;': '$', - 'Dopf;': '\U0001d53b', - 'dopf;': '\U0001d555', - 'Dot;': '\xa8', - 'dot;': '\u02d9', - 'DotDot;': '\u20dc', - 'doteq;': '\u2250', - 'doteqdot;': '\u2251', - 'DotEqual;': '\u2250', - 'dotminus;': '\u2238', - 'dotplus;': '\u2214', - 'dotsquare;': '\u22a1', - 'doublebarwedge;': '\u2306', - 'DoubleContourIntegral;': '\u222f', - 'DoubleDot;': '\xa8', - 'DoubleDownArrow;': '\u21d3', - 'DoubleLeftArrow;': '\u21d0', - 'DoubleLeftRightArrow;': '\u21d4', - 'DoubleLeftTee;': '\u2ae4', - 'DoubleLongLeftArrow;': '\u27f8', - 'DoubleLongLeftRightArrow;': '\u27fa', - 'DoubleLongRightArrow;': '\u27f9', - 'DoubleRightArrow;': '\u21d2', - 'DoubleRightTee;': '\u22a8', - 'DoubleUpArrow;': '\u21d1', - 'DoubleUpDownArrow;': '\u21d5', - 'DoubleVerticalBar;': '\u2225', - 'DownArrow;': '\u2193', - 'Downarrow;': '\u21d3', - 'downarrow;': '\u2193', - 'DownArrowBar;': '\u2913', - 'DownArrowUpArrow;': '\u21f5', - 'DownBreve;': '\u0311', - 'downdownarrows;': '\u21ca', - 'downharpoonleft;': '\u21c3', - 'downharpoonright;': '\u21c2', - 'DownLeftRightVector;': '\u2950', - 'DownLeftTeeVector;': '\u295e', - 'DownLeftVector;': '\u21bd', - 'DownLeftVectorBar;': '\u2956', - 'DownRightTeeVector;': '\u295f', - 'DownRightVector;': '\u21c1', - 'DownRightVectorBar;': '\u2957', - 'DownTee;': '\u22a4', - 'DownTeeArrow;': '\u21a7', - 'drbkarow;': '\u2910', - 'drcorn;': '\u231f', - 'drcrop;': '\u230c', - 'Dscr;': '\U0001d49f', - 'dscr;': '\U0001d4b9', - 'DScy;': '\u0405', - 'dscy;': '\u0455', - 'dsol;': '\u29f6', - 'Dstrok;': '\u0110', - 'dstrok;': '\u0111', - 'dtdot;': '\u22f1', - 'dtri;': '\u25bf', - 'dtrif;': '\u25be', - 'duarr;': '\u21f5', - 'duhar;': '\u296f', - 'dwangle;': '\u29a6', - 'DZcy;': '\u040f', - 'dzcy;': '\u045f', - 'dzigrarr;': '\u27ff', - 'Eacute': '\xc9', - 'eacute': '\xe9', - 'Eacute;': '\xc9', - 'eacute;': '\xe9', - 'easter;': '\u2a6e', - 'Ecaron;': '\u011a', - 'ecaron;': '\u011b', - 'ecir;': '\u2256', - 'Ecirc': '\xca', - 'ecirc': '\xea', - 'Ecirc;': '\xca', - 'ecirc;': '\xea', - 'ecolon;': '\u2255', - 'Ecy;': '\u042d', - 'ecy;': '\u044d', - 'eDDot;': '\u2a77', - 'Edot;': '\u0116', - 'eDot;': '\u2251', - 'edot;': '\u0117', - 'ee;': '\u2147', - 'efDot;': '\u2252', - 'Efr;': '\U0001d508', - 'efr;': '\U0001d522', - 'eg;': '\u2a9a', - 'Egrave': '\xc8', - 'egrave': '\xe8', - 'Egrave;': '\xc8', - 'egrave;': '\xe8', - 'egs;': '\u2a96', - 'egsdot;': '\u2a98', - 'el;': '\u2a99', - 'Element;': '\u2208', - 'elinters;': '\u23e7', - 'ell;': '\u2113', - 'els;': '\u2a95', - 'elsdot;': '\u2a97', - 'Emacr;': '\u0112', - 'emacr;': '\u0113', - 'empty;': '\u2205', - 'emptyset;': '\u2205', - 'EmptySmallSquare;': '\u25fb', - 'emptyv;': '\u2205', - 'EmptyVerySmallSquare;': '\u25ab', - 'emsp13;': '\u2004', - 'emsp14;': '\u2005', - 
'emsp;': '\u2003', - 'ENG;': '\u014a', - 'eng;': '\u014b', - 'ensp;': '\u2002', - 'Eogon;': '\u0118', - 'eogon;': '\u0119', - 'Eopf;': '\U0001d53c', - 'eopf;': '\U0001d556', - 'epar;': '\u22d5', - 'eparsl;': '\u29e3', - 'eplus;': '\u2a71', - 'epsi;': '\u03b5', - 'Epsilon;': '\u0395', - 'epsilon;': '\u03b5', - 'epsiv;': '\u03f5', - 'eqcirc;': '\u2256', - 'eqcolon;': '\u2255', - 'eqsim;': '\u2242', - 'eqslantgtr;': '\u2a96', - 'eqslantless;': '\u2a95', - 'Equal;': '\u2a75', - 'equals;': '=', - 'EqualTilde;': '\u2242', - 'equest;': '\u225f', - 'Equilibrium;': '\u21cc', - 'equiv;': '\u2261', - 'equivDD;': '\u2a78', - 'eqvparsl;': '\u29e5', - 'erarr;': '\u2971', - 'erDot;': '\u2253', - 'Escr;': '\u2130', - 'escr;': '\u212f', - 'esdot;': '\u2250', - 'Esim;': '\u2a73', - 'esim;': '\u2242', - 'Eta;': '\u0397', - 'eta;': '\u03b7', - 'ETH': '\xd0', - 'eth': '\xf0', - 'ETH;': '\xd0', - 'eth;': '\xf0', - 'Euml': '\xcb', - 'euml': '\xeb', - 'Euml;': '\xcb', - 'euml;': '\xeb', - 'euro;': '\u20ac', - 'excl;': '!', - 'exist;': '\u2203', - 'Exists;': '\u2203', - 'expectation;': '\u2130', - 'ExponentialE;': '\u2147', - 'exponentiale;': '\u2147', - 'fallingdotseq;': '\u2252', - 'Fcy;': '\u0424', - 'fcy;': '\u0444', - 'female;': '\u2640', - 'ffilig;': '\ufb03', - 'fflig;': '\ufb00', - 'ffllig;': '\ufb04', - 'Ffr;': '\U0001d509', - 'ffr;': '\U0001d523', - 'filig;': '\ufb01', - 'FilledSmallSquare;': '\u25fc', - 'FilledVerySmallSquare;': '\u25aa', - 'fjlig;': 'fj', - 'flat;': '\u266d', - 'fllig;': '\ufb02', - 'fltns;': '\u25b1', - 'fnof;': '\u0192', - 'Fopf;': '\U0001d53d', - 'fopf;': '\U0001d557', - 'ForAll;': '\u2200', - 'forall;': '\u2200', - 'fork;': '\u22d4', - 'forkv;': '\u2ad9', - 'Fouriertrf;': '\u2131', - 'fpartint;': '\u2a0d', - 'frac12': '\xbd', - 'frac12;': '\xbd', - 'frac13;': '\u2153', - 'frac14': '\xbc', - 'frac14;': '\xbc', - 'frac15;': '\u2155', - 'frac16;': '\u2159', - 'frac18;': '\u215b', - 'frac23;': '\u2154', - 'frac25;': '\u2156', - 'frac34': '\xbe', - 'frac34;': '\xbe', - 'frac35;': '\u2157', - 'frac38;': '\u215c', - 'frac45;': '\u2158', - 'frac56;': '\u215a', - 'frac58;': '\u215d', - 'frac78;': '\u215e', - 'frasl;': '\u2044', - 'frown;': '\u2322', - 'Fscr;': '\u2131', - 'fscr;': '\U0001d4bb', - 'gacute;': '\u01f5', - 'Gamma;': '\u0393', - 'gamma;': '\u03b3', - 'Gammad;': '\u03dc', - 'gammad;': '\u03dd', - 'gap;': '\u2a86', - 'Gbreve;': '\u011e', - 'gbreve;': '\u011f', - 'Gcedil;': '\u0122', - 'Gcirc;': '\u011c', - 'gcirc;': '\u011d', - 'Gcy;': '\u0413', - 'gcy;': '\u0433', - 'Gdot;': '\u0120', - 'gdot;': '\u0121', - 'gE;': '\u2267', - 'ge;': '\u2265', - 'gEl;': '\u2a8c', - 'gel;': '\u22db', - 'geq;': '\u2265', - 'geqq;': '\u2267', - 'geqslant;': '\u2a7e', - 'ges;': '\u2a7e', - 'gescc;': '\u2aa9', - 'gesdot;': '\u2a80', - 'gesdoto;': '\u2a82', - 'gesdotol;': '\u2a84', - 'gesl;': '\u22db\ufe00', - 'gesles;': '\u2a94', - 'Gfr;': '\U0001d50a', - 'gfr;': '\U0001d524', - 'Gg;': '\u22d9', - 'gg;': '\u226b', - 'ggg;': '\u22d9', - 'gimel;': '\u2137', - 'GJcy;': '\u0403', - 'gjcy;': '\u0453', - 'gl;': '\u2277', - 'gla;': '\u2aa5', - 'glE;': '\u2a92', - 'glj;': '\u2aa4', - 'gnap;': '\u2a8a', - 'gnapprox;': '\u2a8a', - 'gnE;': '\u2269', - 'gne;': '\u2a88', - 'gneq;': '\u2a88', - 'gneqq;': '\u2269', - 'gnsim;': '\u22e7', - 'Gopf;': '\U0001d53e', - 'gopf;': '\U0001d558', - 'grave;': '`', - 'GreaterEqual;': '\u2265', - 'GreaterEqualLess;': '\u22db', - 'GreaterFullEqual;': '\u2267', - 'GreaterGreater;': '\u2aa2', - 'GreaterLess;': '\u2277', - 'GreaterSlantEqual;': '\u2a7e', - 'GreaterTilde;': '\u2273', - 
'Gscr;': '\U0001d4a2', - 'gscr;': '\u210a', - 'gsim;': '\u2273', - 'gsime;': '\u2a8e', - 'gsiml;': '\u2a90', - 'GT': '>', - 'gt': '>', - 'GT;': '>', - 'Gt;': '\u226b', - 'gt;': '>', - 'gtcc;': '\u2aa7', - 'gtcir;': '\u2a7a', - 'gtdot;': '\u22d7', - 'gtlPar;': '\u2995', - 'gtquest;': '\u2a7c', - 'gtrapprox;': '\u2a86', - 'gtrarr;': '\u2978', - 'gtrdot;': '\u22d7', - 'gtreqless;': '\u22db', - 'gtreqqless;': '\u2a8c', - 'gtrless;': '\u2277', - 'gtrsim;': '\u2273', - 'gvertneqq;': '\u2269\ufe00', - 'gvnE;': '\u2269\ufe00', - 'Hacek;': '\u02c7', - 'hairsp;': '\u200a', - 'half;': '\xbd', - 'hamilt;': '\u210b', - 'HARDcy;': '\u042a', - 'hardcy;': '\u044a', - 'hArr;': '\u21d4', - 'harr;': '\u2194', - 'harrcir;': '\u2948', - 'harrw;': '\u21ad', - 'Hat;': '^', - 'hbar;': '\u210f', - 'Hcirc;': '\u0124', - 'hcirc;': '\u0125', - 'hearts;': '\u2665', - 'heartsuit;': '\u2665', - 'hellip;': '\u2026', - 'hercon;': '\u22b9', - 'Hfr;': '\u210c', - 'hfr;': '\U0001d525', - 'HilbertSpace;': '\u210b', - 'hksearow;': '\u2925', - 'hkswarow;': '\u2926', - 'hoarr;': '\u21ff', - 'homtht;': '\u223b', - 'hookleftarrow;': '\u21a9', - 'hookrightarrow;': '\u21aa', - 'Hopf;': '\u210d', - 'hopf;': '\U0001d559', - 'horbar;': '\u2015', - 'HorizontalLine;': '\u2500', - 'Hscr;': '\u210b', - 'hscr;': '\U0001d4bd', - 'hslash;': '\u210f', - 'Hstrok;': '\u0126', - 'hstrok;': '\u0127', - 'HumpDownHump;': '\u224e', - 'HumpEqual;': '\u224f', - 'hybull;': '\u2043', - 'hyphen;': '\u2010', - 'Iacute': '\xcd', - 'iacute': '\xed', - 'Iacute;': '\xcd', - 'iacute;': '\xed', - 'ic;': '\u2063', - 'Icirc': '\xce', - 'icirc': '\xee', - 'Icirc;': '\xce', - 'icirc;': '\xee', - 'Icy;': '\u0418', - 'icy;': '\u0438', - 'Idot;': '\u0130', - 'IEcy;': '\u0415', - 'iecy;': '\u0435', - 'iexcl': '\xa1', - 'iexcl;': '\xa1', - 'iff;': '\u21d4', - 'Ifr;': '\u2111', - 'ifr;': '\U0001d526', - 'Igrave': '\xcc', - 'igrave': '\xec', - 'Igrave;': '\xcc', - 'igrave;': '\xec', - 'ii;': '\u2148', - 'iiiint;': '\u2a0c', - 'iiint;': '\u222d', - 'iinfin;': '\u29dc', - 'iiota;': '\u2129', - 'IJlig;': '\u0132', - 'ijlig;': '\u0133', - 'Im;': '\u2111', - 'Imacr;': '\u012a', - 'imacr;': '\u012b', - 'image;': '\u2111', - 'ImaginaryI;': '\u2148', - 'imagline;': '\u2110', - 'imagpart;': '\u2111', - 'imath;': '\u0131', - 'imof;': '\u22b7', - 'imped;': '\u01b5', - 'Implies;': '\u21d2', - 'in;': '\u2208', - 'incare;': '\u2105', - 'infin;': '\u221e', - 'infintie;': '\u29dd', - 'inodot;': '\u0131', - 'Int;': '\u222c', - 'int;': '\u222b', - 'intcal;': '\u22ba', - 'integers;': '\u2124', - 'Integral;': '\u222b', - 'intercal;': '\u22ba', - 'Intersection;': '\u22c2', - 'intlarhk;': '\u2a17', - 'intprod;': '\u2a3c', - 'InvisibleComma;': '\u2063', - 'InvisibleTimes;': '\u2062', - 'IOcy;': '\u0401', - 'iocy;': '\u0451', - 'Iogon;': '\u012e', - 'iogon;': '\u012f', - 'Iopf;': '\U0001d540', - 'iopf;': '\U0001d55a', - 'Iota;': '\u0399', - 'iota;': '\u03b9', - 'iprod;': '\u2a3c', - 'iquest': '\xbf', - 'iquest;': '\xbf', - 'Iscr;': '\u2110', - 'iscr;': '\U0001d4be', - 'isin;': '\u2208', - 'isindot;': '\u22f5', - 'isinE;': '\u22f9', - 'isins;': '\u22f4', - 'isinsv;': '\u22f3', - 'isinv;': '\u2208', - 'it;': '\u2062', - 'Itilde;': '\u0128', - 'itilde;': '\u0129', - 'Iukcy;': '\u0406', - 'iukcy;': '\u0456', - 'Iuml': '\xcf', - 'iuml': '\xef', - 'Iuml;': '\xcf', - 'iuml;': '\xef', - 'Jcirc;': '\u0134', - 'jcirc;': '\u0135', - 'Jcy;': '\u0419', - 'jcy;': '\u0439', - 'Jfr;': '\U0001d50d', - 'jfr;': '\U0001d527', - 'jmath;': '\u0237', - 'Jopf;': '\U0001d541', - 'jopf;': '\U0001d55b', - 'Jscr;': 
'\U0001d4a5', - 'jscr;': '\U0001d4bf', - 'Jsercy;': '\u0408', - 'jsercy;': '\u0458', - 'Jukcy;': '\u0404', - 'jukcy;': '\u0454', - 'Kappa;': '\u039a', - 'kappa;': '\u03ba', - 'kappav;': '\u03f0', - 'Kcedil;': '\u0136', - 'kcedil;': '\u0137', - 'Kcy;': '\u041a', - 'kcy;': '\u043a', - 'Kfr;': '\U0001d50e', - 'kfr;': '\U0001d528', - 'kgreen;': '\u0138', - 'KHcy;': '\u0425', - 'khcy;': '\u0445', - 'KJcy;': '\u040c', - 'kjcy;': '\u045c', - 'Kopf;': '\U0001d542', - 'kopf;': '\U0001d55c', - 'Kscr;': '\U0001d4a6', - 'kscr;': '\U0001d4c0', - 'lAarr;': '\u21da', - 'Lacute;': '\u0139', - 'lacute;': '\u013a', - 'laemptyv;': '\u29b4', - 'lagran;': '\u2112', - 'Lambda;': '\u039b', - 'lambda;': '\u03bb', - 'Lang;': '\u27ea', - 'lang;': '\u27e8', - 'langd;': '\u2991', - 'langle;': '\u27e8', - 'lap;': '\u2a85', - 'Laplacetrf;': '\u2112', - 'laquo': '\xab', - 'laquo;': '\xab', - 'Larr;': '\u219e', - 'lArr;': '\u21d0', - 'larr;': '\u2190', - 'larrb;': '\u21e4', - 'larrbfs;': '\u291f', - 'larrfs;': '\u291d', - 'larrhk;': '\u21a9', - 'larrlp;': '\u21ab', - 'larrpl;': '\u2939', - 'larrsim;': '\u2973', - 'larrtl;': '\u21a2', - 'lat;': '\u2aab', - 'lAtail;': '\u291b', - 'latail;': '\u2919', - 'late;': '\u2aad', - 'lates;': '\u2aad\ufe00', - 'lBarr;': '\u290e', - 'lbarr;': '\u290c', - 'lbbrk;': '\u2772', - 'lbrace;': '{', - 'lbrack;': '[', - 'lbrke;': '\u298b', - 'lbrksld;': '\u298f', - 'lbrkslu;': '\u298d', - 'Lcaron;': '\u013d', - 'lcaron;': '\u013e', - 'Lcedil;': '\u013b', - 'lcedil;': '\u013c', - 'lceil;': '\u2308', - 'lcub;': '{', - 'Lcy;': '\u041b', - 'lcy;': '\u043b', - 'ldca;': '\u2936', - 'ldquo;': '\u201c', - 'ldquor;': '\u201e', - 'ldrdhar;': '\u2967', - 'ldrushar;': '\u294b', - 'ldsh;': '\u21b2', - 'lE;': '\u2266', - 'le;': '\u2264', - 'LeftAngleBracket;': '\u27e8', - 'LeftArrow;': '\u2190', - 'Leftarrow;': '\u21d0', - 'leftarrow;': '\u2190', - 'LeftArrowBar;': '\u21e4', - 'LeftArrowRightArrow;': '\u21c6', - 'leftarrowtail;': '\u21a2', - 'LeftCeiling;': '\u2308', - 'LeftDoubleBracket;': '\u27e6', - 'LeftDownTeeVector;': '\u2961', - 'LeftDownVector;': '\u21c3', - 'LeftDownVectorBar;': '\u2959', - 'LeftFloor;': '\u230a', - 'leftharpoondown;': '\u21bd', - 'leftharpoonup;': '\u21bc', - 'leftleftarrows;': '\u21c7', - 'LeftRightArrow;': '\u2194', - 'Leftrightarrow;': '\u21d4', - 'leftrightarrow;': '\u2194', - 'leftrightarrows;': '\u21c6', - 'leftrightharpoons;': '\u21cb', - 'leftrightsquigarrow;': '\u21ad', - 'LeftRightVector;': '\u294e', - 'LeftTee;': '\u22a3', - 'LeftTeeArrow;': '\u21a4', - 'LeftTeeVector;': '\u295a', - 'leftthreetimes;': '\u22cb', - 'LeftTriangle;': '\u22b2', - 'LeftTriangleBar;': '\u29cf', - 'LeftTriangleEqual;': '\u22b4', - 'LeftUpDownVector;': '\u2951', - 'LeftUpTeeVector;': '\u2960', - 'LeftUpVector;': '\u21bf', - 'LeftUpVectorBar;': '\u2958', - 'LeftVector;': '\u21bc', - 'LeftVectorBar;': '\u2952', - 'lEg;': '\u2a8b', - 'leg;': '\u22da', - 'leq;': '\u2264', - 'leqq;': '\u2266', - 'leqslant;': '\u2a7d', - 'les;': '\u2a7d', - 'lescc;': '\u2aa8', - 'lesdot;': '\u2a7f', - 'lesdoto;': '\u2a81', - 'lesdotor;': '\u2a83', - 'lesg;': '\u22da\ufe00', - 'lesges;': '\u2a93', - 'lessapprox;': '\u2a85', - 'lessdot;': '\u22d6', - 'lesseqgtr;': '\u22da', - 'lesseqqgtr;': '\u2a8b', - 'LessEqualGreater;': '\u22da', - 'LessFullEqual;': '\u2266', - 'LessGreater;': '\u2276', - 'lessgtr;': '\u2276', - 'LessLess;': '\u2aa1', - 'lesssim;': '\u2272', - 'LessSlantEqual;': '\u2a7d', - 'LessTilde;': '\u2272', - 'lfisht;': '\u297c', - 'lfloor;': '\u230a', - 'Lfr;': '\U0001d50f', - 'lfr;': '\U0001d529', - 'lg;': 
'\u2276', - 'lgE;': '\u2a91', - 'lHar;': '\u2962', - 'lhard;': '\u21bd', - 'lharu;': '\u21bc', - 'lharul;': '\u296a', - 'lhblk;': '\u2584', - 'LJcy;': '\u0409', - 'ljcy;': '\u0459', - 'Ll;': '\u22d8', - 'll;': '\u226a', - 'llarr;': '\u21c7', - 'llcorner;': '\u231e', - 'Lleftarrow;': '\u21da', - 'llhard;': '\u296b', - 'lltri;': '\u25fa', - 'Lmidot;': '\u013f', - 'lmidot;': '\u0140', - 'lmoust;': '\u23b0', - 'lmoustache;': '\u23b0', - 'lnap;': '\u2a89', - 'lnapprox;': '\u2a89', - 'lnE;': '\u2268', - 'lne;': '\u2a87', - 'lneq;': '\u2a87', - 'lneqq;': '\u2268', - 'lnsim;': '\u22e6', - 'loang;': '\u27ec', - 'loarr;': '\u21fd', - 'lobrk;': '\u27e6', - 'LongLeftArrow;': '\u27f5', - 'Longleftarrow;': '\u27f8', - 'longleftarrow;': '\u27f5', - 'LongLeftRightArrow;': '\u27f7', - 'Longleftrightarrow;': '\u27fa', - 'longleftrightarrow;': '\u27f7', - 'longmapsto;': '\u27fc', - 'LongRightArrow;': '\u27f6', - 'Longrightarrow;': '\u27f9', - 'longrightarrow;': '\u27f6', - 'looparrowleft;': '\u21ab', - 'looparrowright;': '\u21ac', - 'lopar;': '\u2985', - 'Lopf;': '\U0001d543', - 'lopf;': '\U0001d55d', - 'loplus;': '\u2a2d', - 'lotimes;': '\u2a34', - 'lowast;': '\u2217', - 'lowbar;': '_', - 'LowerLeftArrow;': '\u2199', - 'LowerRightArrow;': '\u2198', - 'loz;': '\u25ca', - 'lozenge;': '\u25ca', - 'lozf;': '\u29eb', - 'lpar;': '(', - 'lparlt;': '\u2993', - 'lrarr;': '\u21c6', - 'lrcorner;': '\u231f', - 'lrhar;': '\u21cb', - 'lrhard;': '\u296d', - 'lrm;': '\u200e', - 'lrtri;': '\u22bf', - 'lsaquo;': '\u2039', - 'Lscr;': '\u2112', - 'lscr;': '\U0001d4c1', - 'Lsh;': '\u21b0', - 'lsh;': '\u21b0', - 'lsim;': '\u2272', - 'lsime;': '\u2a8d', - 'lsimg;': '\u2a8f', - 'lsqb;': '[', - 'lsquo;': '\u2018', - 'lsquor;': '\u201a', - 'Lstrok;': '\u0141', - 'lstrok;': '\u0142', - 'LT': '<', - 'lt': '<', - 'LT;': '<', - 'Lt;': '\u226a', - 'lt;': '<', - 'ltcc;': '\u2aa6', - 'ltcir;': '\u2a79', - 'ltdot;': '\u22d6', - 'lthree;': '\u22cb', - 'ltimes;': '\u22c9', - 'ltlarr;': '\u2976', - 'ltquest;': '\u2a7b', - 'ltri;': '\u25c3', - 'ltrie;': '\u22b4', - 'ltrif;': '\u25c2', - 'ltrPar;': '\u2996', - 'lurdshar;': '\u294a', - 'luruhar;': '\u2966', - 'lvertneqq;': '\u2268\ufe00', - 'lvnE;': '\u2268\ufe00', - 'macr': '\xaf', - 'macr;': '\xaf', - 'male;': '\u2642', - 'malt;': '\u2720', - 'maltese;': '\u2720', - 'Map;': '\u2905', - 'map;': '\u21a6', - 'mapsto;': '\u21a6', - 'mapstodown;': '\u21a7', - 'mapstoleft;': '\u21a4', - 'mapstoup;': '\u21a5', - 'marker;': '\u25ae', - 'mcomma;': '\u2a29', - 'Mcy;': '\u041c', - 'mcy;': '\u043c', - 'mdash;': '\u2014', - 'mDDot;': '\u223a', - 'measuredangle;': '\u2221', - 'MediumSpace;': '\u205f', - 'Mellintrf;': '\u2133', - 'Mfr;': '\U0001d510', - 'mfr;': '\U0001d52a', - 'mho;': '\u2127', - 'micro': '\xb5', - 'micro;': '\xb5', - 'mid;': '\u2223', - 'midast;': '*', - 'midcir;': '\u2af0', - 'middot': '\xb7', - 'middot;': '\xb7', - 'minus;': '\u2212', - 'minusb;': '\u229f', - 'minusd;': '\u2238', - 'minusdu;': '\u2a2a', - 'MinusPlus;': '\u2213', - 'mlcp;': '\u2adb', - 'mldr;': '\u2026', - 'mnplus;': '\u2213', - 'models;': '\u22a7', - 'Mopf;': '\U0001d544', - 'mopf;': '\U0001d55e', - 'mp;': '\u2213', - 'Mscr;': '\u2133', - 'mscr;': '\U0001d4c2', - 'mstpos;': '\u223e', - 'Mu;': '\u039c', - 'mu;': '\u03bc', - 'multimap;': '\u22b8', - 'mumap;': '\u22b8', - 'nabla;': '\u2207', - 'Nacute;': '\u0143', - 'nacute;': '\u0144', - 'nang;': '\u2220\u20d2', - 'nap;': '\u2249', - 'napE;': '\u2a70\u0338', - 'napid;': '\u224b\u0338', - 'napos;': '\u0149', - 'napprox;': '\u2249', - 'natur;': '\u266e', - 'natural;': 
'\u266e', - 'naturals;': '\u2115', - 'nbsp': '\xa0', - 'nbsp;': '\xa0', - 'nbump;': '\u224e\u0338', - 'nbumpe;': '\u224f\u0338', - 'ncap;': '\u2a43', - 'Ncaron;': '\u0147', - 'ncaron;': '\u0148', - 'Ncedil;': '\u0145', - 'ncedil;': '\u0146', - 'ncong;': '\u2247', - 'ncongdot;': '\u2a6d\u0338', - 'ncup;': '\u2a42', - 'Ncy;': '\u041d', - 'ncy;': '\u043d', - 'ndash;': '\u2013', - 'ne;': '\u2260', - 'nearhk;': '\u2924', - 'neArr;': '\u21d7', - 'nearr;': '\u2197', - 'nearrow;': '\u2197', - 'nedot;': '\u2250\u0338', - 'NegativeMediumSpace;': '\u200b', - 'NegativeThickSpace;': '\u200b', - 'NegativeThinSpace;': '\u200b', - 'NegativeVeryThinSpace;': '\u200b', - 'nequiv;': '\u2262', - 'nesear;': '\u2928', - 'nesim;': '\u2242\u0338', - 'NestedGreaterGreater;': '\u226b', - 'NestedLessLess;': '\u226a', - 'NewLine;': '\n', - 'nexist;': '\u2204', - 'nexists;': '\u2204', - 'Nfr;': '\U0001d511', - 'nfr;': '\U0001d52b', - 'ngE;': '\u2267\u0338', - 'nge;': '\u2271', - 'ngeq;': '\u2271', - 'ngeqq;': '\u2267\u0338', - 'ngeqslant;': '\u2a7e\u0338', - 'nges;': '\u2a7e\u0338', - 'nGg;': '\u22d9\u0338', - 'ngsim;': '\u2275', - 'nGt;': '\u226b\u20d2', - 'ngt;': '\u226f', - 'ngtr;': '\u226f', - 'nGtv;': '\u226b\u0338', - 'nhArr;': '\u21ce', - 'nharr;': '\u21ae', - 'nhpar;': '\u2af2', - 'ni;': '\u220b', - 'nis;': '\u22fc', - 'nisd;': '\u22fa', - 'niv;': '\u220b', - 'NJcy;': '\u040a', - 'njcy;': '\u045a', - 'nlArr;': '\u21cd', - 'nlarr;': '\u219a', - 'nldr;': '\u2025', - 'nlE;': '\u2266\u0338', - 'nle;': '\u2270', - 'nLeftarrow;': '\u21cd', - 'nleftarrow;': '\u219a', - 'nLeftrightarrow;': '\u21ce', - 'nleftrightarrow;': '\u21ae', - 'nleq;': '\u2270', - 'nleqq;': '\u2266\u0338', - 'nleqslant;': '\u2a7d\u0338', - 'nles;': '\u2a7d\u0338', - 'nless;': '\u226e', - 'nLl;': '\u22d8\u0338', - 'nlsim;': '\u2274', - 'nLt;': '\u226a\u20d2', - 'nlt;': '\u226e', - 'nltri;': '\u22ea', - 'nltrie;': '\u22ec', - 'nLtv;': '\u226a\u0338', - 'nmid;': '\u2224', - 'NoBreak;': '\u2060', - 'NonBreakingSpace;': '\xa0', - 'Nopf;': '\u2115', - 'nopf;': '\U0001d55f', - 'not': '\xac', - 'Not;': '\u2aec', - 'not;': '\xac', - 'NotCongruent;': '\u2262', - 'NotCupCap;': '\u226d', - 'NotDoubleVerticalBar;': '\u2226', - 'NotElement;': '\u2209', - 'NotEqual;': '\u2260', - 'NotEqualTilde;': '\u2242\u0338', - 'NotExists;': '\u2204', - 'NotGreater;': '\u226f', - 'NotGreaterEqual;': '\u2271', - 'NotGreaterFullEqual;': '\u2267\u0338', - 'NotGreaterGreater;': '\u226b\u0338', - 'NotGreaterLess;': '\u2279', - 'NotGreaterSlantEqual;': '\u2a7e\u0338', - 'NotGreaterTilde;': '\u2275', - 'NotHumpDownHump;': '\u224e\u0338', - 'NotHumpEqual;': '\u224f\u0338', - 'notin;': '\u2209', - 'notindot;': '\u22f5\u0338', - 'notinE;': '\u22f9\u0338', - 'notinva;': '\u2209', - 'notinvb;': '\u22f7', - 'notinvc;': '\u22f6', - 'NotLeftTriangle;': '\u22ea', - 'NotLeftTriangleBar;': '\u29cf\u0338', - 'NotLeftTriangleEqual;': '\u22ec', - 'NotLess;': '\u226e', - 'NotLessEqual;': '\u2270', - 'NotLessGreater;': '\u2278', - 'NotLessLess;': '\u226a\u0338', - 'NotLessSlantEqual;': '\u2a7d\u0338', - 'NotLessTilde;': '\u2274', - 'NotNestedGreaterGreater;': '\u2aa2\u0338', - 'NotNestedLessLess;': '\u2aa1\u0338', - 'notni;': '\u220c', - 'notniva;': '\u220c', - 'notnivb;': '\u22fe', - 'notnivc;': '\u22fd', - 'NotPrecedes;': '\u2280', - 'NotPrecedesEqual;': '\u2aaf\u0338', - 'NotPrecedesSlantEqual;': '\u22e0', - 'NotReverseElement;': '\u220c', - 'NotRightTriangle;': '\u22eb', - 'NotRightTriangleBar;': '\u29d0\u0338', - 'NotRightTriangleEqual;': '\u22ed', - 'NotSquareSubset;': '\u228f\u0338', - 
'NotSquareSubsetEqual;': '\u22e2', - 'NotSquareSuperset;': '\u2290\u0338', - 'NotSquareSupersetEqual;': '\u22e3', - 'NotSubset;': '\u2282\u20d2', - 'NotSubsetEqual;': '\u2288', - 'NotSucceeds;': '\u2281', - 'NotSucceedsEqual;': '\u2ab0\u0338', - 'NotSucceedsSlantEqual;': '\u22e1', - 'NotSucceedsTilde;': '\u227f\u0338', - 'NotSuperset;': '\u2283\u20d2', - 'NotSupersetEqual;': '\u2289', - 'NotTilde;': '\u2241', - 'NotTildeEqual;': '\u2244', - 'NotTildeFullEqual;': '\u2247', - 'NotTildeTilde;': '\u2249', - 'NotVerticalBar;': '\u2224', - 'npar;': '\u2226', - 'nparallel;': '\u2226', - 'nparsl;': '\u2afd\u20e5', - 'npart;': '\u2202\u0338', - 'npolint;': '\u2a14', - 'npr;': '\u2280', - 'nprcue;': '\u22e0', - 'npre;': '\u2aaf\u0338', - 'nprec;': '\u2280', - 'npreceq;': '\u2aaf\u0338', - 'nrArr;': '\u21cf', - 'nrarr;': '\u219b', - 'nrarrc;': '\u2933\u0338', - 'nrarrw;': '\u219d\u0338', - 'nRightarrow;': '\u21cf', - 'nrightarrow;': '\u219b', - 'nrtri;': '\u22eb', - 'nrtrie;': '\u22ed', - 'nsc;': '\u2281', - 'nsccue;': '\u22e1', - 'nsce;': '\u2ab0\u0338', - 'Nscr;': '\U0001d4a9', - 'nscr;': '\U0001d4c3', - 'nshortmid;': '\u2224', - 'nshortparallel;': '\u2226', - 'nsim;': '\u2241', - 'nsime;': '\u2244', - 'nsimeq;': '\u2244', - 'nsmid;': '\u2224', - 'nspar;': '\u2226', - 'nsqsube;': '\u22e2', - 'nsqsupe;': '\u22e3', - 'nsub;': '\u2284', - 'nsubE;': '\u2ac5\u0338', - 'nsube;': '\u2288', - 'nsubset;': '\u2282\u20d2', - 'nsubseteq;': '\u2288', - 'nsubseteqq;': '\u2ac5\u0338', - 'nsucc;': '\u2281', - 'nsucceq;': '\u2ab0\u0338', - 'nsup;': '\u2285', - 'nsupE;': '\u2ac6\u0338', - 'nsupe;': '\u2289', - 'nsupset;': '\u2283\u20d2', - 'nsupseteq;': '\u2289', - 'nsupseteqq;': '\u2ac6\u0338', - 'ntgl;': '\u2279', - 'Ntilde': '\xd1', - 'ntilde': '\xf1', - 'Ntilde;': '\xd1', - 'ntilde;': '\xf1', - 'ntlg;': '\u2278', - 'ntriangleleft;': '\u22ea', - 'ntrianglelefteq;': '\u22ec', - 'ntriangleright;': '\u22eb', - 'ntrianglerighteq;': '\u22ed', - 'Nu;': '\u039d', - 'nu;': '\u03bd', - 'num;': '#', - 'numero;': '\u2116', - 'numsp;': '\u2007', - 'nvap;': '\u224d\u20d2', - 'nVDash;': '\u22af', - 'nVdash;': '\u22ae', - 'nvDash;': '\u22ad', - 'nvdash;': '\u22ac', - 'nvge;': '\u2265\u20d2', - 'nvgt;': '>\u20d2', - 'nvHarr;': '\u2904', - 'nvinfin;': '\u29de', - 'nvlArr;': '\u2902', - 'nvle;': '\u2264\u20d2', - 'nvlt;': '<\u20d2', - 'nvltrie;': '\u22b4\u20d2', - 'nvrArr;': '\u2903', - 'nvrtrie;': '\u22b5\u20d2', - 'nvsim;': '\u223c\u20d2', - 'nwarhk;': '\u2923', - 'nwArr;': '\u21d6', - 'nwarr;': '\u2196', - 'nwarrow;': '\u2196', - 'nwnear;': '\u2927', - 'Oacute': '\xd3', - 'oacute': '\xf3', - 'Oacute;': '\xd3', - 'oacute;': '\xf3', - 'oast;': '\u229b', - 'ocir;': '\u229a', - 'Ocirc': '\xd4', - 'ocirc': '\xf4', - 'Ocirc;': '\xd4', - 'ocirc;': '\xf4', - 'Ocy;': '\u041e', - 'ocy;': '\u043e', - 'odash;': '\u229d', - 'Odblac;': '\u0150', - 'odblac;': '\u0151', - 'odiv;': '\u2a38', - 'odot;': '\u2299', - 'odsold;': '\u29bc', - 'OElig;': '\u0152', - 'oelig;': '\u0153', - 'ofcir;': '\u29bf', - 'Ofr;': '\U0001d512', - 'ofr;': '\U0001d52c', - 'ogon;': '\u02db', - 'Ograve': '\xd2', - 'ograve': '\xf2', - 'Ograve;': '\xd2', - 'ograve;': '\xf2', - 'ogt;': '\u29c1', - 'ohbar;': '\u29b5', - 'ohm;': '\u03a9', - 'oint;': '\u222e', - 'olarr;': '\u21ba', - 'olcir;': '\u29be', - 'olcross;': '\u29bb', - 'oline;': '\u203e', - 'olt;': '\u29c0', - 'Omacr;': '\u014c', - 'omacr;': '\u014d', - 'Omega;': '\u03a9', - 'omega;': '\u03c9', - 'Omicron;': '\u039f', - 'omicron;': '\u03bf', - 'omid;': '\u29b6', - 'ominus;': '\u2296', - 'Oopf;': '\U0001d546', - 
'oopf;': '\U0001d560', - 'opar;': '\u29b7', - 'OpenCurlyDoubleQuote;': '\u201c', - 'OpenCurlyQuote;': '\u2018', - 'operp;': '\u29b9', - 'oplus;': '\u2295', - 'Or;': '\u2a54', - 'or;': '\u2228', - 'orarr;': '\u21bb', - 'ord;': '\u2a5d', - 'order;': '\u2134', - 'orderof;': '\u2134', - 'ordf': '\xaa', - 'ordf;': '\xaa', - 'ordm': '\xba', - 'ordm;': '\xba', - 'origof;': '\u22b6', - 'oror;': '\u2a56', - 'orslope;': '\u2a57', - 'orv;': '\u2a5b', - 'oS;': '\u24c8', - 'Oscr;': '\U0001d4aa', - 'oscr;': '\u2134', - 'Oslash': '\xd8', - 'oslash': '\xf8', - 'Oslash;': '\xd8', - 'oslash;': '\xf8', - 'osol;': '\u2298', - 'Otilde': '\xd5', - 'otilde': '\xf5', - 'Otilde;': '\xd5', - 'otilde;': '\xf5', - 'Otimes;': '\u2a37', - 'otimes;': '\u2297', - 'otimesas;': '\u2a36', - 'Ouml': '\xd6', - 'ouml': '\xf6', - 'Ouml;': '\xd6', - 'ouml;': '\xf6', - 'ovbar;': '\u233d', - 'OverBar;': '\u203e', - 'OverBrace;': '\u23de', - 'OverBracket;': '\u23b4', - 'OverParenthesis;': '\u23dc', - 'par;': '\u2225', - 'para': '\xb6', - 'para;': '\xb6', - 'parallel;': '\u2225', - 'parsim;': '\u2af3', - 'parsl;': '\u2afd', - 'part;': '\u2202', - 'PartialD;': '\u2202', - 'Pcy;': '\u041f', - 'pcy;': '\u043f', - 'percnt;': '%', - 'period;': '.', - 'permil;': '\u2030', - 'perp;': '\u22a5', - 'pertenk;': '\u2031', - 'Pfr;': '\U0001d513', - 'pfr;': '\U0001d52d', - 'Phi;': '\u03a6', - 'phi;': '\u03c6', - 'phiv;': '\u03d5', - 'phmmat;': '\u2133', - 'phone;': '\u260e', - 'Pi;': '\u03a0', - 'pi;': '\u03c0', - 'pitchfork;': '\u22d4', - 'piv;': '\u03d6', - 'planck;': '\u210f', - 'planckh;': '\u210e', - 'plankv;': '\u210f', - 'plus;': '+', - 'plusacir;': '\u2a23', - 'plusb;': '\u229e', - 'pluscir;': '\u2a22', - 'plusdo;': '\u2214', - 'plusdu;': '\u2a25', - 'pluse;': '\u2a72', - 'PlusMinus;': '\xb1', - 'plusmn': '\xb1', - 'plusmn;': '\xb1', - 'plussim;': '\u2a26', - 'plustwo;': '\u2a27', - 'pm;': '\xb1', - 'Poincareplane;': '\u210c', - 'pointint;': '\u2a15', - 'Popf;': '\u2119', - 'popf;': '\U0001d561', - 'pound': '\xa3', - 'pound;': '\xa3', - 'Pr;': '\u2abb', - 'pr;': '\u227a', - 'prap;': '\u2ab7', - 'prcue;': '\u227c', - 'prE;': '\u2ab3', - 'pre;': '\u2aaf', - 'prec;': '\u227a', - 'precapprox;': '\u2ab7', - 'preccurlyeq;': '\u227c', - 'Precedes;': '\u227a', - 'PrecedesEqual;': '\u2aaf', - 'PrecedesSlantEqual;': '\u227c', - 'PrecedesTilde;': '\u227e', - 'preceq;': '\u2aaf', - 'precnapprox;': '\u2ab9', - 'precneqq;': '\u2ab5', - 'precnsim;': '\u22e8', - 'precsim;': '\u227e', - 'Prime;': '\u2033', - 'prime;': '\u2032', - 'primes;': '\u2119', - 'prnap;': '\u2ab9', - 'prnE;': '\u2ab5', - 'prnsim;': '\u22e8', - 'prod;': '\u220f', - 'Product;': '\u220f', - 'profalar;': '\u232e', - 'profline;': '\u2312', - 'profsurf;': '\u2313', - 'prop;': '\u221d', - 'Proportion;': '\u2237', - 'Proportional;': '\u221d', - 'propto;': '\u221d', - 'prsim;': '\u227e', - 'prurel;': '\u22b0', - 'Pscr;': '\U0001d4ab', - 'pscr;': '\U0001d4c5', - 'Psi;': '\u03a8', - 'psi;': '\u03c8', - 'puncsp;': '\u2008', - 'Qfr;': '\U0001d514', - 'qfr;': '\U0001d52e', - 'qint;': '\u2a0c', - 'Qopf;': '\u211a', - 'qopf;': '\U0001d562', - 'qprime;': '\u2057', - 'Qscr;': '\U0001d4ac', - 'qscr;': '\U0001d4c6', - 'quaternions;': '\u210d', - 'quatint;': '\u2a16', - 'quest;': '?', - 'questeq;': '\u225f', - 'QUOT': '"', - 'quot': '"', - 'QUOT;': '"', - 'quot;': '"', - 'rAarr;': '\u21db', - 'race;': '\u223d\u0331', - 'Racute;': '\u0154', - 'racute;': '\u0155', - 'radic;': '\u221a', - 'raemptyv;': '\u29b3', - 'Rang;': '\u27eb', - 'rang;': '\u27e9', - 'rangd;': '\u2992', - 'range;': '\u29a5', - 
'rangle;': '\u27e9', - 'raquo': '\xbb', - 'raquo;': '\xbb', - 'Rarr;': '\u21a0', - 'rArr;': '\u21d2', - 'rarr;': '\u2192', - 'rarrap;': '\u2975', - 'rarrb;': '\u21e5', - 'rarrbfs;': '\u2920', - 'rarrc;': '\u2933', - 'rarrfs;': '\u291e', - 'rarrhk;': '\u21aa', - 'rarrlp;': '\u21ac', - 'rarrpl;': '\u2945', - 'rarrsim;': '\u2974', - 'Rarrtl;': '\u2916', - 'rarrtl;': '\u21a3', - 'rarrw;': '\u219d', - 'rAtail;': '\u291c', - 'ratail;': '\u291a', - 'ratio;': '\u2236', - 'rationals;': '\u211a', - 'RBarr;': '\u2910', - 'rBarr;': '\u290f', - 'rbarr;': '\u290d', - 'rbbrk;': '\u2773', - 'rbrace;': '}', - 'rbrack;': ']', - 'rbrke;': '\u298c', - 'rbrksld;': '\u298e', - 'rbrkslu;': '\u2990', - 'Rcaron;': '\u0158', - 'rcaron;': '\u0159', - 'Rcedil;': '\u0156', - 'rcedil;': '\u0157', - 'rceil;': '\u2309', - 'rcub;': '}', - 'Rcy;': '\u0420', - 'rcy;': '\u0440', - 'rdca;': '\u2937', - 'rdldhar;': '\u2969', - 'rdquo;': '\u201d', - 'rdquor;': '\u201d', - 'rdsh;': '\u21b3', - 'Re;': '\u211c', - 'real;': '\u211c', - 'realine;': '\u211b', - 'realpart;': '\u211c', - 'reals;': '\u211d', - 'rect;': '\u25ad', - 'REG': '\xae', - 'reg': '\xae', - 'REG;': '\xae', - 'reg;': '\xae', - 'ReverseElement;': '\u220b', - 'ReverseEquilibrium;': '\u21cb', - 'ReverseUpEquilibrium;': '\u296f', - 'rfisht;': '\u297d', - 'rfloor;': '\u230b', - 'Rfr;': '\u211c', - 'rfr;': '\U0001d52f', - 'rHar;': '\u2964', - 'rhard;': '\u21c1', - 'rharu;': '\u21c0', - 'rharul;': '\u296c', - 'Rho;': '\u03a1', - 'rho;': '\u03c1', - 'rhov;': '\u03f1', - 'RightAngleBracket;': '\u27e9', - 'RightArrow;': '\u2192', - 'Rightarrow;': '\u21d2', - 'rightarrow;': '\u2192', - 'RightArrowBar;': '\u21e5', - 'RightArrowLeftArrow;': '\u21c4', - 'rightarrowtail;': '\u21a3', - 'RightCeiling;': '\u2309', - 'RightDoubleBracket;': '\u27e7', - 'RightDownTeeVector;': '\u295d', - 'RightDownVector;': '\u21c2', - 'RightDownVectorBar;': '\u2955', - 'RightFloor;': '\u230b', - 'rightharpoondown;': '\u21c1', - 'rightharpoonup;': '\u21c0', - 'rightleftarrows;': '\u21c4', - 'rightleftharpoons;': '\u21cc', - 'rightrightarrows;': '\u21c9', - 'rightsquigarrow;': '\u219d', - 'RightTee;': '\u22a2', - 'RightTeeArrow;': '\u21a6', - 'RightTeeVector;': '\u295b', - 'rightthreetimes;': '\u22cc', - 'RightTriangle;': '\u22b3', - 'RightTriangleBar;': '\u29d0', - 'RightTriangleEqual;': '\u22b5', - 'RightUpDownVector;': '\u294f', - 'RightUpTeeVector;': '\u295c', - 'RightUpVector;': '\u21be', - 'RightUpVectorBar;': '\u2954', - 'RightVector;': '\u21c0', - 'RightVectorBar;': '\u2953', - 'ring;': '\u02da', - 'risingdotseq;': '\u2253', - 'rlarr;': '\u21c4', - 'rlhar;': '\u21cc', - 'rlm;': '\u200f', - 'rmoust;': '\u23b1', - 'rmoustache;': '\u23b1', - 'rnmid;': '\u2aee', - 'roang;': '\u27ed', - 'roarr;': '\u21fe', - 'robrk;': '\u27e7', - 'ropar;': '\u2986', - 'Ropf;': '\u211d', - 'ropf;': '\U0001d563', - 'roplus;': '\u2a2e', - 'rotimes;': '\u2a35', - 'RoundImplies;': '\u2970', - 'rpar;': ')', - 'rpargt;': '\u2994', - 'rppolint;': '\u2a12', - 'rrarr;': '\u21c9', - 'Rrightarrow;': '\u21db', - 'rsaquo;': '\u203a', - 'Rscr;': '\u211b', - 'rscr;': '\U0001d4c7', - 'Rsh;': '\u21b1', - 'rsh;': '\u21b1', - 'rsqb;': ']', - 'rsquo;': '\u2019', - 'rsquor;': '\u2019', - 'rthree;': '\u22cc', - 'rtimes;': '\u22ca', - 'rtri;': '\u25b9', - 'rtrie;': '\u22b5', - 'rtrif;': '\u25b8', - 'rtriltri;': '\u29ce', - 'RuleDelayed;': '\u29f4', - 'ruluhar;': '\u2968', - 'rx;': '\u211e', - 'Sacute;': '\u015a', - 'sacute;': '\u015b', - 'sbquo;': '\u201a', - 'Sc;': '\u2abc', - 'sc;': '\u227b', - 'scap;': '\u2ab8', - 'Scaron;': '\u0160', - 
'scaron;': '\u0161', - 'sccue;': '\u227d', - 'scE;': '\u2ab4', - 'sce;': '\u2ab0', - 'Scedil;': '\u015e', - 'scedil;': '\u015f', - 'Scirc;': '\u015c', - 'scirc;': '\u015d', - 'scnap;': '\u2aba', - 'scnE;': '\u2ab6', - 'scnsim;': '\u22e9', - 'scpolint;': '\u2a13', - 'scsim;': '\u227f', - 'Scy;': '\u0421', - 'scy;': '\u0441', - 'sdot;': '\u22c5', - 'sdotb;': '\u22a1', - 'sdote;': '\u2a66', - 'searhk;': '\u2925', - 'seArr;': '\u21d8', - 'searr;': '\u2198', - 'searrow;': '\u2198', - 'sect': '\xa7', - 'sect;': '\xa7', - 'semi;': ';', - 'seswar;': '\u2929', - 'setminus;': '\u2216', - 'setmn;': '\u2216', - 'sext;': '\u2736', - 'Sfr;': '\U0001d516', - 'sfr;': '\U0001d530', - 'sfrown;': '\u2322', - 'sharp;': '\u266f', - 'SHCHcy;': '\u0429', - 'shchcy;': '\u0449', - 'SHcy;': '\u0428', - 'shcy;': '\u0448', - 'ShortDownArrow;': '\u2193', - 'ShortLeftArrow;': '\u2190', - 'shortmid;': '\u2223', - 'shortparallel;': '\u2225', - 'ShortRightArrow;': '\u2192', - 'ShortUpArrow;': '\u2191', - 'shy': '\xad', - 'shy;': '\xad', - 'Sigma;': '\u03a3', - 'sigma;': '\u03c3', - 'sigmaf;': '\u03c2', - 'sigmav;': '\u03c2', - 'sim;': '\u223c', - 'simdot;': '\u2a6a', - 'sime;': '\u2243', - 'simeq;': '\u2243', - 'simg;': '\u2a9e', - 'simgE;': '\u2aa0', - 'siml;': '\u2a9d', - 'simlE;': '\u2a9f', - 'simne;': '\u2246', - 'simplus;': '\u2a24', - 'simrarr;': '\u2972', - 'slarr;': '\u2190', - 'SmallCircle;': '\u2218', - 'smallsetminus;': '\u2216', - 'smashp;': '\u2a33', - 'smeparsl;': '\u29e4', - 'smid;': '\u2223', - 'smile;': '\u2323', - 'smt;': '\u2aaa', - 'smte;': '\u2aac', - 'smtes;': '\u2aac\ufe00', - 'SOFTcy;': '\u042c', - 'softcy;': '\u044c', - 'sol;': '/', - 'solb;': '\u29c4', - 'solbar;': '\u233f', - 'Sopf;': '\U0001d54a', - 'sopf;': '\U0001d564', - 'spades;': '\u2660', - 'spadesuit;': '\u2660', - 'spar;': '\u2225', - 'sqcap;': '\u2293', - 'sqcaps;': '\u2293\ufe00', - 'sqcup;': '\u2294', - 'sqcups;': '\u2294\ufe00', - 'Sqrt;': '\u221a', - 'sqsub;': '\u228f', - 'sqsube;': '\u2291', - 'sqsubset;': '\u228f', - 'sqsubseteq;': '\u2291', - 'sqsup;': '\u2290', - 'sqsupe;': '\u2292', - 'sqsupset;': '\u2290', - 'sqsupseteq;': '\u2292', - 'squ;': '\u25a1', - 'Square;': '\u25a1', - 'square;': '\u25a1', - 'SquareIntersection;': '\u2293', - 'SquareSubset;': '\u228f', - 'SquareSubsetEqual;': '\u2291', - 'SquareSuperset;': '\u2290', - 'SquareSupersetEqual;': '\u2292', - 'SquareUnion;': '\u2294', - 'squarf;': '\u25aa', - 'squf;': '\u25aa', - 'srarr;': '\u2192', - 'Sscr;': '\U0001d4ae', - 'sscr;': '\U0001d4c8', - 'ssetmn;': '\u2216', - 'ssmile;': '\u2323', - 'sstarf;': '\u22c6', - 'Star;': '\u22c6', - 'star;': '\u2606', - 'starf;': '\u2605', - 'straightepsilon;': '\u03f5', - 'straightphi;': '\u03d5', - 'strns;': '\xaf', - 'Sub;': '\u22d0', - 'sub;': '\u2282', - 'subdot;': '\u2abd', - 'subE;': '\u2ac5', - 'sube;': '\u2286', - 'subedot;': '\u2ac3', - 'submult;': '\u2ac1', - 'subnE;': '\u2acb', - 'subne;': '\u228a', - 'subplus;': '\u2abf', - 'subrarr;': '\u2979', - 'Subset;': '\u22d0', - 'subset;': '\u2282', - 'subseteq;': '\u2286', - 'subseteqq;': '\u2ac5', - 'SubsetEqual;': '\u2286', - 'subsetneq;': '\u228a', - 'subsetneqq;': '\u2acb', - 'subsim;': '\u2ac7', - 'subsub;': '\u2ad5', - 'subsup;': '\u2ad3', - 'succ;': '\u227b', - 'succapprox;': '\u2ab8', - 'succcurlyeq;': '\u227d', - 'Succeeds;': '\u227b', - 'SucceedsEqual;': '\u2ab0', - 'SucceedsSlantEqual;': '\u227d', - 'SucceedsTilde;': '\u227f', - 'succeq;': '\u2ab0', - 'succnapprox;': '\u2aba', - 'succneqq;': '\u2ab6', - 'succnsim;': '\u22e9', - 'succsim;': '\u227f', - 'SuchThat;': 
'\u220b', - 'Sum;': '\u2211', - 'sum;': '\u2211', - 'sung;': '\u266a', - 'sup1': '\xb9', - 'sup1;': '\xb9', - 'sup2': '\xb2', - 'sup2;': '\xb2', - 'sup3': '\xb3', - 'sup3;': '\xb3', - 'Sup;': '\u22d1', - 'sup;': '\u2283', - 'supdot;': '\u2abe', - 'supdsub;': '\u2ad8', - 'supE;': '\u2ac6', - 'supe;': '\u2287', - 'supedot;': '\u2ac4', - 'Superset;': '\u2283', - 'SupersetEqual;': '\u2287', - 'suphsol;': '\u27c9', - 'suphsub;': '\u2ad7', - 'suplarr;': '\u297b', - 'supmult;': '\u2ac2', - 'supnE;': '\u2acc', - 'supne;': '\u228b', - 'supplus;': '\u2ac0', - 'Supset;': '\u22d1', - 'supset;': '\u2283', - 'supseteq;': '\u2287', - 'supseteqq;': '\u2ac6', - 'supsetneq;': '\u228b', - 'supsetneqq;': '\u2acc', - 'supsim;': '\u2ac8', - 'supsub;': '\u2ad4', - 'supsup;': '\u2ad6', - 'swarhk;': '\u2926', - 'swArr;': '\u21d9', - 'swarr;': '\u2199', - 'swarrow;': '\u2199', - 'swnwar;': '\u292a', - 'szlig': '\xdf', - 'szlig;': '\xdf', - 'Tab;': '\t', - 'target;': '\u2316', - 'Tau;': '\u03a4', - 'tau;': '\u03c4', - 'tbrk;': '\u23b4', - 'Tcaron;': '\u0164', - 'tcaron;': '\u0165', - 'Tcedil;': '\u0162', - 'tcedil;': '\u0163', - 'Tcy;': '\u0422', - 'tcy;': '\u0442', - 'tdot;': '\u20db', - 'telrec;': '\u2315', - 'Tfr;': '\U0001d517', - 'tfr;': '\U0001d531', - 'there4;': '\u2234', - 'Therefore;': '\u2234', - 'therefore;': '\u2234', - 'Theta;': '\u0398', - 'theta;': '\u03b8', - 'thetasym;': '\u03d1', - 'thetav;': '\u03d1', - 'thickapprox;': '\u2248', - 'thicksim;': '\u223c', - 'ThickSpace;': '\u205f\u200a', - 'thinsp;': '\u2009', - 'ThinSpace;': '\u2009', - 'thkap;': '\u2248', - 'thksim;': '\u223c', - 'THORN': '\xde', - 'thorn': '\xfe', - 'THORN;': '\xde', - 'thorn;': '\xfe', - 'Tilde;': '\u223c', - 'tilde;': '\u02dc', - 'TildeEqual;': '\u2243', - 'TildeFullEqual;': '\u2245', - 'TildeTilde;': '\u2248', - 'times': '\xd7', - 'times;': '\xd7', - 'timesb;': '\u22a0', - 'timesbar;': '\u2a31', - 'timesd;': '\u2a30', - 'tint;': '\u222d', - 'toea;': '\u2928', - 'top;': '\u22a4', - 'topbot;': '\u2336', - 'topcir;': '\u2af1', - 'Topf;': '\U0001d54b', - 'topf;': '\U0001d565', - 'topfork;': '\u2ada', - 'tosa;': '\u2929', - 'tprime;': '\u2034', - 'TRADE;': '\u2122', - 'trade;': '\u2122', - 'triangle;': '\u25b5', - 'triangledown;': '\u25bf', - 'triangleleft;': '\u25c3', - 'trianglelefteq;': '\u22b4', - 'triangleq;': '\u225c', - 'triangleright;': '\u25b9', - 'trianglerighteq;': '\u22b5', - 'tridot;': '\u25ec', - 'trie;': '\u225c', - 'triminus;': '\u2a3a', - 'TripleDot;': '\u20db', - 'triplus;': '\u2a39', - 'trisb;': '\u29cd', - 'tritime;': '\u2a3b', - 'trpezium;': '\u23e2', - 'Tscr;': '\U0001d4af', - 'tscr;': '\U0001d4c9', - 'TScy;': '\u0426', - 'tscy;': '\u0446', - 'TSHcy;': '\u040b', - 'tshcy;': '\u045b', - 'Tstrok;': '\u0166', - 'tstrok;': '\u0167', - 'twixt;': '\u226c', - 'twoheadleftarrow;': '\u219e', - 'twoheadrightarrow;': '\u21a0', - 'Uacute': '\xda', - 'uacute': '\xfa', - 'Uacute;': '\xda', - 'uacute;': '\xfa', - 'Uarr;': '\u219f', - 'uArr;': '\u21d1', - 'uarr;': '\u2191', - 'Uarrocir;': '\u2949', - 'Ubrcy;': '\u040e', - 'ubrcy;': '\u045e', - 'Ubreve;': '\u016c', - 'ubreve;': '\u016d', - 'Ucirc': '\xdb', - 'ucirc': '\xfb', - 'Ucirc;': '\xdb', - 'ucirc;': '\xfb', - 'Ucy;': '\u0423', - 'ucy;': '\u0443', - 'udarr;': '\u21c5', - 'Udblac;': '\u0170', - 'udblac;': '\u0171', - 'udhar;': '\u296e', - 'ufisht;': '\u297e', - 'Ufr;': '\U0001d518', - 'ufr;': '\U0001d532', - 'Ugrave': '\xd9', - 'ugrave': '\xf9', - 'Ugrave;': '\xd9', - 'ugrave;': '\xf9', - 'uHar;': '\u2963', - 'uharl;': '\u21bf', - 'uharr;': '\u21be', - 'uhblk;': '\u2580', 
- 'ulcorn;': '\u231c', - 'ulcorner;': '\u231c', - 'ulcrop;': '\u230f', - 'ultri;': '\u25f8', - 'Umacr;': '\u016a', - 'umacr;': '\u016b', - 'uml': '\xa8', - 'uml;': '\xa8', - 'UnderBar;': '_', - 'UnderBrace;': '\u23df', - 'UnderBracket;': '\u23b5', - 'UnderParenthesis;': '\u23dd', - 'Union;': '\u22c3', - 'UnionPlus;': '\u228e', - 'Uogon;': '\u0172', - 'uogon;': '\u0173', - 'Uopf;': '\U0001d54c', - 'uopf;': '\U0001d566', - 'UpArrow;': '\u2191', - 'Uparrow;': '\u21d1', - 'uparrow;': '\u2191', - 'UpArrowBar;': '\u2912', - 'UpArrowDownArrow;': '\u21c5', - 'UpDownArrow;': '\u2195', - 'Updownarrow;': '\u21d5', - 'updownarrow;': '\u2195', - 'UpEquilibrium;': '\u296e', - 'upharpoonleft;': '\u21bf', - 'upharpoonright;': '\u21be', - 'uplus;': '\u228e', - 'UpperLeftArrow;': '\u2196', - 'UpperRightArrow;': '\u2197', - 'Upsi;': '\u03d2', - 'upsi;': '\u03c5', - 'upsih;': '\u03d2', - 'Upsilon;': '\u03a5', - 'upsilon;': '\u03c5', - 'UpTee;': '\u22a5', - 'UpTeeArrow;': '\u21a5', - 'upuparrows;': '\u21c8', - 'urcorn;': '\u231d', - 'urcorner;': '\u231d', - 'urcrop;': '\u230e', - 'Uring;': '\u016e', - 'uring;': '\u016f', - 'urtri;': '\u25f9', - 'Uscr;': '\U0001d4b0', - 'uscr;': '\U0001d4ca', - 'utdot;': '\u22f0', - 'Utilde;': '\u0168', - 'utilde;': '\u0169', - 'utri;': '\u25b5', - 'utrif;': '\u25b4', - 'uuarr;': '\u21c8', - 'Uuml': '\xdc', - 'uuml': '\xfc', - 'Uuml;': '\xdc', - 'uuml;': '\xfc', - 'uwangle;': '\u29a7', - 'vangrt;': '\u299c', - 'varepsilon;': '\u03f5', - 'varkappa;': '\u03f0', - 'varnothing;': '\u2205', - 'varphi;': '\u03d5', - 'varpi;': '\u03d6', - 'varpropto;': '\u221d', - 'vArr;': '\u21d5', - 'varr;': '\u2195', - 'varrho;': '\u03f1', - 'varsigma;': '\u03c2', - 'varsubsetneq;': '\u228a\ufe00', - 'varsubsetneqq;': '\u2acb\ufe00', - 'varsupsetneq;': '\u228b\ufe00', - 'varsupsetneqq;': '\u2acc\ufe00', - 'vartheta;': '\u03d1', - 'vartriangleleft;': '\u22b2', - 'vartriangleright;': '\u22b3', - 'Vbar;': '\u2aeb', - 'vBar;': '\u2ae8', - 'vBarv;': '\u2ae9', - 'Vcy;': '\u0412', - 'vcy;': '\u0432', - 'VDash;': '\u22ab', - 'Vdash;': '\u22a9', - 'vDash;': '\u22a8', - 'vdash;': '\u22a2', - 'Vdashl;': '\u2ae6', - 'Vee;': '\u22c1', - 'vee;': '\u2228', - 'veebar;': '\u22bb', - 'veeeq;': '\u225a', - 'vellip;': '\u22ee', - 'Verbar;': '\u2016', - 'verbar;': '|', - 'Vert;': '\u2016', - 'vert;': '|', - 'VerticalBar;': '\u2223', - 'VerticalLine;': '|', - 'VerticalSeparator;': '\u2758', - 'VerticalTilde;': '\u2240', - 'VeryThinSpace;': '\u200a', - 'Vfr;': '\U0001d519', - 'vfr;': '\U0001d533', - 'vltri;': '\u22b2', - 'vnsub;': '\u2282\u20d2', - 'vnsup;': '\u2283\u20d2', - 'Vopf;': '\U0001d54d', - 'vopf;': '\U0001d567', - 'vprop;': '\u221d', - 'vrtri;': '\u22b3', - 'Vscr;': '\U0001d4b1', - 'vscr;': '\U0001d4cb', - 'vsubnE;': '\u2acb\ufe00', - 'vsubne;': '\u228a\ufe00', - 'vsupnE;': '\u2acc\ufe00', - 'vsupne;': '\u228b\ufe00', - 'Vvdash;': '\u22aa', - 'vzigzag;': '\u299a', - 'Wcirc;': '\u0174', - 'wcirc;': '\u0175', - 'wedbar;': '\u2a5f', - 'Wedge;': '\u22c0', - 'wedge;': '\u2227', - 'wedgeq;': '\u2259', - 'weierp;': '\u2118', - 'Wfr;': '\U0001d51a', - 'wfr;': '\U0001d534', - 'Wopf;': '\U0001d54e', - 'wopf;': '\U0001d568', - 'wp;': '\u2118', - 'wr;': '\u2240', - 'wreath;': '\u2240', - 'Wscr;': '\U0001d4b2', - 'wscr;': '\U0001d4cc', - 'xcap;': '\u22c2', - 'xcirc;': '\u25ef', - 'xcup;': '\u22c3', - 'xdtri;': '\u25bd', - 'Xfr;': '\U0001d51b', - 'xfr;': '\U0001d535', - 'xhArr;': '\u27fa', - 'xharr;': '\u27f7', - 'Xi;': '\u039e', - 'xi;': '\u03be', - 'xlArr;': '\u27f8', - 'xlarr;': '\u27f5', - 'xmap;': '\u27fc', - 'xnis;': 
'\u22fb', - 'xodot;': '\u2a00', - 'Xopf;': '\U0001d54f', - 'xopf;': '\U0001d569', - 'xoplus;': '\u2a01', - 'xotime;': '\u2a02', - 'xrArr;': '\u27f9', - 'xrarr;': '\u27f6', - 'Xscr;': '\U0001d4b3', - 'xscr;': '\U0001d4cd', - 'xsqcup;': '\u2a06', - 'xuplus;': '\u2a04', - 'xutri;': '\u25b3', - 'xvee;': '\u22c1', - 'xwedge;': '\u22c0', - 'Yacute': '\xdd', - 'yacute': '\xfd', - 'Yacute;': '\xdd', - 'yacute;': '\xfd', - 'YAcy;': '\u042f', - 'yacy;': '\u044f', - 'Ycirc;': '\u0176', - 'ycirc;': '\u0177', - 'Ycy;': '\u042b', - 'ycy;': '\u044b', - 'yen': '\xa5', - 'yen;': '\xa5', - 'Yfr;': '\U0001d51c', - 'yfr;': '\U0001d536', - 'YIcy;': '\u0407', - 'yicy;': '\u0457', - 'Yopf;': '\U0001d550', - 'yopf;': '\U0001d56a', - 'Yscr;': '\U0001d4b4', - 'yscr;': '\U0001d4ce', - 'YUcy;': '\u042e', - 'yucy;': '\u044e', - 'yuml': '\xff', - 'Yuml;': '\u0178', - 'yuml;': '\xff', - 'Zacute;': '\u0179', - 'zacute;': '\u017a', - 'Zcaron;': '\u017d', - 'zcaron;': '\u017e', - 'Zcy;': '\u0417', - 'zcy;': '\u0437', - 'Zdot;': '\u017b', - 'zdot;': '\u017c', - 'zeetrf;': '\u2128', - 'ZeroWidthSpace;': '\u200b', - 'Zeta;': '\u0396', - 'zeta;': '\u03b6', - 'Zfr;': '\u2128', - 'zfr;': '\U0001d537', - 'ZHcy;': '\u0416', - 'zhcy;': '\u0436', - 'zigrarr;': '\u21dd', - 'Zopf;': '\u2124', - 'zopf;': '\U0001d56b', - 'Zscr;': '\U0001d4b5', - 'zscr;': '\U0001d4cf', - 'zwj;': '\u200d', - 'zwnj;': '\u200c', - } - -try: - import http.client as compat_http_client -except ImportError: # Python 2 - import httplib as compat_http_client - -try: - from urllib.error import HTTPError as compat_HTTPError -except ImportError: # Python 2 - from urllib2 import HTTPError as compat_HTTPError +# HTMLParseError has been deprecated in Python 3.3 and removed in +# Python 3.5. Introducing dummy exception for Python >3.5 for compatible +# and uniform cross-version exception handling +class compat_HTMLParseError(Exception): + pass -try: - from urllib.request import urlretrieve as compat_urlretrieve -except ImportError: # Python 2 - from urllib import urlretrieve as compat_urlretrieve -try: - from html.parser import HTMLParser as compat_HTMLParser -except ImportError: # Python 2 - from HTMLParser import HTMLParser as compat_HTMLParser - -try: # Python 2 - from HTMLParser import HTMLParseError as compat_HTMLParseError -except ImportError: # Python <3.4 - try: - from html.parser import HTMLParseError as compat_HTMLParseError - except ImportError: # Python >3.4 - - # HTMLParseError has been deprecated in Python 3.3 and removed in - # Python 3.5. 
Introducing dummy exception for Python >3.5 for compatible - # and uniform cross-version exception handling - class compat_HTMLParseError(Exception): - pass - -try: - from subprocess import DEVNULL - compat_subprocess_get_DEVNULL = lambda: DEVNULL -except ImportError: - compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') - -try: - import http.server as compat_http_server -except ImportError: - import BaseHTTPServer as compat_http_server - -try: - compat_str = unicode # Python 2 -except NameError: - compat_str = str - -try: - from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes - from urllib.parse import unquote as compat_urllib_parse_unquote - from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus -except ImportError: # Python 2 - _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire') - else re.compile(r'([\x00-\x7f]+)')) - - # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus - # implementations from cpython 3.4.3's stdlib. Python 2's version - # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244) - - def compat_urllib_parse_unquote_to_bytes(string): - """unquote_to_bytes('abc%20def') -> b'abc def'.""" - # Note: strings are encoded as UTF-8. This is only an issue if it contains - # unescaped non-ASCII characters, which URIs should not. - if not string: - # Is it a string-like object? - string.split - return b'' - if isinstance(string, compat_str): - string = string.encode('utf-8') - bits = string.split(b'%') - if len(bits) == 1: - return string - res = [bits[0]] - append = res.append - for item in bits[1:]: - try: - append(compat_urllib_parse._hextochr[item[:2]]) - append(item[2:]) - except KeyError: - append(b'%') - append(item) - return b''.join(res) - - def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): - """Replace %xx escapes by their single-character equivalent. The optional - encoding and errors parameters specify how to decode percent-encoded - sequences into Unicode characters, as accepted by the bytes.decode() - method. - By default, percent-encoded sequences are decoded with UTF-8, and invalid - sequences are replaced by a placeholder character. - - unquote('abc%20def') -> 'abc def'. - """ - if '%' not in string: - string.split - return string - if encoding is None: - encoding = 'utf-8' - if errors is None: - errors = 'replace' - bits = _asciire.split(string) - res = [bits[0]] - append = res.append - for i in range(1, len(bits), 2): - append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors)) - append(bits[i + 1]) - return ''.join(res) - - def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'): - """Like unquote(), but also replace plus signs by spaces, as required for - unquoting HTML form values. - - unquote_plus('%7e/abc+def') -> '~/abc def' - """ - string = string.replace('+', ' ') - return compat_urllib_parse_unquote(string, encoding, errors) - -try: - from urllib.parse import urlencode as compat_urllib_parse_urlencode -except ImportError: # Python 2 - # Python 2 will choke in urlencode on mixture of byte and unicode strings. - # Possible solutions are to either port it from python 3 with all - # the friends or manually ensure input query contains only byte strings. - # We will stick with latter thus recursively encoding the whole query. 
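For context, the recursive byte-encoding trick that the removed shim below relies on can be sketched in modern Python roughly as follows (a simplified illustration only; `encode_query` is an invented name, not code from this patch):

    from urllib.parse import urlencode

    def encode_query(obj, encoding='utf-8'):
        # Walk dicts/lists/tuples and turn every text string into bytes, so a
        # bytes-only urlencode (as on Python 2) never sees mixed types.
        if isinstance(obj, dict):
            return {encode_query(k, encoding): encode_query(v, encoding)
                    for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            seq = [encode_query(e, encoding) for e in obj]
            return tuple(seq) if isinstance(obj, tuple) else seq
        return obj.encode(encoding) if isinstance(obj, str) else obj

    print(urlencode(encode_query({'q': '中文'}), doseq=True))  # q=%E4%B8%AD%E6%96%87

Python 3's urllib.parse.urlencode percent-encodes bytes values directly, which is why the whole shim reduces to a plain alias further down in this patch.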
-    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
-        def encode_elem(e):
-            if isinstance(e, dict):
-                e = encode_dict(e)
-            elif isinstance(e, (list, tuple,)):
-                list_e = encode_list(e)
-                e = tuple(list_e) if isinstance(e, tuple) else list_e
-            elif isinstance(e, compat_str):
-                e = e.encode(encoding)
-            return e
-
-        def encode_dict(d):
-            return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
-
-        def encode_list(l):
-            return [encode_elem(e) for e in l]
-
-        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
-
-try:
-    from urllib.request import DataHandler as compat_urllib_request_DataHandler
-except ImportError:  # Python < 3.4
-    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
-    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
-        def data_open(self, req):
-            # data URLs as specified in RFC 2397.
-            #
-            # ignores POSTed data
-            #
-            # syntax:
-            #   dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
-            #   mediatype := [ type "/" subtype ] *( ";" parameter )
-            #   data      := *urlchar
-            #   parameter := attribute "=" value
-            url = req.get_full_url()
-
-            scheme, data = url.split(':', 1)
-            mediatype, data = data.split(',', 1)
-
-            # even base64 encoded data URLs might be quoted so unquote in any case:
-            data = compat_urllib_parse_unquote_to_bytes(data)
-            if mediatype.endswith(';base64'):
-                data = binascii.a2b_base64(data)
-                mediatype = mediatype[:-7]
-
-            if not mediatype:
-                mediatype = 'text/plain;charset=US-ASCII'
-
-            headers = email.message_from_string(
-                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
-
-            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
-
-try:
-    compat_basestring = basestring  # Python 2
-except NameError:
-    compat_basestring = str
-
-try:
-    compat_chr = unichr  # Python 2
-except NameError:
-    compat_chr = chr
-
-try:
-    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError:  # Python 2.6
-    from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
-
-etree = xml.etree.ElementTree
+# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
+# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines
+def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+    return ctypes.WINFUNCTYPE(*args, **kwargs)

 class _TreeBuilder(etree.TreeBuilder):
@@ -2528,126 +44,8 @@ class _TreeBuilder(etree.TreeBuilder):
         pass

-try:
-    # xml.etree.ElementTree.Element is a method in Python <=2.6 and
-    # the following will crash with:
-    # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
-    isinstance(None, xml.etree.ElementTree.Element)
-    from xml.etree.ElementTree import Element as compat_etree_Element
-except TypeError:  # Python <=2.6
-    from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
-
-if sys.version_info[0] >= 3:
-    def compat_etree_fromstring(text):
-        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
-else:
-    # python 2.x tries to encode unicode strings with ascii (see the
-    # XMLParser._fixtext method)
-    try:
-        _etree_iter = etree.Element.iter
-    except AttributeError:  # Python <=2.6
-        def _etree_iter(root):
-            for el in root.findall('*'):
-                yield el
-                for sub in _etree_iter(el):
-                    yield sub
-
-    # on 2.6 XML doesn't have a parser argument, function copied from CPython
-    # 2.7 source
-    def _XML(text, parser=None):
-        if not parser:
-            parser = etree.XMLParser(target=_TreeBuilder())
-        parser.feed(text)
-        return parser.close()
-
-    def _element_factory(*args, **kwargs):
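# (_element_factory mirrors etree.Element but decodes any bytes attribute
# values to text, so that documents parsed under Python 2 expose str
# attribute values just as they do on Python 3.)
-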
el = etree.Element(*args, **kwargs) - for k, v in el.items(): - if isinstance(v, bytes): - el.set(k, v.decode('utf-8')) - return el - - def compat_etree_fromstring(text): - doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) - for el in _etree_iter(doc): - if el.text is not None and isinstance(el.text, bytes): - el.text = el.text.decode('utf-8') - return doc - -if hasattr(etree, 'register_namespace'): - compat_etree_register_namespace = etree.register_namespace -else: - def compat_etree_register_namespace(prefix, uri): - """Register a namespace prefix. - The registry is global, and any existing mapping for either the - given prefix or the namespace URI will be removed. - *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and - attributes in this namespace will be serialized with prefix if possible. - ValueError is raised if prefix is reserved or is invalid. - """ - if re.match(r"ns\d+$", prefix): - raise ValueError("Prefix format reserved for internal use") - for k, v in list(etree._namespace_map.items()): - if k == uri or v == prefix: - del etree._namespace_map[k] - etree._namespace_map[uri] = prefix - -if sys.version_info < (2, 7): - # Here comes the crazy part: In 2.6, if the xpath is a unicode, - # .//node does not match if a node is a direct child of . ! - def compat_xpath(xpath): - if isinstance(xpath, compat_str): - xpath = xpath.encode('ascii') - return xpath -else: - compat_xpath = lambda xpath: xpath - -try: - from urllib.parse import parse_qs as compat_parse_qs -except ImportError: # Python 2 - # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. - # Python 2's version is apparently totally broken - - def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - qs, _coerce_result = qs, compat_str - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError('bad query field: %r' % (name_value,)) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = nv[0].replace('+', ' ') - name = compat_urllib_parse_unquote( - name, encoding=encoding, errors=errors) - name = _coerce_result(name) - value = nv[1].replace('+', ' ') - value = compat_urllib_parse_unquote( - value, encoding=encoding, errors=errors) - value = _coerce_result(value) - r.append((name, value)) - return r - - def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - parsed_result = {} - pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, - encoding=encoding, errors=errors) - for name, value in pairs: - if name in parsed_result: - parsed_result[name].append(value) - else: - parsed_result[name] = [value] - return parsed_result +def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) compat_os_name = os._name if os.name == 'java' else os.name @@ -2657,29 +55,7 @@ if compat_os_name == 'nt': def compat_shlex_quote(s): return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') else: - try: - from shlex import quote as compat_shlex_quote - except ImportError: # Python < 3.3 - def compat_shlex_quote(s): - if re.match(r'^[-_\w./]+$', s): - return s - else: - return "'" + s.replace("'", 
"'\"'\"'") + "'" - - -try: - args = shlex.split('中文') - assert (isinstance(args, list) - and isinstance(args[0], compat_str) - and args[0] == '中文') - compat_shlex_split = shlex.split -except (AssertionError, UnicodeEncodeError): - # Working around shlex issue with unicode strings on some python 2 - # versions (see http://bugs.python.org/issue1548891) - def compat_shlex_split(s, comments=False, posix=True): - if isinstance(s, compat_str): - s = s.encode('utf-8') - return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix))) + from shlex import quote as compat_shlex_quote def compat_ord(c): @@ -2689,89 +65,8 @@ def compat_ord(c): return ord(c) -if sys.version_info >= (3, 0): - compat_getenv = os.getenv - compat_expanduser = os.path.expanduser - - def compat_setenv(key, value, env=os.environ): - env[key] = value -else: - # Environment variables should be decoded with filesystem encoding. - # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) - - def compat_getenv(key, default=None): - from .utils import get_filesystem_encoding - env = os.getenv(key, default) - if env: - env = env.decode(get_filesystem_encoding()) - return env - - def compat_setenv(key, value, env=os.environ): - def encode(v): - from .utils import get_filesystem_encoding - return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v - env[encode(key)] = encode(value) - - # HACK: The default implementations of os.path.expanduser from cpython do not decode - # environment variables with filesystem encoding. We will work around this by - # providing adjusted implementations. - # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib - # for different platforms with correct environment variables decoding. - - if compat_os_name == 'posix': - def compat_expanduser(path): - """Expand ~ and ~user constructions. If user or $HOME is unknown, - do nothing.""" - if not path.startswith('~'): - return path - i = path.find('/', 1) - if i < 0: - i = len(path) - if i == 1: - if 'HOME' not in os.environ: - import pwd - userhome = pwd.getpwuid(os.getuid()).pw_dir - else: - userhome = compat_getenv('HOME') - else: - import pwd - try: - pwent = pwd.getpwnam(path[1:i]) - except KeyError: - return path - userhome = pwent.pw_dir - userhome = userhome.rstrip('/') - return (userhome + path[i:]) or '/' - elif compat_os_name in ('nt', 'ce'): - def compat_expanduser(path): - """Expand ~ and ~user constructs. 
- - If user or $HOME is unknown, do nothing.""" - if path[:1] != '~': - return path - i, n = 1, len(path) - while i < n and path[i] not in '/\\': - i = i + 1 - - if 'HOME' in os.environ: - userhome = compat_getenv('HOME') - elif 'USERPROFILE' in os.environ: - userhome = compat_getenv('USERPROFILE') - elif 'HOMEPATH' not in os.environ: - return path - else: - try: - drive = compat_getenv('HOMEDRIVE') - except KeyError: - drive = '' - userhome = os.path.join(drive, compat_getenv('HOMEPATH')) - - if i != 1: # ~user - userhome = os.path.join(os.path.dirname(userhome), path[1:i]) - - return userhome + path[i:] - else: - compat_expanduser = os.path.expanduser +def compat_setenv(key, value, env=os.environ): + env[key] = value if compat_os_name == 'nt' and sys.version_info < (3, 8): @@ -2785,78 +80,9 @@ else: compat_realpath = os.path.realpath -if sys.version_info < (3, 0): - def compat_print(s): - from .utils import preferredencoding - print(s.encode(preferredencoding(), 'xmlcharrefreplace')) -else: - def compat_print(s): - assert isinstance(s, compat_str) - print(s) - - -if sys.version_info < (3, 0) and sys.platform == 'win32': - def compat_getpass(prompt, *args, **kwargs): - if isinstance(prompt, compat_str): - from .utils import preferredencoding - prompt = prompt.encode(preferredencoding()) - return getpass.getpass(prompt, *args, **kwargs) -else: - compat_getpass = getpass.getpass - -try: - compat_input = raw_input -except NameError: # Python 3 - compat_input = input - -# Python < 2.6.5 require kwargs to be bytes -try: - def _testfunc(x): - pass - _testfunc(**{'x': 0}) -except TypeError: - def compat_kwargs(kwargs): - return dict((bytes(k), v) for k, v in kwargs.items()) -else: - compat_kwargs = lambda kwargs: kwargs - - -try: - compat_numeric_types = (int, float, long, complex) -except NameError: # Python 3 - compat_numeric_types = (int, float, complex) - - -try: - compat_integer_types = (int, long) -except NameError: # Python 3 - compat_integer_types = (int, ) - - -if sys.version_info < (2, 7): - def compat_socket_create_connection(address, timeout, source_address=None): - host, port = address - err = None - for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - sock = None - try: - sock = socket.socket(af, socktype, proto) - sock.settimeout(timeout) - if source_address: - sock.bind(source_address) - sock.connect(sa) - return sock - except socket.error as _: - err = _ - if sock is not None: - sock.close() - if err is not None: - raise err - else: - raise socket.error('getaddrinfo returns an empty list') -else: - compat_socket_create_connection = socket.create_connection +def compat_print(s): + assert isinstance(s, compat_str) + print(s) # Fix https://github.com/ytdl-org/youtube-dl/issues/4223 @@ -2880,129 +106,130 @@ def workaround_optparse_bug9161(): optparse.OptionGroup.add_option = _compat_add_option -if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 - compat_get_terminal_size = shutil.get_terminal_size -else: - _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) - - def compat_get_terminal_size(fallback=(80, 24)): - columns = compat_getenv('COLUMNS') - if columns: - columns = int(columns) - else: - columns = None - lines = compat_getenv('LINES') - if lines: - lines = int(lines) - else: - lines = None - - if columns is None or lines is None or columns <= 0 or lines <= 0: - try: - sp = subprocess.Popen( - ['stty', 'size'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = 
sp.communicate() - _lines, _columns = map(int, out.split()) - except Exception: - _columns, _lines = _terminal_size(*fallback) - - if columns is None or columns <= 0: - columns = _columns - if lines is None or lines <= 0: - lines = _lines - return _terminal_size(columns, lines) - try: - itertools.count(start=0, step=1) - compat_itertools_count = itertools.count -except TypeError: # Python 2.6 - def compat_itertools_count(start=0, step=1): - n = start - while True: - yield n - n += step - -if sys.version_info >= (3, 0): - from tokenize import tokenize as compat_tokenize_tokenize -else: - from tokenize import generate_tokens as compat_tokenize_tokenize + compat_Pattern = re.Pattern +except AttributeError: + compat_Pattern = type(re.compile('')) try: - struct.pack('!I', 0) -except TypeError: - # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument - # See https://bugs.python.org/issue19099 - def compat_struct_pack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.pack(spec, *args) + compat_Match = re.Match +except AttributeError: + compat_Match = type(re.compile('').match('')) - def compat_struct_unpack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.unpack(spec, *args) - class compat_Struct(struct.Struct): - def __init__(self, fmt): - if isinstance(fmt, compat_str): - fmt = fmt.encode('ascii') - super(compat_Struct, self).__init__(fmt) +try: + compat_asyncio_run = asyncio.run # >= 3.7 +except AttributeError: + def compat_asyncio_run(coro): + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(coro) + + asyncio.run = compat_asyncio_run + + +# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl +# See https://github.com/hypervideo/hypervideo/issues/792 +# https://docs.python.org/3/library/os.path.html#os.path.expanduser +if compat_os_name in ('nt', 'ce') and 'HOME' in os.environ: + _userhome = os.environ['HOME'] + + def compat_expanduser(path): + if not path.startswith('~'): + return path + i = path.replace('\\', '/', 1).find('/') # ~user + if i < 0: + i = len(path) + userhome = os.path.join(os.path.dirname(_userhome), path[1:i]) if i > 1 else _userhome + return userhome + path[i:] else: - compat_struct_pack = struct.pack - compat_struct_unpack = struct.unpack - if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8): - class compat_Struct(struct.Struct): - def unpack(self, string): - if not isinstance(string, buffer): # noqa: F821 - string = buffer(string) # noqa: F821 - return super(compat_Struct, self).unpack(string) - else: - compat_Struct = struct.Struct + compat_expanduser = os.path.expanduser try: - from future_builtins import zip as compat_zip -except ImportError: # not 2.6+ or is 3.x + from Cryptodome.Cipher import AES as compat_pycrypto_AES +except ImportError: try: - from itertools import izip as compat_zip # < 2.5 or 3.x + from Crypto.Cipher import AES as compat_pycrypto_AES except ImportError: - compat_zip = zip - - -if sys.version_info < (3, 3): - def compat_b64decode(s, *args, **kwargs): - if isinstance(s, compat_str): - s = s.encode('ascii') - return base64.b64decode(s, *args, **kwargs) -else: - compat_b64decode = base64.b64decode - - -if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): - # PyPy2 prior to version 5.4.0 expects byte strings as Windows function - # names, see 
the original PyPy issue [1] and the hypervideo one [2].
-    # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
-    # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
-    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
-        real = ctypes.WINFUNCTYPE(*args, **kwargs)
-
-        def resf(tpl, *args, **kwargs):
-            funcname, dll = tpl
-            return real((str(funcname), dll), *args, **kwargs)
-
-        return resf
-else:
-    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
-        return ctypes.WINFUNCTYPE(*args, **kwargs)
-
+        compat_pycrypto_AES = None
+
+
+def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.python.org/issue30075
+    if compat_os_name != 'nt':
+        return
+    os.system('')
+
+
+# Deprecated
+
+compat_basestring = str
+compat_chr = chr
+compat_input = input
+compat_integer_types = (int, )
+compat_kwargs = lambda kwargs: kwargs
+compat_numeric_types = (int, float, complex)
+compat_str = str
+compat_xpath = lambda xpath: xpath
+compat_zip = zip
+
+compat_HTMLParser = html.parser.HTMLParser
+compat_HTTPError = urllib.error.HTTPError
+compat_Struct = struct.Struct
+compat_b64decode = base64.b64decode
+compat_cookiejar = http.cookiejar
+compat_cookiejar_Cookie = compat_cookiejar.Cookie
+compat_cookies = http.cookies
+compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+compat_etree_Element = etree.Element
+compat_etree_register_namespace = etree.register_namespace
+compat_get_terminal_size = shutil.get_terminal_size
+compat_getenv = os.getenv
+compat_getpass = getpass.getpass
+compat_html_entities = html.entities
+compat_html_entities_html5 = compat_html_entities.html5
+compat_http_client = http.client
+compat_http_server = http.server
+compat_itertools_count = itertools.count
+compat_parse_qs = urllib.parse.parse_qs
+compat_shlex_split = shlex.split
+compat_socket_create_connection = socket.create_connection
+compat_struct_pack = struct.pack
+compat_struct_unpack = struct.unpack
+compat_subprocess_get_DEVNULL = lambda: DEVNULL
+compat_tokenize_tokenize = tokenize.tokenize
+compat_urllib_error = urllib.error
+compat_urllib_parse = urllib.parse
+compat_urllib_parse_quote = urllib.parse.quote
+compat_urllib_parse_quote_plus = urllib.parse.quote_plus
+compat_urllib_parse_unquote = urllib.parse.unquote
+compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
+compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
+compat_urllib_parse_urlencode = urllib.parse.urlencode
+compat_urllib_parse_urlparse = urllib.parse.urlparse
+compat_urllib_parse_urlunparse = urllib.parse.urlunparse
+compat_urllib_request = urllib.request
+compat_urllib_request_DataHandler = urllib.request.DataHandler
+compat_urllib_response = urllib.response
+compat_urlparse = urllib.parse
+compat_urlretrieve = urllib.request.urlretrieve
+compat_xml_parse_error = etree.ParseError
+
+
+# Set public objects
 __all__ = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
     'compat_HTTPError',
+    'compat_Match',
+    'compat_Pattern',
     'compat_Struct',
+    'compat_asyncio_run',
     'compat_b64decode',
     'compat_basestring',
     'compat_chr',
@@ -3031,6 +258,7 @@ __all__ = [
     'compat_os_name',
     'compat_parse_qs',
     'compat_print',
+    'compat_pycrypto_AES',
     'compat_realpath',
     'compat_setenv',
     'compat_shlex_quote',
@@ -3043,11 +271,14 @@ __all__ = [
     'compat_tokenize_tokenize',
     'compat_urllib_error',
     'compat_urllib_parse',
+    'compat_urllib_parse_quote',
+    'compat_urllib_parse_quote_plus',
     'compat_urllib_parse_unquote',
     'compat_urllib_parse_unquote_plus',
     'compat_urllib_parse_unquote_to_bytes',
'compat_urllib_parse_urlencode', 'compat_urllib_parse_urlparse', + 'compat_urllib_parse_urlunparse', 'compat_urllib_request', 'compat_urllib_request_DataHandler', 'compat_urllib_response', @@ -3056,5 +287,6 @@ __all__ = [ 'compat_xml_parse_error', 'compat_xpath', 'compat_zip', + 'windows_enable_vt_mode', 'workaround_optparse_bug9161', ] diff --git a/hypervideo_dl/cookies.py b/hypervideo_dl/cookies.py new file mode 100644 index 0000000..38fbdfa --- /dev/null +++ b/hypervideo_dl/cookies.py @@ -0,0 +1,745 @@ +import ctypes +import json +import os +import shutil +import struct +import subprocess +import sys +import tempfile +from datetime import datetime, timedelta, timezone +from hashlib import pbkdf2_hmac + +from .aes import aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes +from .compat import ( + compat_b64decode, + compat_cookiejar_Cookie, +) +from .utils import ( + bug_reports_message, + expand_path, + process_communicate_or_kill, + YoutubeDLCookieJar, +) + +try: + import sqlite3 + SQLITE_AVAILABLE = True +except ImportError: + # although sqlite3 is part of the standard library, it is possible to compile python without + # sqlite support. See: https://github.com/hypervideo/hypervideo/issues/544 + SQLITE_AVAILABLE = False + + +try: + import keyring + KEYRING_AVAILABLE = True + KEYRING_UNAVAILABLE_REASON = f'due to unknown reasons{bug_reports_message()}' +except ImportError: + KEYRING_AVAILABLE = False + KEYRING_UNAVAILABLE_REASON = ( + 'as the `keyring` module is not installed. ' + 'Please install by running `python3 -m pip install keyring`. ' + 'Depending on your platform, additional packages may be required ' + 'to access the keyring; see https://pypi.org/project/keyring') +except Exception as _err: + KEYRING_AVAILABLE = False + KEYRING_UNAVAILABLE_REASON = 'as the `keyring` module could not be initialized: %s' % _err + + +CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} +SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} + + +class YDLLogger: + def __init__(self, ydl=None): + self._ydl = ydl + + def debug(self, message): + if self._ydl: + self._ydl.write_debug(message) + + def info(self, message): + if self._ydl: + self._ydl.to_screen(f'[Cookies] {message}') + + def warning(self, message, only_once=False): + if self._ydl: + self._ydl.report_warning(message, only_once) + + def error(self, message): + if self._ydl: + self._ydl.report_error(message) + + +def load_cookies(cookie_file, browser_specification, ydl): + cookie_jars = [] + if browser_specification is not None: + browser_name, profile = _parse_browser_specification(*browser_specification) + cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl))) + + if cookie_file is not None: + cookie_file = expand_path(cookie_file) + jar = YoutubeDLCookieJar(cookie_file) + if os.access(cookie_file, os.R_OK): + jar.load(ignore_discard=True, ignore_expires=True) + cookie_jars.append(jar) + + return _merge_cookie_jars(cookie_jars) + + +def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger()): + if browser_name == 'firefox': + return _extract_firefox_cookies(profile, logger) + elif browser_name == 'safari': + return _extract_safari_cookies(profile, logger) + elif browser_name in CHROMIUM_BASED_BROWSERS: + return _extract_chrome_cookies(browser_name, profile, logger) + else: + raise ValueError('unknown browser: {}'.format(browser_name)) + + +def _extract_firefox_cookies(profile, logger): + logger.info('Extracting cookies from 
firefox') + if not SQLITE_AVAILABLE: + logger.warning('Cannot extract cookies from firefox without sqlite3 support. ' + 'Please use a python interpreter compiled with sqlite3 support') + return YoutubeDLCookieJar() + + if profile is None: + search_root = _firefox_browser_dir() + elif _is_path(profile): + search_root = profile + else: + search_root = os.path.join(_firefox_browser_dir(), profile) + + cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite') + if cookie_database_path is None: + raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) + logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) + + with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir: + cursor = None + try: + cursor = _open_database_copy(cookie_database_path, tmpdir) + cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies') + jar = YoutubeDLCookieJar() + for host, name, value, path, expiry, is_secure in cursor.fetchall(): + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + logger.info('Extracted {} cookies from firefox'.format(len(jar))) + return jar + finally: + if cursor is not None: + cursor.connection.close() + + +def _firefox_browser_dir(): + if sys.platform in ('linux', 'linux2'): + return os.path.expanduser('~/.mozilla/firefox') + elif sys.platform == 'win32': + return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles') + elif sys.platform == 'darwin': + return os.path.expanduser('~/Library/Application Support/Firefox') + else: + raise ValueError('unsupported platform: {}'.format(sys.platform)) + + +def _get_chromium_based_browser_settings(browser_name): + # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md + if sys.platform in ('linux', 'linux2'): + config = _config_home() + browser_dir = { + 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'), + 'chrome': os.path.join(config, 'google-chrome'), + 'chromium': os.path.join(config, 'chromium'), + 'edge': os.path.join(config, 'microsoft-edge'), + 'opera': os.path.join(config, 'opera'), + 'vivaldi': os.path.join(config, 'vivaldi'), + }[browser_name] + + elif sys.platform == 'win32': + appdata_local = os.path.expandvars('%LOCALAPPDATA%') + appdata_roaming = os.path.expandvars('%APPDATA%') + browser_dir = { + 'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'), + 'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'), + 'chromium': os.path.join(appdata_local, r'Chromium\User Data'), + 'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'), + 'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'), + 'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'), + }[browser_name] + + elif sys.platform == 'darwin': + appdata = os.path.expanduser('~/Library/Application Support') + browser_dir = { + 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'), + 'chrome': os.path.join(appdata, 'Google/Chrome'), + 'chromium': os.path.join(appdata, 'Chromium'), + 'edge': os.path.join(appdata, 'Microsoft Edge'), + 'opera': os.path.join(appdata, 'com.operasoftware.Opera'), + 'vivaldi': os.path.join(appdata, 'Vivaldi'), + }[browser_name] + + else: + raise 
ValueError('unsupported platform: {}'.format(sys.platform)) + + # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: + # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" + keyring_name = { + 'brave': 'Brave', + 'chrome': 'Chrome', + 'chromium': 'Chromium', + 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium', + 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium', + 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome', + }[browser_name] + + browsers_without_profiles = {'opera'} + + return { + 'browser_dir': browser_dir, + 'keyring_name': keyring_name, + 'supports_profiles': browser_name not in browsers_without_profiles + } + + +def _extract_chrome_cookies(browser_name, profile, logger): + logger.info('Extracting cookies from {}'.format(browser_name)) + + if not SQLITE_AVAILABLE: + logger.warning(('Cannot extract cookies from {} without sqlite3 support. ' + 'Please use a python interpreter compiled with sqlite3 support').format(browser_name)) + return YoutubeDLCookieJar() + + config = _get_chromium_based_browser_settings(browser_name) + + if profile is None: + search_root = config['browser_dir'] + elif _is_path(profile): + search_root = profile + config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile + else: + if config['supports_profiles']: + search_root = os.path.join(config['browser_dir'], profile) + else: + logger.error('{} does not support profiles'.format(browser_name)) + search_root = config['browser_dir'] + + cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies') + if cookie_database_path is None: + raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root)) + logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) + + decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger) + + with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir: + cursor = None + try: + cursor = _open_database_copy(cookie_database_path, tmpdir) + cursor.connection.text_factory = bytes + column_names = _get_column_names(cursor, 'cookies') + secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' + cursor.execute('SELECT host_key, name, value, encrypted_value, path, ' + 'expires_utc, {} FROM cookies'.format(secure_column)) + jar = YoutubeDLCookieJar() + failed_cookies = 0 + for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall(): + host_key = host_key.decode('utf-8') + name = name.decode('utf-8') + value = value.decode('utf-8') + path = path.decode('utf-8') + + if not value and encrypted_value: + value = decryptor.decrypt(encrypted_value) + if value is None: + failed_cookies += 1 + continue + + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + if failed_cookies > 0: + failed_message = ' ({} could not be decrypted)'.format(failed_cookies) + else: + failed_message = '' + logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message)) + return jar + finally: + if cursor is not None: + cursor.connection.close() + + +class ChromeCookieDecryptor: + """ + Overview: + + Linux: + 
- cookies are either v10 or v11 + - v10: AES-CBC encrypted with a fixed key + - v11: AES-CBC encrypted with an OS protected key (keyring) + - v11 keys can be stored in various places depending on the active desktop environment [2] + + Mac: + - cookies are either v10 or not v10 + - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux + - not v10: 'old data' stored as plaintext + + Windows: + - cookies are either v10 or not v10 + - v10: AES-GCM encrypted with a key which is encrypted with DPAPI + - not v10: encrypted with DPAPI + + Sources: + - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/ + - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc + - KeyStorageLinux::CreateService + """ + + def decrypt(self, encrypted_value): + raise NotImplementedError + + +def get_cookie_decryptor(browser_root, browser_keyring_name, logger): + if sys.platform in ('linux', 'linux2'): + return LinuxChromeCookieDecryptor(browser_keyring_name, logger) + elif sys.platform == 'darwin': + return MacChromeCookieDecryptor(browser_keyring_name, logger) + elif sys.platform == 'win32': + return WindowsChromeCookieDecryptor(browser_root, logger) + else: + raise NotImplementedError('Chrome cookie decryption is not supported ' + 'on this platform: {}'.format(sys.platform)) + + +class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_keyring_name, logger): + self._logger = logger + self._v10_key = self.derive_key(b'peanuts') + if KEYRING_AVAILABLE: + self._v11_key = self.derive_key(_get_linux_keyring_password(browser_keyring_name)) + else: + self._v11_key = None + + @staticmethod + def derive_key(password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc + return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16) + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger) + + elif version == b'v11': + if self._v11_key is None: + self._logger.warning(f'cannot decrypt cookie {KEYRING_UNAVAILABLE_REASON}', only_once=True) + return None + return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger) + + else: + return None + + +class MacChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_keyring_name, logger): + self._logger = logger + password = _get_mac_keyring_password(browser_keyring_name, logger) + self._v10_key = None if password is None else self.derive_key(password) + + @staticmethod + def derive_key(password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm + return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16) + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + if self._v10_key is None: + self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) + return None + + return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger) + + else: + # other prefixes are considered 'old data' which were stored as plaintext + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm + return encrypted_value + + +class 
WindowsChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_root, logger): + self._logger = logger + self._v10_key = _get_windows_v10_key(browser_root, logger) + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + if self._v10_key is None: + self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) + return None + + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc + # kNonceLength + nonce_length = 96 // 8 + # boringssl + # EVP_AEAD_AES_GCM_TAG_LEN + authentication_tag_length = 16 + + raw_ciphertext = ciphertext + nonce = raw_ciphertext[:nonce_length] + ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] + authentication_tag = raw_ciphertext[-authentication_tag_length:] + + return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) + + else: + # any other prefix means the data is DPAPI encrypted + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc + return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8') + + +def _extract_safari_cookies(profile, logger): + if profile is not None: + logger.error('safari does not support profiles') + if sys.platform != 'darwin': + raise ValueError('unsupported platform: {}'.format(sys.platform)) + + cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies') + + if not os.path.isfile(cookies_path): + raise FileNotFoundError('could not find safari cookies database') + + with open(cookies_path, 'rb') as f: + cookies_data = f.read() + + jar = parse_safari_cookies(cookies_data, logger=logger) + logger.info('Extracted {} cookies from safari'.format(len(jar))) + return jar + + +class ParserError(Exception): + pass + + +class DataParser: + def __init__(self, data, logger): + self._data = data + self.cursor = 0 + self._logger = logger + + def read_bytes(self, num_bytes): + if num_bytes < 0: + raise ParserError('invalid read of {} bytes'.format(num_bytes)) + end = self.cursor + num_bytes + if end > len(self._data): + raise ParserError('reached end of input') + data = self._data[self.cursor:end] + self.cursor = end + return data + + def expect_bytes(self, expected_value, message): + value = self.read_bytes(len(expected_value)) + if value != expected_value: + raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message)) + + def read_uint(self, big_endian=False): + data_format = '>I' if big_endian else '<I' + return struct.unpack(data_format, self.read_bytes(4))[0] + + def read_double(self, big_endian=False): + data_format = '>d' if big_endian else '<d' + return struct.unpack(data_format, self.read_bytes(8))[0] + + def read_cstring(self): + buffer = [] + while True: + c = self.read_bytes(1) + if c == b'\x00': + return b''.join(buffer).decode('utf-8') + else: + buffer.append(c) + + def skip(self, num_bytes, description='unknown'): + if num_bytes > 0: + self._logger.debug('skipping {} bytes ({}): {}'.format( + num_bytes, description, self.read_bytes(num_bytes))) + elif num_bytes < 0: + raise ParserError('invalid skip of {} bytes'.format(num_bytes)) + + def skip_to(self, offset, description='unknown'): + self.skip(offset - self.cursor, description) + + def skip_to_end(self, description='unknown'): + self.skip_to(len(self._data), description) + + +def _mac_absolute_time_to_posix(timestamp): + return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp()) + + +def _parse_safari_cookies_header(data, logger): + p = DataParser(data, logger) + p.expect_bytes(b'cook', 'database signature') + number_of_pages = p.read_uint(big_endian=True) + page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)] + return page_sizes, p.cursor + + +def _parse_safari_cookies_page(data, jar, logger): + p = DataParser(data, logger) + 
p.expect_bytes(b'\x00\x00\x01\x00', 'page signature') + number_of_cookies = p.read_uint() + record_offsets = [p.read_uint() for _ in range(number_of_cookies)] + if number_of_cookies == 0: + logger.debug('a cookies page of size {} has no cookies'.format(len(data))) + return + + p.skip_to(record_offsets[0], 'unknown page header field') + + for record_offset in record_offsets: + p.skip_to(record_offset, 'space between records') + record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) + p.read_bytes(record_length) + p.skip_to_end('space in between pages') + + +def _parse_safari_cookies_record(data, jar, logger): + p = DataParser(data, logger) + record_size = p.read_uint() + p.skip(4, 'unknown record field 1') + flags = p.read_uint() + is_secure = bool(flags & 0x0001) + p.skip(4, 'unknown record field 2') + domain_offset = p.read_uint() + name_offset = p.read_uint() + path_offset = p.read_uint() + value_offset = p.read_uint() + p.skip(8, 'unknown record field 3') + expiration_date = _mac_absolute_time_to_posix(p.read_double()) + _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841 + + try: + p.skip_to(domain_offset) + domain = p.read_cstring() + + p.skip_to(name_offset) + name = p.read_cstring() + + p.skip_to(path_offset) + path = p.read_cstring() + + p.skip_to(value_offset) + value = p.read_cstring() + except UnicodeDecodeError: + logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True) + return record_size + + p.skip_to(record_size, 'space at the end of the record') + + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + return record_size + + +def parse_safari_cookies(data, jar=None, logger=YDLLogger()): + """ + References: + - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc + - this data appears to be out of date but the important parts of the database structure are the same + - there are a few bytes here and there which are skipped during parsing + """ + if jar is None: + jar = YoutubeDLCookieJar() + page_sizes, body_start = _parse_safari_cookies_header(data, logger) + p = DataParser(data[body_start:], logger) + for page_size in page_sizes: + _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger) + p.skip_to_end('footer') + return jar + + +def _get_linux_keyring_password(browser_keyring_name): + password = keyring.get_password('{} Keys'.format(browser_keyring_name), + '{} Safe Storage'.format(browser_keyring_name)) + if password is None: + # this sometimes occurs in KDE because chrome does not check hasEntry and instead + # just tries to read the value (which kwallet returns "") whereas keyring checks hasEntry + # to verify this: + # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" + # while starting chrome. + # this may be a bug as the intended behaviour is to generate a random password and store + # it, but that doesn't matter here.
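For orientation, the Linux v10 path above needs no keyring access at all, since the key is derived from the hard-coded password b'peanuts'. A minimal sketch reusing the module's own AES helper; the 16-space IV and the PKCS#7 padding strip mirror _decrypt_aes_cbc further down in this file, and the function name is illustrative:

    from hashlib import pbkdf2_hmac

    from hypervideo_dl.aes import aes_cbc_decrypt_bytes

    def decrypt_linux_v10(encrypted_value):
        # 'v10' payloads are AES-128-CBC encrypted with a PBKDF2-SHA1 key
        # derived from the fixed password 'peanuts' (1 iteration, 16 bytes).
        assert encrypted_value[:3] == b'v10'
        key = pbkdf2_hmac('sha1', b'peanuts', b'saltysalt', 1, 16)
        plaintext = aes_cbc_decrypt_bytes(encrypted_value[3:], key, b' ' * 16)
        return plaintext[:-plaintext[-1]].decode('utf-8')  # strip PKCS#7 padding

v11 values follow the same CBC scheme but swap in the keyring-provided password, which is why the KWallet quirk documented above matters.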
+ password = '' + return password.encode('utf-8') + + +def _get_mac_keyring_password(browser_keyring_name, logger): + if KEYRING_AVAILABLE: + logger.debug('using keyring to obtain password') + password = keyring.get_password('{} Safe Storage'.format(browser_keyring_name), browser_keyring_name) + return password.encode('utf-8') + else: + logger.debug('using find-generic-password to obtain password') + proc = subprocess.Popen(['security', 'find-generic-password', + '-w', # write password to stdout + '-a', browser_keyring_name, # match 'account' + '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service' + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + try: + stdout, stderr = process_communicate_or_kill(proc) + if stdout[-1:] == b'\n': + stdout = stdout[:-1] + return stdout + except BaseException as e: + logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})') + return None + + +def _get_windows_v10_key(browser_root, logger): + path = _find_most_recently_used_file(browser_root, 'Local State') + if path is None: + logger.error('could not find local state file') + return None + with open(path, 'r') as f: + data = json.load(f) + try: + base64_key = data['os_crypt']['encrypted_key'] + except KeyError: + logger.error('no encrypted key in Local State') + return None + encrypted_key = compat_b64decode(base64_key) + prefix = b'DPAPI' + if not encrypted_key.startswith(prefix): + logger.error('invalid key') + return None + return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger) + + +def pbkdf2_sha1(password, salt, iterations, key_length): + return pbkdf2_hmac('sha1', password, salt, iterations, key_length) + + +def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16): + plaintext = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector) + padding_length = plaintext[-1] + try: + return plaintext[:-padding_length].decode('utf-8') + except UnicodeDecodeError: + logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) + return None + + +def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): + try: + plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) + except ValueError: + logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True) + return None + + try: + return plaintext.decode('utf-8') + except UnicodeDecodeError: + logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) + return None + + +def _decrypt_windows_dpapi(ciphertext, logger): + """ + References: + - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata + """ + from ctypes.wintypes import DWORD + + class DATA_BLOB(ctypes.Structure): + _fields_ = [('cbData', DWORD), + ('pbData', ctypes.POINTER(ctypes.c_char))] + + buffer = ctypes.create_string_buffer(ciphertext) + blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer) + blob_out = DATA_BLOB() + ret = ctypes.windll.crypt32.CryptUnprotectData( + ctypes.byref(blob_in), # pDataIn + None, # ppszDataDescr: human readable description of pDataIn + None, # pOptionalEntropy: salt? 
+ None, # pvReserved: must be NULL + None, # pPromptStruct: information about prompts to display + 0, # dwFlags + ctypes.byref(blob_out) # pDataOut + ) + if not ret: + logger.warning('failed to decrypt with DPAPI', only_once=True) + return None + + result = ctypes.string_at(blob_out.pbData, blob_out.cbData) + ctypes.windll.kernel32.LocalFree(blob_out.pbData) + return result + + +def _config_home(): + return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config')) + + +def _open_database_copy(database_path, tmpdir): + # cannot open sqlite databases if they are already in use (e.g. by the browser) + database_copy_path = os.path.join(tmpdir, 'temporary.sqlite') + shutil.copy(database_path, database_copy_path) + conn = sqlite3.connect(database_copy_path) + return conn.cursor() + + +def _get_column_names(cursor, table_name): + table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall() + return [row[1].decode('utf-8') for row in table_info] + + +def _find_most_recently_used_file(root, filename): + # if there are multiple browser profiles, take the most recently used one + paths = [] + for root, dirs, files in os.walk(root): + for file in files: + if file == filename: + paths.append(os.path.join(root, file)) + return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime) + + +def _merge_cookie_jars(jars): + output_jar = YoutubeDLCookieJar() + for jar in jars: + for cookie in jar: + output_jar.set_cookie(cookie) + if jar.filename is not None: + output_jar.filename = jar.filename + return output_jar + + +def _is_path(value): + return os.path.sep in value + + +def _parse_browser_specification(browser_name, profile=None): + browser_name = browser_name.lower() + if browser_name not in SUPPORTED_BROWSERS: + raise ValueError(f'unsupported browser: "{browser_name}"') + if profile is not None and _is_path(profile): + profile = os.path.expanduser(profile) + return browser_name, profile diff --git a/hypervideo_dl/downloader/__init__.py b/hypervideo_dl/downloader/__init__.py index 2e485df..2449c74 100644 --- a/hypervideo_dl/downloader/__init__.py +++ b/hypervideo_dl/downloader/__init__.py @@ -1,24 +1,47 @@ from __future__ import unicode_literals +from ..compat import compat_str +from ..utils import ( + determine_protocol, + NO_DEFAULT +) + + +def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): + info_dict['protocol'] = determine_protocol(info_dict) + info_copy = info_dict.copy() + info_copy['to_stdout'] = to_stdout + + downloaders = [_get_suitable_downloader(info_copy, proto, params, default) + for proto in (protocol or info_copy['protocol']).split('+')] + if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): + return FFmpegFD + elif len(downloaders) == 1: + return downloaders[0] + return None + + +# Some of these require get_suitable_downloader from .common import FileDownloader +from .dash import DashSegmentsFD from .f4m import F4mFD from .hls import HlsFD from .http import HttpFD from .rtmp import RtmpFD -from .dash import DashSegmentsFD from .rtsp import RtspFD from .ism import IsmFD +from .mhtml import MhtmlFD +from .niconico import NiconicoDmcFD +from .websocket import WebSocketFragmentFD +from .youtube_live_chat import YoutubeLiveChatFD from .external import ( get_external_downloader, FFmpegFD, ) -from ..utils import ( - determine_protocol, -) - PROTOCOL_MAP = { 'rtmp': RtmpFD, + 'rtmp_ffmpeg': FFmpegFD, 'm3u8_native': HlsFD, 'm3u8': FFmpegFD, 'mms': RtspFD, @@ 
-26,36 +49,78 @@ PROTOCOL_MAP = { 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, 'ism': IsmFD, + 'mhtml': MhtmlFD, + 'niconico_dmc': NiconicoDmcFD, + 'websocket_frag': WebSocketFragmentFD, + 'youtube_live_chat': YoutubeLiveChatFD, + 'youtube_live_chat_replay': YoutubeLiveChatFD, } -def get_suitable_downloader(info_dict, params={}): +def shorten_protocol_name(proto, simplify=False): + short_protocol_names = { + 'm3u8_native': 'm3u8_n', + 'rtmp_ffmpeg': 'rtmp_f', + 'http_dash_segments': 'dash', + 'niconico_dmc': 'dmc', + 'websocket_frag': 'WSfrag', + } + if simplify: + short_protocol_names.update({ + 'https': 'http', + 'ftps': 'ftp', + 'm3u8_native': 'm3u8', + 'rtmp_ffmpeg': 'rtmp', + 'm3u8_frag_urls': 'm3u8', + 'dash_frag_urls': 'dash', + }) + return short_protocol_names.get(proto, proto) + + +def _get_suitable_downloader(info_dict, protocol, params, default): """Get the downloader class that can handle the info dict.""" - protocol = determine_protocol(info_dict) - info_dict['protocol'] = protocol + if default is NO_DEFAULT: + default = HttpFD # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): # return FFmpegFD - external_downloader = params.get('external_downloader') - if external_downloader is not None: + info_dict['protocol'] = protocol + downloaders = params.get('external_downloader') + external_downloader = ( + downloaders if isinstance(downloaders, compat_str) or downloaders is None + else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))) + + if external_downloader is None: + if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): + return FFmpegFD + elif external_downloader.lower() != 'native': ed = get_external_downloader(external_downloader) - if ed.can_download(info_dict): + if ed.can_download(info_dict, external_downloader): return ed - if protocol.startswith('m3u8') and info_dict.get('is_live'): - return FFmpegFD - - if protocol == 'm3u8' and params.get('hls_prefer_native') is True: - return HlsFD + if protocol == 'http_dash_segments': + if info_dict.get('is_live') and (external_downloader or '').lower() != 'native': + return FFmpegFD - if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False: - return FFmpegFD + if protocol in ('m3u8', 'm3u8_native'): + if info_dict.get('is_live'): + return FFmpegFD + elif (external_downloader or '').lower() == 'native': + return HlsFD + elif get_suitable_downloader( + info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']): + return HlsFD + elif params.get('hls_prefer_native') is True: + return HlsFD + elif params.get('hls_prefer_native') is False: + return FFmpegFD - return PROTOCOL_MAP.get(protocol, HttpFD) + return PROTOCOL_MAP.get(protocol, default) __all__ = [ - 'get_suitable_downloader', 'FileDownloader', + 'get_suitable_downloader', + 'shorten_protocol_name', ] diff --git a/hypervideo_dl/downloader/common.py b/hypervideo_dl/downloader/common.py index d023168..27ca2cd 100644 --- a/hypervideo_dl/downloader/common.py +++ b/hypervideo_dl/downloader/common.py @@ -2,11 +2,9 @@ from __future__ import division, unicode_literals import os import re -import sys import time import random -from ..compat import compat_os_name from ..utils import ( decodeArgument, encodeFilename, @@ -15,6 +13,12 @@ from ..utils import ( shell_quote, timeconvert, ) +from ..minicurses import ( + MultilineLogger, + MultilinePrinter, + QuietMultilinePrinter, + 
BreaklineStatusPrinter +) class FileDownloader(object): @@ -32,25 +36,28 @@ class FileDownloader(object): verbose: Print additional info to stdout. quiet: Do not print messages to stdout. ratelimit: Download speed limit, in bytes/sec. + throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) retries: Number of times to retry for HTTP error 5xx buffersize: Size of download buffer in bytes. noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. test: Download only first bytes to test the downloader. min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. - external_downloader_args: A list of additional command-line arguments for the - external downloader. + external_downloader_args: A dictionary of downloader keys (in lower case) + and a list of additional command-line arguments for the + executable. Use 'default' as the name for arguments to be + passed to all downloaders. For compatibility with youtube-dl, + a single list of args can also be used hls_use_mpegts: Use the mpegts container for HLS videos. http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be useful for bypassing bandwidth throttling imposed by a webserver (experimental) + progress_template: See YoutubeDL.py Subclasses of this one must re-define the real_download method. """ @@ -63,6 +70,7 @@ class FileDownloader(object): self.ydl = ydl self._progress_hooks = [] self.params = params + self._prepare_multiline_status() self.add_progress_hook(self.report_progress) @staticmethod @@ -147,10 +155,10 @@ class FileDownloader(object): return int(round(number * multiplier)) def to_screen(self, *args, **kargs): - self.ydl.to_screen(*args, **kargs) + self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs) def to_stderr(self, message): - self.ydl.to_screen(message) + self.ydl.to_stderr(message) def to_console_title(self, message): self.ydl.to_console_title(message) @@ -164,6 +172,9 @@ class FileDownloader(object): def report_error(self, *args, **kargs): self.ydl.report_error(*args, **kargs) + def write_debug(self, *args, **kargs): + self.ydl.write_debug(*args, **kargs) + def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" rate_limit = self.params.get('ratelimit') @@ -196,12 +207,12 @@ class FileDownloader(object): return filename + '.ytdl' def try_rename(self, old_filename, new_filename): + if old_filename == new_filename: + return try: - if old_filename == new_filename: - return - os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) + os.replace(old_filename, new_filename) except (IOError, OSError) as err: - self.report_error('unable to rename file: %s' % error_to_compat_str(err)) + self.report_error(f'unable to rename file: {err}') def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" @@ -228,39 +239,46 @@ class FileDownloader(object): """Report destination filename.""" self.to_screen('[download] Destination: ' + filename) - def _report_progress_status(self, msg, is_last_line=False): - fullmsg = 
'[download] ' + msg - if self.params.get('progress_with_newline', False): - self.to_screen(fullmsg) + def _prepare_multiline_status(self, lines=1): + if self.params.get('noprogress'): + self._multiline = QuietMultilinePrinter() + elif self.ydl.params.get('logger'): + self._multiline = MultilineLogger(self.ydl.params['logger'], lines) + elif self.params.get('progress_with_newline'): + self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines) else: - if compat_os_name == 'nt': - prev_len = getattr(self, '_report_progress_prev_line_length', - 0) - if prev_len > len(fullmsg): - fullmsg += ' ' * (prev_len - len(fullmsg)) - self._report_progress_prev_line_length = len(fullmsg) - clear_line = '\r' - else: - clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r') - self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) - self.to_console_title('hypervideo ' + msg) + self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet')) + + def _finish_multiline_status(self): + self._multiline.end() + + def _report_progress_status(self, s): + progress_dict = s.copy() + progress_dict.pop('info_dict') + progress_dict = {'info': s['info_dict'], 'progress': progress_dict} + + progress_template = self.params.get('progress_template', {}) + self._multiline.print_at_line(self.ydl.evaluate_outtmpl( + progress_template.get('download') or '[download] %(progress._default_template)s', + progress_dict), s.get('progress_idx') or 0) + self.to_console_title(self.ydl.evaluate_outtmpl( + progress_template.get('download-title') or 'hypervideo %(progress._default_template)s', + progress_dict)) def report_progress(self, s): if s['status'] == 'finished': - if self.params.get('noprogress', False): + if self.params.get('noprogress'): self.to_screen('[download] Download completed') - else: - msg_template = '100%%' - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template += ' of %(_total_bytes_str)s' - if s.get('elapsed') is not None: - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template += ' in %(_elapsed_str)s' - self._report_progress_status( - msg_template % s, is_last_line=True) - - if self.params.get('noprogress'): + msg_template = '100%%' + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template += ' of %(_total_bytes_str)s' + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template += ' in %(_elapsed_str)s' + s['_percent_str'] = self.format_percent(100) + s['_default_template'] = msg_template % s + self._report_progress_status(s) return if s['status'] != 'downloading': @@ -302,8 +320,8 @@ class FileDownloader(object): msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' else: msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' - - self._report_progress_status(msg_template % s) + s['_default_template'] = msg_template % s + self._report_progress_status(s) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" @@ -312,27 +330,30 @@ class FileDownloader(object): def report_retry(self, err, count, retries): """Report retry in case of HTTP error 5xx""" self.to_screen( - '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...' + '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...' 
% (error_to_compat_str(err), count, self.format_retries(retries))) - def report_file_already_downloaded(self, file_name): + def report_file_already_downloaded(self, *args, **kwargs): """Report file has already been fully downloaded.""" - try: - self.to_screen('[download] %s has already been downloaded' % file_name) - except UnicodeEncodeError: - self.to_screen('[download] The file has already been downloaded') + return self.ydl.report_file_already_downloaded(*args, **kwargs) def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') - def download(self, filename, info_dict): + @staticmethod + def supports_manifest(manifest): + """ Whether the downloader can download the fragments from the manifest. + Redefine in subclasses if needed. """ + pass + + def download(self, filename, info_dict, subtitle=False): """Download to a filename using the info from info_dict Return True on success and False otherwise """ nooverwrites_and_exists = ( - self.params.get('nooverwrites', False) + not self.params.get('overwrites', True) and os.path.exists(encodeFilename(filename)) ) @@ -350,26 +371,43 @@ class FileDownloader(object): 'filename': filename, 'status': 'finished', 'total_bytes': os.path.getsize(encodeFilename(filename)), - }) - return True - - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) - self.to_screen( - '[download] Sleeping %s seconds...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) - - return self.real_download(filename, info_dict) + }, info_dict) + return True, False + + if subtitle is False: + min_sleep_interval = self.params.get('sleep_interval') + if min_sleep_interval: + max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) + sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + self.to_screen( + '[download] Sleeping %s seconds ...' % ( + int(sleep_interval) if sleep_interval.is_integer() + else '%.2f' % sleep_interval)) + time.sleep(sleep_interval) + else: + sleep_interval_sub = 0 + if type(self.params.get('sleep_interval_subtitles')) is int: + sleep_interval_sub = self.params.get('sleep_interval_subtitles') + if sleep_interval_sub > 0: + self.to_screen( + '[download] Sleeping %s seconds ...' % ( + sleep_interval_sub)) + time.sleep(sleep_interval_sub) + ret = self.real_download(filename, info_dict) + self._finish_multiline_status() + return ret, True def real_download(self, filename, info_dict): """Real download process. Redefine in subclasses.""" raise NotImplementedError('This method must be implemented by subclasses') - def _hook_progress(self, status): + def _hook_progress(self, status, info_dict): + if not self._progress_hooks: + return + status['info_dict'] = info_dict + # youtube-dl passes the same status object to all the hooks. + # Some third party scripts seem to be relying on this. 
+ # So keep this behavior if possible for ph in self._progress_hooks: ph(status) @@ -387,5 +425,4 @@ class FileDownloader(object): if exe is None: exe = os.path.basename(str_args[0]) - self.to_screen('[debug] %s command line: %s' % ( - exe, shell_quote(str_args))) + self.write_debug('%s command line: %s' % (exe, shell_quote(str_args))) diff --git a/hypervideo_dl/downloader/dash.py b/hypervideo_dl/downloader/dash.py index c6d674b..6444ad6 100644 --- a/hypervideo_dl/downloader/dash.py +++ b/hypervideo_dl/downloader/dash.py @@ -1,80 +1,62 @@ from __future__ import unicode_literals +from ..downloader import get_suitable_downloader from .fragment import FragmentFD -from ..compat import compat_urllib_error -from ..utils import ( - DownloadError, - urljoin, -) + +from ..utils import urljoin class DashSegmentsFD(FragmentFD): """ - Download segments in a DASH manifest + Download segments in a DASH manifest. External downloaders can take over + the fragment downloads by supporting the 'dash_frag_urls' protocol """ FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): + if info_dict.get('is_live'): + self.report_error('Live DASH videos are not supported') + fragment_base_url = info_dict.get('fragment_base_url') fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] + real_downloader = get_suitable_downloader( + info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-')) + ctx = { 'filename': filename, 'total_frags': len(fragments), } - self._prepare_and_start_frag_download(ctx) - - fragment_retries = self.params.get('fragment_retries', 0) - skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, info_dict) + fragments_to_download = [] frag_index = 0 for i, fragment in enumerate(fragments): frag_index += 1 if frag_index <= ctx['fragment_index']: continue - # In DASH, the first segment contains necessary headers to - # generate a valid MP4 file, so always abort for the first segment - fatal = i == 0 or not skip_unavailable_fragments - count = 0 - while count <= fragment_retries: - try: - fragment_url = fragment.get('url') - if not fragment_url: - assert fragment_base_url - fragment_url = urljoin(fragment_base_url, fragment['path']) - success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) - if not success: - return False - self._append_fragment(ctx, frag_content) - break - except compat_urllib_error.HTTPError as err: - # YouTube may often return 404 HTTP error for a fragment causing the - # whole download to fail. However if the same fragment is immediately - # retried with the same request data this usually succeeds (1-2 attempts - # is usually enough) thus allowing to download the whole file successfully. - # To be future-proof we will retry all fragments that fail with any - # HTTP error. 
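The per-fragment retry loop deleted here is subsumed by the download_and_append_fragments call that now ends real_download (just below). Schematically the policy is a bounded retry; a stand-alone sketch, where fetch_fragment, download and the messages are illustrative rather than the module's API:

    import urllib.error

    def fetch_fragment(download, frag_index, fragment_retries=0, fatal=True):
        # Bounded retry: re-request the fragment on any HTTP error, since
        # transient 404s usually clear up within 1-2 immediate retries.
        for attempt in range(fragment_retries + 1):
            try:
                return download()  # any callable performing the HTTP request
            except urllib.error.HTTPError as err:
                if attempt == fragment_retries:
                    if fatal:
                        raise
                    print('[download] Skipping fragment %d ...' % frag_index)
                    return None
                print('[download] Got HTTP error %d. Retrying fragment %d '
                      '(attempt %d of %d) ...' % (err.code, frag_index,
                                                  attempt + 1, fragment_retries))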
- count += 1 - if count <= fragment_retries: - self.report_retry_fragment(err, frag_index, count, fragment_retries) - except DownloadError: - # Don't retry fragment if error occurred during HTTP downloading - # itself since it has own retry settings - if not fatal: - self.report_skip_fragment(frag_index) - break - raise - - if count > fragment_retries: - if not fatal: - self.report_skip_fragment(frag_index) - continue - self.report_error('giving up after %s fragment retries' % fragment_retries) - return False - - self._finish_frag_download(ctx) - - return True + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + + fragments_to_download.append({ + 'frag_index': frag_index, + 'index': i, + 'url': fragment_url, + }) + + if real_downloader: + self.to_screen( + '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + info_dict['fragments'] = fragments_to_download + fd = real_downloader(self.ydl, self.params) + return fd.real_download(filename, info_dict) + + return self.download_and_append_fragments(ctx, fragments_to_download, info_dict) diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py index c31f891..74adb05 100644 --- a/hypervideo_dl/downloader/external.py +++ b/hypervideo_dl/downloader/external.py @@ -6,7 +6,7 @@ import subprocess import sys import time -from .common import FileDownloader +from .fragment import FragmentFD from ..compat import ( compat_setenv, compat_str, @@ -16,16 +16,21 @@ from ..utils import ( cli_option, cli_valueless_option, cli_bool_option, - cli_configuration_args, + _configuration_args, encodeFilename, encodeArgument, handle_youtubedl_headers, check_executable, is_outdated_version, + process_communicate_or_kill, + sanitize_open, ) -class ExternalFD(FileDownloader): +class ExternalFD(FragmentFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps') + can_download_to_stdout = False + def real_download(self, filename, info_dict): self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -56,7 +61,7 @@ class ExternalFD(FileDownloader): 'downloaded_bytes': fsize, 'total_bytes': fsize, }) - self._hook_progress(status) + self._hook_progress(status, info_dict) return True else: self.to_stderr('\n') @@ -70,19 +75,25 @@ class ExternalFD(FileDownloader): @property def exe(self): - return self.params.get('external_downloader') + return self.get_basename() @classmethod - def available(cls): - return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT]) + def available(cls, path=None): + path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT]) + if path: + cls.exe = path + return path + return False @classmethod def supports(cls, info_dict): - return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') + return ( + (cls.can_download_to_stdout or not info_dict.get('to_stdout')) + and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS) @classmethod - def can_download(cls, info_dict): - return cls.available() and cls.supports(info_dict) + def can_download(cls, info_dict, path=None): + return cls.available(path) and cls.supports(info_dict) def _option(self, command_option, param): return cli_option(self.params, command_option, param) @@ -93,8 +104,10 @@ class ExternalFD(FileDownloader): def _valueless_option(self, command_option, param, expected_value=True): return cli_valueless_option(self.params, command_option, param, expected_value) - def 
_configuration_args(self, default=[]): - return cli_configuration_args(self.params, 'external_downloader_args', default) + def _configuration_args(self, keys=None, *args, **kwargs): + return _configuration_args( + self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(), + keys, *args, **kwargs) def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ @@ -102,12 +115,56 @@ class ExternalFD(FileDownloader): self._debug_cmd(cmd) - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = p.communicate() - if p.returncode != 0: + if 'fragments' not in info_dict: + p = subprocess.Popen( + cmd, stderr=subprocess.PIPE) + _, stderr = process_communicate_or_kill(p) + if p.returncode != 0: + self.to_stderr(stderr.decode('utf-8', 'replace')) + return p.returncode + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + count = 0 + while count <= fragment_retries: + p = subprocess.Popen( + cmd, stderr=subprocess.PIPE) + _, stderr = process_communicate_or_kill(p) + if p.returncode == 0: + break + # TODO: Decide whether to retry based on error code + # https://aria2.github.io/manual/en/html/aria2c.html#exit-status self.to_stderr(stderr.decode('utf-8', 'replace')) - return p.returncode + count += 1 + if count <= fragment_retries: + self.to_screen( + '[%s] Got error. Retrying fragments (attempt %d of %s)...' + % (self.get_basename(), count, self.format_retries(fragment_retries))) + if count > fragment_retries: + if not skip_unavailable_fragments: + self.report_error('Giving up after %s fragment retries' % fragment_retries) + return -1 + + decrypt_fragment = self.decrypter(info_dict) + dest, _ = sanitize_open(tmpfilename, 'wb') + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + try: + src, _ = sanitize_open(fragment_filename, 'rb') + except IOError: + if skip_unavailable_fragments and frag_index > 1: + self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) + continue + self.report_error('Unable to open fragment %d' % frag_index) + return -1 + dest.write(decrypt_fragment(fragment, src.read())) + src.close() + if not self.params.get('keep_fragments', False): + os.remove(encodeFilename(fragment_filename)) + dest.close() + os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) + return 0 class CurlFD(ExternalFD): @@ -115,8 +172,10 @@ class CurlFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '--location', '-o', tmpfilename] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--verbose', 'verbose') @@ -141,7 +200,7 @@ class CurlFD(ExternalFD): # curl writes the progress to stderr so don't capture it. 
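The dict-aware _configuration_args above is what makes both external_downloader_args shapes documented in common.py work; roughly, either of the following is accepted (the flag values here are purely illustrative):

    # youtube-dl style: one flat list, passed to whichever downloader runs
    params = {'external_downloader_args': ['--max-connection-per-server', '4']}

    # dict style: lower-case downloader names as keys, 'default' as fallback
    params = {
        'external_downloader_args': {
            'aria2c': ['--min-split-size', '1M'],
            'curl': ['--retry', '3'],
            'default': [],
        },
    }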
p = subprocess.Popen(cmd) - p.communicate() + process_communicate_or_kill(p) return p.returncode @@ -150,8 +209,9 @@ class AxelFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-o', tmpfilename] - for key, val in info_dict['http_headers'].items(): - cmd += ['-H', '%s: %s' % (key, val)] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['-H', '%s: %s' % (key, val)] cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -162,8 +222,9 @@ class WgetFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] cmd += self._option('--limit-rate', 'ratelimit') retry = self._option('--tries', 'retries') if len(retry) == 2: @@ -180,51 +241,115 @@ class WgetFD(ExternalFD): class Aria2cFD(ExternalFD): AVAILABLE_OPT = '-v' + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls') + + @staticmethod + def supports_manifest(manifest): + UNSUPPORTED_FEATURES = [ + r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 + ] + check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) + return all(check_results) def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-c'] - cmd += self._configuration_args([ - '--min-split-size', '1M', '--max-connection-per-server', '4']) - dn = os.path.dirname(tmpfilename) - if dn: - cmd += ['--dir', dn] - cmd += ['--out', os.path.basename(tmpfilename)] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + cmd = [self.exe, '-c', + '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', + '--file-allocation=none', '-x16', '-j16', '-s16'] + if 'fragments' in info_dict: + cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] + else: + cmd += ['--min-split-size', '1M'] + + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--max-overall-download-limit', 'ratelimit') cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') - cmd += ['--', info_dict['url']] + cmd += self._configuration_args() + + # aria2c strips out spaces from the beginning/end of filenames and paths. + # We work around this issue by adding a "./" to the beginning of the + # filename and relative path, and adding a "/" at the end of the path. 
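For fragmented protocols, the aria2c command assembled below does not put the fragment URLs on the command line at all; it writes a '<tmpfilename>.frag.urls' input file and passes it via -i. aria2c reads one URI per line, and an indented out= line binds an output filename to the preceding URI. A sketch of the file's shape, with hypothetical URLs:

    fragments = ['https://example.com/seg-%d.ts' % i for i in range(3)]
    url_list = ['%s\n\tout=video.mp4.part-Frag%d' % (url, i)
                for i, url in enumerate(fragments)]
    print('\n'.join(url_list))
    # -> https://example.com/seg-0.ts
    #    <TAB>out=video.mp4.part-Frag0
    #    ... one such pair per fragment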
+ # See: https://github.com/hypervideo/hypervideo/issues/276 + # https://github.com/ytdl-org/youtube-dl/issues/20312 + # https://github.com/aria2/aria2/issues/1373 + dn = os.path.dirname(tmpfilename) + if dn: + if not os.path.isabs(dn): + dn = '.%s%s' % (os.path.sep, dn) + cmd += ['--dir', dn + os.path.sep] + if 'fragments' not in info_dict: + cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))] + cmd += ['--auto-file-renaming=false'] + + if 'fragments' in info_dict: + cmd += ['--file-allocation=none', '--uri-selector=inorder'] + url_list_file = '%s.frag.urls' % tmpfilename + url_list = [] + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) + url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) + stream, _ = sanitize_open(url_list_file, 'wb') + stream.write('\n'.join(url_list).encode('utf-8')) + stream.close() + cmd += ['-i', url_list_file] + else: + cmd += ['--', info_dict['url']] return cmd class HttpieFD(ExternalFD): + AVAILABLE_OPT = '--version' + @classmethod - def available(cls): - return check_executable('http', ['--version']) + def available(cls, path=None): + return ExternalFD.available(cls, path or 'http') def _make_cmd(self, tmpfilename, info_dict): cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] - for key, val in info_dict['http_headers'].items(): - cmd += ['%s:%s' % (key, val)] + + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['%s:%s' % (key, val)] return cmd class FFmpegFD(ExternalFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments') + can_download_to_stdout = True + + @classmethod + def available(cls, path=None): + # TODO: Fix path for ffmpeg + # Fixme: This may be wrong when --ffmpeg-location is used + return FFmpegPostProcessor().available + @classmethod def supports(cls, info_dict): - return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms') + return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')) + + def on_process_started(self, proc, stdin): + """ Override this in subclasses """ + pass @classmethod - def available(cls): - return FFmpegPostProcessor().available + def can_merge_formats(cls, info_dict, params): + return ( + info_dict.get('requested_formats') + and info_dict.get('protocol') + and not params.get('allow_unplayable_formats') + and 'no-direct-merge' not in params.get('compat_opts', []) + and cls.can_download(info_dict)) def _call_downloader(self, tmpfilename, info_dict): - url = info_dict['url'] + urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']] ffpp = FFmpegPostProcessor(downloader=self) if not ffpp.available: - self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') + self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') return False ffpp.check_version() @@ -234,7 +359,12 @@ class FFmpegFD(ExternalFD): if self.params.get(log_level, False): args += ['-loglevel', log_level] break + if not self.params.get('verbose'): + args += ['-hide_banner'] + args += info_dict.get('_ffmpeg_args', []) + + # This option exists only for compatibility. 
Extractors should use `_ffmpeg_args` instead seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server @@ -244,8 +374,6 @@ # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] - args += self._configuration_args() - # start_time = info_dict.get('start_time') or 0 # if start_time: # args += ['-ss', compat_str(start_time)] @@ -253,7 +381,7 @@ # if end_time: # args += ['-t', compat_str(end_time - start_time)] - if info_dict['http_headers'] and re.match(r'^https?://', url): + if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]): # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. headers = handle_youtubedl_headers(info_dict['http_headers']) @@ -311,13 +439,25 @@ class FFmpegFD(ExternalFD): elif isinstance(conn, compat_str): args += ['-rtmp_conn', conn] - args += ['-i', url, '-c', 'copy'] + for i, url in enumerate(urls): + args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] + + args += ['-c', 'copy'] + if info_dict.get('requested_formats') or protocol == 'http_dash_segments': + for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): + stream_number = fmt.get('manifest_stream_number', 0) + a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v' + args.extend(['-map', f'{i}:{a_or_v}:{stream_number}']) if self.params.get('test', False): args += ['-fs', compat_str(self._TEST_FILE_SIZE)] + ext = info_dict['ext'] if protocol in ('m3u8', 'm3u8_native'): - if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': + use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts') + if use_mpegts is None: + use_mpegts = info_dict.get('is_live') + if use_mpegts: args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] @@ -325,25 +465,33 @@ class FFmpegFD(ExternalFD): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] + elif ext == 'mp4' and tmpfilename == '-': + args += ['-f', 'mpegts'] else: - args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])] + args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] + + args += self._configuration_args(('_o1', '_o', '')) args = [encodeArgument(opt) for opt in args] args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) - self._debug_cmd(args) proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) + if url in ('-', 'pipe:'): + self.on_process_started(proc, proc.stdin) try: retval = proc.wait() - except KeyboardInterrupt: + except BaseException as e: # subprocess.run would send the SIGKILL signal to ffmpeg and the # mp4 file couldn't be played, but if we ask ffmpeg to quit it # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). 
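To make the stream mapping above concrete: for a merged download the loop emits one -i per requested format, then one -map per stream. A sketch with hypothetical format dicts:

    # Hypothetical video-only + audio-only pair, as in a typical merge.
    requested_formats = [
        {'url': 'https://example.com/video.m3u8', 'acodec': 'none'},
        {'url': 'https://example.com/audio.m3u8', 'acodec': 'mp4a.40.2'},
    ]
    args = []
    for i, fmt in enumerate(requested_formats):
        args += ['-i', fmt['url']]
    args += ['-c', 'copy']
    for i, fmt in enumerate(requested_formats):
        a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v'
        args += ['-map', '%d:%s:0' % (i, a_or_v)]
    # -> ['-i', <video>, '-i', <audio>, '-c', 'copy',
    #     '-map', '0:v:0', '-map', '1:a:0']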
- if sys.platform != 'win32': - proc.communicate(b'q') + if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): + process_communicate_or_kill(proc, b'q') + else: + proc.kill() + proc.wait() raise return retval @@ -355,7 +503,7 @@ class AVconvFD(FFmpegFD): _BY_NAME = dict( (klass.get_basename(), klass) for name, klass in globals().items() - if name.endswith('FD') and name != 'ExternalFD' + if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD') ) @@ -368,4 +516,4 @@ def get_external_downloader(external_downloader): downloader . """ # Drop .exe extension on Windows bn = os.path.splitext(os.path.basename(external_downloader))[0] - return _BY_NAME[bn] + return _BY_NAME.get(bn) diff --git a/hypervideo_dl/downloader/f4m.py b/hypervideo_dl/downloader/f4m.py index 8dd3c2e..9da2776 100644 --- a/hypervideo_dl/downloader/f4m.py +++ b/hypervideo_dl/downloader/f4m.py @@ -267,13 +267,14 @@ class F4mFD(FragmentFD): media = doc.findall(_add_ns('media')) if not media: self.report_error('No media found') - for e in (doc.findall(_add_ns('drmAdditionalHeader')) - + doc.findall(_add_ns('drmAdditionalHeaderSet'))): - # If id attribute is missing it's valid for all media nodes - # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute - if 'id' not in e.attrib: - self.report_error('Missing ID in f4m DRM') - media = remove_encrypted_media(media) + if not self.params.get('allow_unplayable_formats'): + for e in (doc.findall(_add_ns('drmAdditionalHeader')) + + doc.findall(_add_ns('drmAdditionalHeaderSet'))): + # If id attribute is missing it's valid for all media nodes + # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute + if 'id' not in e.attrib: + self.report_error('Missing ID in f4m DRM') + media = remove_encrypted_media(media) if not media: self.report_error('Unsupported DRM') return media @@ -379,7 +380,7 @@ class F4mFD(FragmentFD): base_url_parsed = compat_urllib_parse_urlparse(base_url) - self._start_frag_download(ctx) + self._start_frag_download(ctx, info_dict) frag_index = 0 while fragments_list: @@ -433,6 +434,6 @@ class F4mFD(FragmentFD): msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) self.report_warning(msg) - self._finish_frag_download(ctx) + self._finish_frag_download(ctx, info_dict) return True diff --git a/hypervideo_dl/downloader/fragment.py b/hypervideo_dl/downloader/fragment.py index b82e3cf..57068db 100644 --- a/hypervideo_dl/downloader/fragment.py +++ b/hypervideo_dl/downloader/fragment.py @@ -3,10 +3,23 @@ from __future__ import division, unicode_literals import os import time import json +from math import ceil + +try: + import concurrent.futures + can_threaded_download = True +except ImportError: + can_threaded_download = False from .common import FileDownloader from .http import HttpFD +from ..aes import aes_cbc_decrypt_bytes +from ..compat import ( + compat_urllib_error, + compat_struct_pack, +) from ..utils import ( + DownloadError, error_to_compat_str, encodeFilename, sanitize_open, @@ -31,6 +44,7 @@ class FragmentFD(FileDownloader): Skip unavailable fragments (DASH and hlsnative only) keep_fragments: Keep downloaded fragments on disk after downloading is finished + _no_ytdl_file: Don't use .ytdl file For each incomplete fragment download hypervideo keeps on disk a special bookkeeping file with download state and metadata (in future such files will @@ -55,29 +69,31 @@ class FragmentFD(FileDownloader): def report_retry_fragment(self, err, frag_index, count, retries): self.to_screen( - 
'[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...' + '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...' % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) def report_skip_fragment(self, frag_index): - self.to_screen('[download] Skipping fragment %d...' % frag_index) + self.to_screen('[download] Skipping fragment %d ...' % frag_index) def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') return sanitized_Request(url, None, headers) if headers else url - def _prepare_and_start_frag_download(self, ctx): + def _prepare_and_start_frag_download(self, ctx, info_dict): self._prepare_frag_download(ctx) - self._start_frag_download(ctx) + self._start_frag_download(ctx, info_dict) - @staticmethod - def __do_ytdl_file(ctx): - return not ctx['live'] and not ctx['tmpfilename'] == '-' + def __do_ytdl_file(self, ctx): + return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file') def _read_ytdl_file(self, ctx): assert 'ytdl_corrupt' not in ctx stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') try: - ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] + ytdl_data = json.loads(stream.read()) + ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index'] + if 'extra_state' in ytdl_data['downloader']: + ctx['extra_state'] = ytdl_data['downloader']['extra_state'] except Exception: ctx['ytdl_corrupt'] = True finally: @@ -85,32 +101,42 @@ class FragmentFD(FileDownloader): def _write_ytdl_file(self, ctx): frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') - downloader = { - 'current_fragment': { - 'index': ctx['fragment_index'], - }, - } - if ctx.get('fragment_count') is not None: - downloader['fragment_count'] = ctx['fragment_count'] - frag_index_stream.write(json.dumps({'downloader': downloader})) - frag_index_stream.close() + try: + downloader = { + 'current_fragment': { + 'index': ctx['fragment_index'], + }, + } + if 'extra_state' in ctx: + downloader['extra_state'] = ctx['extra_state'] + if ctx.get('fragment_count') is not None: + downloader['fragment_count'] = ctx['fragment_count'] + frag_index_stream.write(json.dumps({'downloader': downloader})) + finally: + frag_index_stream.close() - def _download_fragment(self, ctx, frag_url, info_dict, headers=None): + def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None): fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) fragment_info_dict = { 'url': frag_url, 'http_headers': headers or info_dict.get('http_headers'), + 'request_data': request_data, + 'ctx_id': ctx.get('ctx_id'), } success = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: return False, None if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') - down, frag_sanitized = sanitize_open(fragment_filename, 'rb') + ctx['fragment_filename_sanitized'] = fragment_filename + return True, self._read_fragment(ctx) + + def _read_fragment(self, ctx): + down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb') ctx['fragment_filename_sanitized'] = frag_sanitized frag_content = down.read() down.close() - return True, frag_content + return frag_content def _append_fragment(self, ctx, frag_content): try: @@ -173,7 +199,7 @@ class FragmentFD(FileDownloader): '.ytdl file is corrupt' if is_corrupt else 'Inconsistent 
state of incomplete fragment download') self.report_warning( - '%s. Restarting from the beginning...' % message) + '%s. Restarting from the beginning ...' % message) ctx['fragment_index'] = resume_len = 0 if 'ytdl_corrupt' in ctx: del ctx['ytdl_corrupt'] @@ -192,9 +218,10 @@ class FragmentFD(FileDownloader): 'complete_frags_downloaded_bytes': resume_len, }) - def _start_frag_download(self, ctx): + def _start_frag_download(self, ctx, info_dict): resume_len = ctx['complete_frags_downloaded_bytes'] total_frags = ctx['total_frags'] + ctx_id = ctx.get('ctx_id') # This dict stores the download progress, it's updated by the progress # hook state = { @@ -218,9 +245,16 @@ class FragmentFD(FileDownloader): if s['status'] not in ('downloading', 'finished'): return + if ctx_id is not None and s.get('ctx_id') != ctx_id: + return + + state['max_progress'] = ctx.get('max_progress') + state['progress_idx'] = ctx.get('progress_idx') + time_now = time.time() state['elapsed'] = time_now - start frag_total_bytes = s.get('total_bytes') or 0 + s['fragment_info_dict'] = s.pop('info_dict', {}) if not ctx['live']: estimated_size = ( (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) @@ -243,13 +277,13 @@ class FragmentFD(FileDownloader): state['speed'] = s.get('speed') or ctx.get('speed') ctx['speed'] = state['speed'] ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes - self._hook_progress(state) + self._hook_progress(state, info_dict) ctx['dl'].add_progress_hook(frag_progress_hook) return start - def _finish_frag_download(self, ctx): + def _finish_frag_download(self, ctx, info_dict): ctx['dest_stream'].close() if self.__do_ytdl_file(ctx): ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) @@ -276,4 +310,177 @@ class FragmentFD(FileDownloader): 'filename': ctx['filename'], 'status': 'finished', 'elapsed': elapsed, + 'ctx_id': ctx.get('ctx_id'), + 'max_progress': ctx.get('max_progress'), + 'progress_idx': ctx.get('progress_idx'), + }, info_dict) + + def _prepare_external_frag_download(self, ctx): + if 'live' not in ctx: + ctx['live'] = False + if not ctx['live']: + total_frags_str = '%d' % ctx['total_frags'] + ad_frags = ctx.get('ad_frags', 0) + if ad_frags: + total_frags_str += ' (not including %d ad)' % ad_frags + else: + total_frags_str = 'unknown (live)' + self.to_screen( + '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + + tmpfilename = self.temp_name(ctx['filename']) + + # Should be initialized before ytdl file check + ctx.update({ + 'tmpfilename': tmpfilename, + 'fragment_index': 0, }) + + def decrypter(self, info_dict): + _key_cache = {} + + def _get_key(url): + if url not in _key_cache: + _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read() + return _key_cache[url] + + def decrypt_fragment(fragment, frag_content): + decrypt_info = fragment.get('decrypt_info') + if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': + return frag_content + iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence']) + decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI']) + # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block + # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, + # not what it decrypts to. 
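# For reference, the default-IV rule this decrypter relies on (RFC 8216,
# section 5.2): when an EXT-X-KEY has no IV attribute, the IV is the
# fragment's media sequence number as a 128-bit big-endian integer.
# A sketch with a hypothetical helper name:
def _default_hls_iv_sketch(media_sequence):
    import struct
    # 8 zero pad bytes, then the sequence number as a big-endian 64-bit
    # integer: the same '>8xq' layout used above
    return struct.pack('>8xq', media_sequence)
# e.g. _default_hls_iv_sketch(42) == bytes(8) + (42).to_bytes(8, 'big')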
+ if self.params.get('test', False): + return frag_content + padding_len = 16 - (len(frag_content) % 16) + decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv) + return decrypted_data[:-decrypted_data[-1]] + + return decrypt_fragment + + def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None): + ''' + @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... + all args must be either tuple or list + ''' + max_progress = len(args) + if max_progress == 1: + return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) + max_workers = self.params.get('concurrent_fragment_downloads', max_progress) + self._prepare_multiline_status(max_progress) + + def thread_func(idx, ctx, fragments, info_dict, tpe): + ctx['max_progress'] = max_progress + ctx['progress_idx'] = idx + return self.download_and_append_fragments(ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, tpe=tpe) + + class FTPE(concurrent.futures.ThreadPoolExecutor): + # has to stop this or it's going to wait on the worker thread itself + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + spins = [] + for idx, (ctx, fragments, info_dict) in enumerate(args): + tpe = FTPE(ceil(max_workers / max_progress)) + job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe) + spins.append((tpe, job)) + + result = True + for tpe, job in spins: + try: + result = result and job.result() + finally: + tpe.shutdown(wait=True) + return result + + def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None): + fragment_retries = self.params.get('fragment_retries', 0) + is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True) + if not pack_func: + pack_func = lambda frag_content, _: frag_content + + def download_fragment(fragment, ctx): + frag_index = ctx['fragment_index'] = fragment['frag_index'] + headers = info_dict.get('http_headers', {}).copy() + byte_range = fragment.get('byte_range') + if byte_range: + headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) + + # Never skip the first fragment + fatal = is_fatal(fragment.get('index') or (frag_index - 1)) + count, frag_content = 0, None + while count <= fragment_retries: + try: + success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers) + if not success: + return False, frag_index + break + except compat_urllib_error.HTTPError as err: + # Unavailable (possibly temporary) fragments may be served. + # First we try to retry then either skip or abort. + # See https://github.com/ytdl-org/youtube-dl/issues/10165, + # https://github.com/ytdl-org/youtube-dl/issues/10448). 
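# Stepping back to the FTPE helper defined above: ThreadPoolExecutor.__exit__
# normally calls shutdown(wait=True), which would deadlock when a job running
# on the pool re-enters that same pool as a context manager. A minimal sketch
# of the idea (hypothetical class name):
import concurrent.futures

class _NonWaitingTPESketch(concurrent.futures.ThreadPoolExecutor):
    def __exit__(self, exc_type, exc_val, exc_tb):
        # skip the implicit blocking shutdown; the code that submitted the
        # job calls shutdown(wait=True) once the result has been collected
        pass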
+ count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, frag_index, count, fragment_retries) + except DownloadError: + # Don't retry fragment if error occurred during HTTP downloading + # itself since it has own retry settings + if not fatal: + break + raise + + if count > fragment_retries: + if not fatal: + return False, frag_index + ctx['dest_stream'].close() + self.report_error('Giving up after %s fragment retries' % fragment_retries) + return False, frag_index + return frag_content, frag_index + + def append_fragment(frag_content, frag_index, ctx): + if not frag_content: + if not is_fatal(frag_index - 1): + self.report_skip_fragment(frag_index) + return True + else: + ctx['dest_stream'].close() + self.report_error( + 'fragment %s not found, unable to continue' % frag_index) + return False + self._append_fragment(ctx, pack_func(frag_content, frag_index)) + return True + + decrypt_fragment = self.decrypter(info_dict) + + max_workers = self.params.get('concurrent_fragment_downloads', 1) + if can_threaded_download and max_workers > 1: + + def _download_fragment(fragment): + ctx_copy = ctx.copy() + frag_content, frag_index = download_fragment(fragment, ctx_copy) + return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') + + self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') + with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: + for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): + ctx['fragment_filename_sanitized'] = frag_filename + ctx['fragment_index'] = frag_index + result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + if not result: + return False + else: + for fragment in fragments: + frag_content, frag_index = download_fragment(fragment, ctx) + result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + if not result: + return False + + if finish_func is not None: + ctx['dest_stream'].write(finish_func()) + ctx['dest_stream'].flush() + self._finish_frag_download(ctx, info_dict) + return True diff --git a/hypervideo_dl/downloader/hls.py b/hypervideo_dl/downloader/hls.py index 7aaebc9..61312c5 100644 --- a/hypervideo_dl/downloader/hls.py +++ b/hypervideo_dl/downloader/hls.py @@ -1,36 +1,37 @@ from __future__ import unicode_literals import re +import io import binascii -try: - from Crypto.Cipher import AES - can_decrypt_frag = True -except ImportError: - can_decrypt_frag = False +from ..downloader import get_suitable_downloader from .fragment import FragmentFD from .external import FFmpegFD from ..compat import ( - compat_urllib_error, + compat_pycrypto_AES, compat_urlparse, - compat_struct_pack, ) from ..utils import ( parse_m3u8_attributes, update_url_query, + bug_reports_message, ) +from .. import webvtt class HlsFD(FragmentFD): - """ A limited implementation that does not require ffmpeg """ + """ + Download segments in a m3u8 manifest. 
External downloaders can take over + the fragment downloads by supporting the 'm3u8_frag_urls' protocol and + re-defining 'supports_manifest' function + """ FD_NAME = 'hlsnative' @staticmethod - def can_download(manifest, info_dict): - UNSUPPORTED_FEATURES = ( - r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] + def can_download(manifest, info_dict, allow_unplayable_formats=False): + UNSUPPORTED_FEATURES = [ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # Live streams heuristic does not always work (e.g. geo restricted to Germany @@ -42,20 +43,23 @@ class HlsFD(FragmentFD): # no segments will definitely be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # # event media playlists [4] - r'#EXT-X-MAP:', # media initialization [5] - + # r'#EXT-X-MAP:', # media initialization [5] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5 - ) - check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] - is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest - check_results.append(can_decrypt_frag or not is_aes128_enc) - check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)) - check_results.append(not info_dict.get('is_live')) - return all(check_results) + ] + if not allow_unplayable_formats: + UNSUPPORTED_FEATURES += [ + r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] + ] + + def check_results(): + yield not info_dict.get('is_live') + for feature in UNSUPPORTED_FEATURES: + yield not re.search(feature, manifest) + return all(check_results()) def real_download(self, filename, info_dict): man_url = info_dict['url'] @@ -65,17 +69,32 @@ class HlsFD(FragmentFD): man_url = urlh.geturl() s = urlh.read().decode('utf-8', 'ignore') - if not self.can_download(s, info_dict): - if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): - self.report_error('pycrypto not found. 
Please install it.') - return False - self.report_warning( - 'hlsnative has detected features it does not support, ' - 'extraction will be delegated to ffmpeg') + can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None + if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: + if FFmpegFD.available(): + can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' + else: + message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' + 'Decryption will be performed natively, but will be extremely slow') + if not can_download: + message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) + self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') return fd.real_download(filename, info_dict) + elif message: + self.report_warning(message) + + is_webvtt = info_dict['ext'] == 'vtt' + if is_webvtt: + real_downloader = None # Packing the fragments is not currently supported for external downloader + else: + real_downloader = get_suitable_downloader( + info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-')) + if real_downloader and not real_downloader.supports_manifest(s): + real_downloader = None + if real_downloader: + self.to_screen( + '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s @@ -85,6 +104,8 @@ class HlsFD(FragmentFD): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + fragments = [] + media_frags = 0 ad_frags = 0 ad_frag_next = False @@ -109,12 +130,14 @@ class HlsFD(FragmentFD): 'ad_frags': ad_frags, } - self._prepare_and_start_frag_download(ctx) + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, info_dict) - fragment_retries = self.params.get('fragment_retries', 0) - skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - test = self.params.get('test', False) + extra_state = ctx.setdefault('extra_state', {}) + format_index = info_dict.get('format_index') extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url: @@ -123,12 +146,15 @@ class HlsFD(FragmentFD): media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} byte_range = {} + discontinuity_count = 0 frag_index = 0 ad_frag_next = False for line in s.splitlines(): line = line.strip() if line: if not line.startswith('#'): + if format_index and discontinuity_count != format_index: + continue if ad_frag_next: continue frag_index += 1 @@ -140,50 +166,49 @@ class HlsFD(FragmentFD): else compat_urlparse.urljoin(man_url, line)) if extra_query: frag_url = update_url_query(frag_url, extra_query) - count = 0 - headers = info_dict.get('http_headers', {}) - if byte_range: - headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) - while count <= fragment_retries: - try: - success, frag_content = self._download_fragment( - ctx, frag_url, info_dict, headers) - if not success: - return False - break - except compat_urllib_error.HTTPError as err: - # Unavailable (possibly temporary) fragments may be served. 
- # First we try to retry then either skip or abort. - # See https://github.com/ytdl-org/youtube-dl/issues/10165, - # https://github.com/ytdl-org/youtube-dl/issues/10448). - count += 1 - if count <= fragment_retries: - self.report_retry_fragment(err, frag_index, count, fragment_retries) - if count > fragment_retries: - if skip_unavailable_fragments: - i += 1 - media_sequence += 1 - self.report_skip_fragment(frag_index) - continue + + fragments.append({ + 'frag_index': frag_index, + 'url': frag_url, + 'decrypt_info': decrypt_info, + 'byte_range': byte_range, + 'media_sequence': media_sequence, + }) + media_sequence += 1 + + elif line.startswith('#EXT-X-MAP'): + if format_index and discontinuity_count != format_index: + continue + if frag_index > 0: self.report_error( - 'giving up after %s fragment retries' % fragment_retries) + 'Initialization fragment found after media fragments, unable to download') return False - if decrypt_info['METHOD'] == 'AES-128': - iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) - decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( - self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read() - # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block - # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, - # not what it decrypts to. - if not test: - frag_content = AES.new( - decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) - self._append_fragment(ctx, frag_content) - # We only download the first fragment during the test - if test: - break - i += 1 + frag_index += 1 + map_info = parse_m3u8_attributes(line[11:]) + frag_url = ( + map_info.get('URI') + if re.match(r'^https?://', map_info.get('URI')) + else compat_urlparse.urljoin(man_url, map_info.get('URI'))) + if extra_query: + frag_url = update_url_query(frag_url, extra_query) + + fragments.append({ + 'frag_index': frag_index, + 'url': frag_url, + 'decrypt_info': decrypt_info, + 'byte_range': byte_range, + 'media_sequence': media_sequence + }) media_sequence += 1 + + if map_info.get('BYTERANGE'): + splitted_byte_range = map_info.get('BYTERANGE').split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } + elif line.startswith('#EXT-X-KEY'): decrypt_url = decrypt_info.get('URI') decrypt_info = parse_m3u8_attributes(line[11:]) @@ -197,6 +222,7 @@ class HlsFD(FragmentFD): decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None + elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) elif line.startswith('#EXT-X-BYTERANGE'): @@ -210,7 +236,114 @@ class HlsFD(FragmentFD): ad_frag_next = True elif is_ad_fragment_end(line): ad_frag_next = False + elif line.startswith('#EXT-X-DISCONTINUITY'): + discontinuity_count += 1 + i += 1 + + # We only download the first fragment during the test + if self.params.get('test', False): + fragments = [fragments[0] if fragments else None] + + if real_downloader: + info_dict['fragments'] = fragments + fd = real_downloader(self.ydl, self.params) + # TODO: Make progress updates work without hooking twice + # for ph in self._progress_hooks: + # fd.add_progress_hook(ph) + return fd.real_download(filename, info_dict) + + if is_webvtt: + 
def pack_fragment(frag_content, frag_index): + output = io.StringIO() + adjust = 0 + overflow = False + mpegts_last = None + for block in webvtt.parse_fragment(frag_content): + if isinstance(block, webvtt.CueBlock): + extra_state['webvtt_mpegts_last'] = mpegts_last + if overflow: + extra_state['webvtt_mpegts_adjust'] += 1 + overflow = False + block.start += adjust + block.end += adjust + + dedup_window = extra_state.setdefault('webvtt_dedup_window', []) + + ready = [] + + i = 0 + is_new = True + while i < len(dedup_window): + wcue = dedup_window[i] + wblock = webvtt.CueBlock.from_json(wcue) + i += 1 + if wblock.hinges(block): + wcue['end'] = block.end + is_new = False + continue + if wblock == block: + is_new = False + continue + if wblock.end > block.start: + continue + ready.append(wblock) + i -= 1 + del dedup_window[i] + + if is_new: + dedup_window.append(block.as_json) + for block in ready: + block.write_into(output) + + # we only emit cues once they fall out of the duplicate window + continue + elif isinstance(block, webvtt.Magic): + # take care of MPEG PES timestamp overflow + if block.mpegts is None: + block.mpegts = 0 + extra_state.setdefault('webvtt_mpegts_adjust', 0) + block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33 + if block.mpegts < extra_state.get('webvtt_mpegts_last', 0): + overflow = True + block.mpegts += 1 << 33 + mpegts_last = block.mpegts + + if frag_index == 1: + extra_state['webvtt_mpegts'] = block.mpegts or 0 + extra_state['webvtt_local'] = block.local or 0 + # XXX: block.local = block.mpegts = None ? + else: + if block.mpegts is not None and block.local is not None: + adjust = ( + (block.mpegts - extra_state.get('webvtt_mpegts', 0)) + - (block.local - extra_state.get('webvtt_local', 0)) + ) + continue + elif isinstance(block, webvtt.HeaderBlock): + if frag_index != 1: + # XXX: this should probably be silent as well + # or verify that all segments contain the same data + self.report_warning(bug_reports_message( + 'Discarding a %s block found in the middle of the stream; ' + 'if the subtitles display incorrectly,' + % (type(block).__name__))) + continue + block.write_into(output) + + return output.getvalue().encode('utf-8') + + def fin_fragments(): + dedup_window = extra_state.get('webvtt_dedup_window') + if not dedup_window: + return b'' + + output = io.StringIO() + for cue in dedup_window: + webvtt.CueBlock.from_json(cue).write_into(output) - self._finish_frag_download(ctx) + return output.getvalue().encode('utf-8') - return True + self.download_and_append_fragments( + ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) + else: + return self.download_and_append_fragments(ctx, fragments, info_dict) diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py index d8ac41d..2e95bb9 100644 --- a/hypervideo_dl/downloader/http.py +++ b/hypervideo_dl/downloader/http.py @@ -18,6 +18,7 @@ from ..utils import ( int_or_none, sanitize_open, sanitized_Request, + ThrottledDownload, write_xattr, XAttrMetadataError, XAttrUnavailableError, @@ -27,6 +28,7 @@ from ..utils import ( class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] + request_data = info_dict.get('request_data', None) class DownloadContext(dict): __getattr__ = dict.get @@ -46,8 +48,9 @@ class HttpFD(FileDownloader): is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( - info_dict.get('downloader_options', {}).get('http_chunk_size') - or self.params.get('http_chunk_size') or 0) 
+ self.params.get('http_chunk_size') + or info_dict.get('downloader_options', {}).get('http_chunk_size') + or 0) ctx.open_mode = 'wb' ctx.resume_len = 0 @@ -55,6 +58,7 @@ class HttpFD(FileDownloader): ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() ctx.chunk_size = None + throttle_start = None if self.params.get('continuedl', True): # Establish possible resume length @@ -101,7 +105,7 @@ class HttpFD(FileDownloader): range_end = ctx.data_len - 1 has_range = range_start is not None ctx.has_range = has_range - request = sanitized_Request(url, None, headers) + request = sanitized_Request(url, request_data, headers) if has_range: set_range(request, range_start, range_end) # Establish connection @@ -152,7 +156,7 @@ class HttpFD(FileDownloader): try: # Open the connection again without the range header ctx.data = self.ydl.urlopen( - sanitized_Request(url, None, headers)) + sanitized_Request(url, request_data, headers)) content_length = ctx.data.info()['Content-Length'] except (compat_urllib_error.HTTPError, ) as err: if err.code < 500 or err.code >= 600: @@ -175,7 +179,7 @@ class HttpFD(FileDownloader): 'status': 'finished', 'downloaded_bytes': ctx.resume_len, 'total_bytes': ctx.resume_len, - }) + }, info_dict) raise SucceedDownload() else: # The length does not match, we start the download over @@ -194,6 +198,7 @@ class HttpFD(FileDownloader): raise RetryDownload(err) def download(): + nonlocal throttle_start data_len = ctx.data.info().get('Content-length', None) # Range HTTP header may be ignored/unsupported by a webserver @@ -235,7 +240,7 @@ class HttpFD(FileDownloader): while True: try: # Download and write - data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter)) + data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) # socket.timeout is a subclass of socket.error but may not have # errno set except socket.timeout as e: @@ -307,11 +312,24 @@ class HttpFD(FileDownloader): 'eta': eta, 'speed': speed, 'elapsed': now - ctx.start_time, - }) + 'ctx_id': info_dict.get('ctx_id'), + }, info_dict) if data_len is not None and byte_counter == data_len: break + if speed and speed < (self.params.get('throttledratelimit') or 0): + # The speed must stay below the limit for 3 seconds + # This prevents raising error when the speed temporarily goes down + if throttle_start is None: + throttle_start = now + elif now - throttle_start > 3: + if ctx.stream is not None and ctx.tmpfilename != '-': + ctx.stream.close() + raise ThrottledDownload() + elif speed: + throttle_start = None + if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: ctx.resume_len = byte_counter # ctx.block_size = block_size @@ -342,7 +360,8 @@ class HttpFD(FileDownloader): 'filename': ctx.filename, 'status': 'finished', 'elapsed': time.time() - ctx.start_time, - }) + 'ctx_id': info_dict.get('ctx_id'), + }, info_dict) return True @@ -354,6 +373,8 @@ class HttpFD(FileDownloader): count += 1 if count <= retries: self.report_retry(e.source_error, count, retries) + else: + self.to_screen(f'[download] Got server HTTP error: {e.source_error}') continue except NextFragment: continue diff --git a/hypervideo_dl/downloader/ism.py b/hypervideo_dl/downloader/ism.py index 1ca666b..09516ab 100644 --- a/hypervideo_dl/downloader/ism.py +++ b/hypervideo_dl/downloader/ism.py @@ -48,7 +48,7 @@ def write_piff_header(stream, params): language = params.get('language', 'und') height = 
params.get('height', 0) width = params.get('width', 0) - is_audio = width == 0 and height == 0 + stream_type = params['stream_type'] creation_time = modification_time = int(time.time()) ftyp_payload = b'isml' # major brand @@ -77,7 +77,7 @@ def write_piff_header(stream, params): tkhd_payload += u32.pack(0) * 2 # reserved tkhd_payload += s16.pack(0) # layer tkhd_payload += s16.pack(0) # alternate group - tkhd_payload += s88.pack(1 if is_audio else 0) # volume + tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume tkhd_payload += u16.pack(0) # reserved tkhd_payload += unity_matrix tkhd_payload += u1616.pack(width) @@ -93,19 +93,34 @@ def write_piff_header(stream, params): mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box hdlr_payload = u32.pack(0) # pre defined - hdlr_payload += b'soun' if is_audio else b'vide' # handler type - hdlr_payload += u32.pack(0) * 3 # reserved - hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name + if stream_type == 'audio': # handler type + hdlr_payload += b'soun' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'SoundHandler\0' # name + elif stream_type == 'video': + hdlr_payload += b'vide' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'VideoHandler\0' # name + elif stream_type == 'text': + hdlr_payload += b'subt' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'SubtitleHandler\0' # name + else: + assert False mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box - if is_audio: + if stream_type == 'audio': smhd_payload = s88.pack(0) # balance smhd_payload += u16.pack(0) # reserved media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header - else: + elif stream_type == 'video': vmhd_payload = u16.pack(0) # graphics mode vmhd_payload += u16.pack(0) * 3 # opcolor media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header + elif stream_type == 'text': + media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header + else: + assert False minf_payload = media_header_box dref_payload = u32.pack(1) # entry count @@ -117,7 +132,7 @@ def write_piff_header(stream, params): sample_entry_payload = u8.pack(0) * 6 # reserved sample_entry_payload += u16.pack(1) # data reference index - if is_audio: + if stream_type == 'audio': sample_entry_payload += u32.pack(0) * 2 # reserved sample_entry_payload += u16.pack(params.get('channels', 2)) sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) @@ -127,7 +142,7 @@ def write_piff_header(stream, params): if fourcc == 'AACL': sample_entry_box = box(b'mp4a', sample_entry_payload) - else: + elif stream_type == 'video': sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u32.pack(0) * 3 # pre defined @@ -155,6 +170,18 @@ def write_piff_header(stream, params): avcc_payload += pps sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry + else: + assert False + elif stream_type == 'text': + if fourcc == 'TTML': + sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace + sample_entry_payload += b'\0' # schema location + sample_entry_payload += b'\0' # auxilary mime types(??) 
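# For orientation: write_piff_header assembles the PIFF/ISO BMFF init segment
# from nested boxes through the module's box/full_box helpers. A sketch of
# those two primitives, mirroring their packing (an illustration, not the
# module's exact code):
def _box_sketch(box_type, payload):
    import struct
    # 32-bit box size (including this 8-byte header) + fourcc + payload
    return struct.pack('>I', 8 + len(payload)) + box_type + payload

def _full_box_sketch(box_type, version, flags, payload):
    import struct
    # a "full box" prefixes the payload with an 8-bit version and 24-bit flags
    return _box_sketch(box_type, struct.pack('>B', version) + struct.pack('>I', flags)[1:] + payload)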
+ sample_entry_box = box(b'stpp', sample_entry_payload) + else: + assert False + else: + assert False stsd_payload += sample_entry_box stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box @@ -219,12 +246,15 @@ class IsmFD(FragmentFD): 'total_frags': len(segments), } - self._prepare_and_start_frag_download(ctx) + self._prepare_and_start_frag_download(ctx, info_dict) + + extra_state = ctx.setdefault('extra_state', { + 'ism_track_written': False, + }) fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - track_written = False frag_index = 0 for i, segment in enumerate(segments): frag_index += 1 @@ -236,11 +266,11 @@ class IsmFD(FragmentFD): success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False - if not track_written: + if not extra_state['ism_track_written']: tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] write_piff_header(ctx['dest_stream'], info_dict['_download_params']) - track_written = True + extra_state['ism_track_written'] = True self._append_fragment(ctx, frag_content) break except compat_urllib_error.HTTPError as err: @@ -254,6 +284,6 @@ class IsmFD(FragmentFD): self.report_error('giving up after %s fragment retries' % fragment_retries) return False - self._finish_frag_download(ctx) + self._finish_frag_download(ctx, info_dict) return True diff --git a/hypervideo_dl/downloader/mhtml.py b/hypervideo_dl/downloader/mhtml.py new file mode 100644 index 0000000..f0f4dc6 --- /dev/null +++ b/hypervideo_dl/downloader/mhtml.py @@ -0,0 +1,202 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import io +import quopri +import re +import uuid + +from .fragment import FragmentFD +from ..utils import ( + escapeHTML, + formatSeconds, + srt_subtitles_timecode, + urljoin, +) +from ..version import __version__ as YT_DLP_VERSION + + +class MhtmlFD(FragmentFD): + FD_NAME = 'mhtml' + + _STYLESHEET = """\ +html, body { + margin: 0; + padding: 0; + height: 100vh; +} + +html { + overflow-y: scroll; + scroll-snap-type: y mandatory; +} + +body { + scroll-snap-type: y mandatory; + display: flex; + flex-flow: column; +} + +body > figure { + max-width: 100vw; + max-height: 100vh; + scroll-snap-align: center; +} + +body > figure > figcaption { + text-align: center; + height: 2.5em; +} + +body > figure > img { + display: block; + margin: auto; + max-width: 100%; + max-height: calc(100vh - 5em); +} +""" + _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) + _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) + + @staticmethod + def _escape_mime(s): + return '=?utf-8?Q?' + (b''.join( + bytes((b,)) if b >= 0x20 else b'=%02X' % b + for b in quopri.encodestring(s.encode('utf-8'), header=True) + )).decode('us-ascii') + '?=' + + def _gen_cid(self, i, fragment, frag_boundary): + return '%u.%s@hypervideo.github.io.invalid' % (i, frag_boundary) + + def _gen_stub(self, *, fragments, frag_boundary, title): + output = io.StringIO() + + output.write(( + '<!DOCTYPE html>' + '<html>' + '<head>' + '' '<meta name="generator" content="hypervideo {version}">' + '' '<title>{title}</title>' + '' '<style>{styles}</style>' + '<body>' + ).format( + version=escapeHTML(YT_DLP_VERSION), + styles=self._STYLESHEET, + title=escapeHTML(title) + )) + + t0 = 0 + for i, frag in enumerate(fragments): + output.write('
<figure>') + try: + t1 = t0 + frag['duration'] + output.write(( + '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>' + ).format( + num=i + 1, + t0=srt_subtitles_timecode(t0), + t1=srt_subtitles_timecode(t1), + duration=formatSeconds(frag['duration'], msec=True) + )) + except (KeyError, ValueError, TypeError): + t1 = None + output.write(( + '<figcaption>Slide #{num}</figcaption>' + ).format(num=i + 1)) + output.write('<img src="cid:{cid}">'.format( + cid=self._gen_cid(i, frag, frag_boundary))) + output.write('</figure>
') + t0 = t1 + + return output.getvalue() + + def real_download(self, filename, info_dict): + fragment_base_url = info_dict.get('fragment_base_url') + fragments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] + title = info_dict['title'] + origin = info_dict['webpage_url'] + + ctx = { + 'filename': filename, + 'total_frags': len(fragments), + } + + self._prepare_and_start_frag_download(ctx, info_dict) + + extra_state = ctx.setdefault('extra_state', { + 'header_written': False, + 'mime_boundary': str(uuid.uuid4()).replace('-', ''), + }) + + frag_boundary = extra_state['mime_boundary'] + + if not extra_state['header_written']: + stub = self._gen_stub( + fragments=fragments, + frag_boundary=frag_boundary, + title=title + ) + + ctx['dest_stream'].write(( + 'MIME-Version: 1.0\r\n' + 'From: <nowhere@hypervideo.github.io.invalid>\r\n' + 'To: <nowhere@hypervideo.github.io.invalid>\r\n' + 'Subject: {title}\r\n' + 'Content-type: multipart/related; ' + '' 'boundary="{boundary}"; ' + '' 'type="text/html"\r\n' + 'X.hypervideo.Origin: {origin}\r\n' + '\r\n' + '--{boundary}\r\n' + 'Content-Type: text/html; charset=utf-8\r\n' + 'Content-Length: {length}\r\n' + '\r\n' + '{stub}\r\n' + ).format( + origin=origin, + boundary=frag_boundary, + length=len(stub), + title=self._escape_mime(title), + stub=stub + ).encode('utf-8')) + extra_state['header_written'] = True + + for i, fragment in enumerate(fragments): + if (i + 1) <= ctx['fragment_index']: + continue + + fragment_url = urljoin(fragment_base_url, fragment['path']) + success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) + if not success: + continue + + mime_type = b'image/jpeg' + if frag_content.startswith(b'\x89PNG\r\n\x1a\n'): + mime_type = b'image/png' + if frag_content.startswith((b'GIF87a', b'GIF89a')): + mime_type = b'image/gif' + if frag_content.startswith(b'RIFF') and frag_content[8:12] == b'WEBP': + mime_type = b'image/webp' + + frag_header = io.BytesIO() + frag_header.write( + b'--%b\r\n' % frag_boundary.encode('us-ascii')) + frag_header.write( + b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii')) + frag_header.write( + b'Content-type: %b\r\n' % mime_type) + frag_header.write( + b'Content-length: %u\r\n' % len(frag_content)) + frag_header.write( + b'Content-location: %b\r\n' % fragment_url.encode('us-ascii')) + frag_header.write( + b'X.hypervideo.Duration: %f\r\n' % fragment['duration']) + frag_header.write(b'\r\n') + self._append_fragment( + ctx, frag_header.getvalue() + frag_content + b'\r\n') + + ctx['dest_stream'].write( + b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii')) + self._finish_frag_download(ctx, info_dict) + return True diff --git a/hypervideo_dl/downloader/niconico.py b/hypervideo_dl/downloader/niconico.py new file mode 100644 index 0000000..521dfec --- /dev/null +++ b/hypervideo_dl/downloader/niconico.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import threading + +from .common import FileDownloader +from ..downloader import get_suitable_downloader +from ..extractor.niconico import NiconicoIE +from ..utils import sanitized_Request + + +class NiconicoDmcFD(FileDownloader): + """ Downloading niconico douga from DMC with heartbeat """ + + FD_NAME = 'niconico_dmc' + + def real_download(self, filename, info_dict): + self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) + + ie = NiconicoIE(self.ydl) + info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) + + fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params) +
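# The heartbeat code below keeps the DMC session alive while the inner
# downloader runs, by re-arming a threading.Timer after every ping. A
# condensed sketch of the same pattern (names here are hypothetical):
def _heartbeat_sketch(ping, interval=30):
    import threading
    stopped = threading.Event()

    def beat():
        if stopped.is_set():
            return
        ping()  # e.g. POST the keep-alive payload to the session API
        timer = threading.Timer(interval, beat)
        timer.daemon = True
        timer.start()

    beat()
    return stopped.set  # call this once the download completes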
success = download_complete = False + timer = [None] + heartbeat_lock = threading.Lock() + heartbeat_url = heartbeat_info_dict['url'] + heartbeat_data = heartbeat_info_dict['data'].encode() + heartbeat_interval = heartbeat_info_dict.get('interval', 30) + + request = sanitized_Request(heartbeat_url, heartbeat_data) + + def heartbeat(): + try: + self.ydl.urlopen(request).read() + except Exception: + self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) + + with heartbeat_lock: + if not download_complete: + timer[0] = threading.Timer(heartbeat_interval, heartbeat) + timer[0].start() + + heartbeat_info_dict['ping']() + self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval)) + try: + heartbeat() + if type(fd).__name__ == 'HlsFD': + info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0]) + success = fd.real_download(filename, info_dict) + finally: + if heartbeat_lock: + with heartbeat_lock: + timer[0].cancel() + download_complete = True + return success diff --git a/hypervideo_dl/downloader/rtmp.py b/hypervideo_dl/downloader/rtmp.py index fbb7f51..6dca647 100644 --- a/hypervideo_dl/downloader/rtmp.py +++ b/hypervideo_dl/downloader/rtmp.py @@ -66,7 +66,7 @@ class RtmpFD(FileDownloader): 'eta': eta, 'elapsed': time_now - start, 'speed': speed, - }) + }, info_dict) cursor_in_new_line = False else: # no percent for live streams @@ -82,18 +82,20 @@ class RtmpFD(FileDownloader): 'status': 'downloading', 'elapsed': time_now - start, 'speed': speed, - }) + }, info_dict) cursor_in_new_line = False elif self.params.get('verbose', False): if not cursor_in_new_line: self.to_screen('') cursor_in_new_line = True self.to_screen('[rtmpdump] ' + line) - finally: + if not cursor_in_new_line: + self.to_screen('') + return proc.wait() + except BaseException: # Including KeyboardInterrupt + proc.kill() proc.wait() - if not cursor_in_new_line: - self.to_screen('') - return proc.returncode + raise url = info_dict['url'] player_url = info_dict.get('player_url') @@ -115,7 +117,7 @@ class RtmpFD(FileDownloader): # Check for rtmpdump first if not check_executable('rtmpdump', ['-h']): - self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.') + self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install') return False # Download using rtmpdump. rtmpdump returns exit code 2 when @@ -206,7 +208,7 @@ class RtmpFD(FileDownloader): 'filename': filename, 'status': 'finished', 'elapsed': time.time() - started, - }) + }, info_dict) return True else: self.to_stderr('\n') diff --git a/hypervideo_dl/downloader/rtsp.py b/hypervideo_dl/downloader/rtsp.py index 939358b..7815d59 100644 --- a/hypervideo_dl/downloader/rtsp.py +++ b/hypervideo_dl/downloader/rtsp.py @@ -24,7 +24,7 @@ class RtspFD(FileDownloader): args = [ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] else: - self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') + self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. 
Please install one') return False self._debug_cmd(args) @@ -39,7 +39,7 @@ class RtspFD(FileDownloader): 'total_bytes': fsize, 'filename': filename, 'status': 'finished', - }) + }, info_dict) return True else: self.to_stderr('\n') diff --git a/hypervideo_dl/downloader/websocket.py b/hypervideo_dl/downloader/websocket.py new file mode 100644 index 0000000..0882220 --- /dev/null +++ b/hypervideo_dl/downloader/websocket.py @@ -0,0 +1,59 @@ +import os +import signal +import asyncio +import threading + +try: + import websockets + has_websockets = True +except ImportError: + has_websockets = False + +from .common import FileDownloader +from .external import FFmpegFD + + +class FFmpegSinkFD(FileDownloader): + """ A sink to ffmpeg for downloading fragments in any form """ + + def real_download(self, filename, info_dict): + info_copy = info_dict.copy() + info_copy['url'] = '-' + + async def call_conn(proc, stdin): + try: + await self.real_connection(stdin, info_dict) + except (BrokenPipeError, OSError): + pass + finally: + try: + stdin.flush() + stdin.close() + except OSError: + pass + os.kill(os.getpid(), signal.SIGINT) + + class FFmpegStdinFD(FFmpegFD): + @classmethod + def get_basename(cls): + return FFmpegFD.get_basename() + + def on_process_started(self, proc, stdin): + thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), )) + thread.start() + + return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy) + + async def real_connection(self, sink, info_dict): + """ Override this in subclasses """ + raise NotImplementedError('This method must be implemented by subclasses') + + +class WebSocketFragmentFD(FFmpegSinkFD): + async def real_connection(self, sink, info_dict): + async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws: + while True: + recv = await ws.recv() + if isinstance(recv, str): + recv = recv.encode('utf8') + sink.write(recv) diff --git a/hypervideo_dl/downloader/youtube_live_chat.py b/hypervideo_dl/downloader/youtube_live_chat.py new file mode 100644 index 0000000..ef4205e --- /dev/null +++ b/hypervideo_dl/downloader/youtube_live_chat.py @@ -0,0 +1,236 @@ +from __future__ import division, unicode_literals + +import json +import time + +from .fragment import FragmentFD +from ..compat import compat_urllib_error +from ..utils import ( + try_get, + dict_get, + int_or_none, + RegexNotFoundError, +) +from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE + + +class YoutubeLiveChatFD(FragmentFD): + """ Downloads YouTube live chats fragment by fragment """ + + FD_NAME = 'youtube_live_chat' + + def real_download(self, filename, info_dict): + video_id = info_dict['video_id'] + self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + + fragment_retries = self.params.get('fragment_retries', 0) + test = self.params.get('test', False) + + ctx = { + 'filename': filename, + 'live': True, + 'total_frags': None, + } + + ie = YT_BaseIE(self.ydl) + + start_time = int(time.time() * 1000) + + def dl_fragment(url, data=None, headers=None): + http_headers = info_dict.get('http_headers', {}) + if headers: + http_headers = http_headers.copy() + http_headers.update(headers) + return self._download_fragment(ctx, url, info_dict, http_headers, data) + + def parse_actions_replay(live_chat_continuation): + offset = continuation_id = click_tracking_params = None + processed_fragment = bytearray() + for action in live_chat_continuation.get('actions', []): + if 'replayChatItemAction' in action: + 
replay_chat_item_action = action['replayChatItemAction'] + offset = int(replay_chat_item_action['videoOffsetTimeMsec']) + processed_fragment.extend( + json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') + if offset is not None: + continuation = try_get( + live_chat_continuation, + lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict) + if continuation: + continuation_id = continuation.get('continuation') + click_tracking_params = continuation.get('clickTrackingParams') + self._append_fragment(ctx, processed_fragment) + return continuation_id, offset, click_tracking_params + + def try_refresh_replay_beginning(live_chat_continuation): + # choose the second option that contains the unfiltered live chat replay + refresh_continuation = try_get( + live_chat_continuation, + lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict) + if refresh_continuation: + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + refresh_continuation_id = refresh_continuation.get('continuation') + offset = 0 + click_tracking_params = refresh_continuation.get('trackingParams') + return refresh_continuation_id, offset, click_tracking_params + return parse_actions_replay(live_chat_continuation) + + live_offset = 0 + + def parse_actions_live(live_chat_continuation): + nonlocal live_offset + continuation_id = click_tracking_params = None + processed_fragment = bytearray() + for action in live_chat_continuation.get('actions', []): + timestamp = self.parse_live_timestamp(action) + if timestamp is not None: + live_offset = timestamp - start_time + # compatibility with replay format + pseudo_action = { + 'replayChatItemAction': {'actions': [action]}, + 'videoOffsetTimeMsec': str(live_offset), + 'isLive': True, + } + processed_fragment.extend( + json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n') + continuation_data_getters = [ + lambda x: x['continuations'][0]['invalidationContinuationData'], + lambda x: x['continuations'][0]['timedContinuationData'], + ] + continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict) + if continuation_data: + continuation_id = continuation_data.get('continuation') + click_tracking_params = continuation_data.get('clickTrackingParams') + timeout_ms = int_or_none(continuation_data.get('timeoutMs')) + if timeout_ms is not None: + time.sleep(timeout_ms / 1000) + self._append_fragment(ctx, processed_fragment) + return continuation_id, live_offset, click_tracking_params + + def download_and_parse_fragment(url, frag_index, request_data=None, headers=None): + count = 0 + while count <= fragment_retries: + try: + success, raw_fragment = dl_fragment(url, request_data, headers) + if not success: + return False, None, None, None + try: + data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + data = None + if not data: + data = json.loads(raw_fragment) + live_chat_continuation = try_get( + data, + lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} + if info_dict['protocol'] == 'youtube_live_chat_replay': + if frag_index == 1: + continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation) + else: + continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation) + elif info_dict['protocol'] == 'youtube_live_chat': + continuation_id, offset, 
click_tracking_params = parse_actions_live(live_chat_continuation) + return True, continuation_id, offset, click_tracking_params + except compat_urllib_error.HTTPError as err: + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, frag_index, count, fragment_retries) + if count > fragment_retries: + self.report_error('giving up after %s fragment retries' % fragment_retries) + return False, None, None, None + + self._prepare_and_start_frag_download(ctx, info_dict) + + success, raw_fragment = dl_fragment(info_dict['url']) + if not success: + return False + try: + data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + return False + continuation_id = try_get( + data, + lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']) + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + + ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace')) + + if not ytcfg: + return False + api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY']) + innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT']) + if not api_key or not innertube_context: + return False + visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str) + if info_dict['protocol'] == 'youtube_live_chat_replay': + url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id + elif info_dict['protocol'] == 'youtube_live_chat': + url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id + + frag_index = offset = 0 + click_tracking_params = None + while continuation_id is not None: + frag_index += 1 + request_data = { + 'context': innertube_context, + 'continuation': continuation_id, + } + if frag_index > 1: + request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))} + if click_tracking_params: + request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} + headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) + headers.update({'content-type': 'application/json'}) + fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n' + success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( + url, frag_index, fragment_request_data, headers) + else: + success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( + chat_page_url, frag_index) + if not success: + return False + if test: + break + + self._finish_frag_download(ctx, info_dict) + return True + + @staticmethod + def parse_live_timestamp(action): + action_content = dict_get( + action, + ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand']) + if not isinstance(action_content, dict): + return None + item = dict_get(action_content, ['item', 'bannerRenderer']) + if not isinstance(item, dict): + return None + renderer = dict_get(item, [ + # text + 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', + 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', + # ticker + 'liveChatTickerPaidMessageItemRenderer', + 'liveChatTickerSponsorItemRenderer', + # banner + 'liveChatBannerRenderer', + ]) + if 
not isinstance(renderer, dict): + return None + parent_item_getters = [ + lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'], + lambda x: x['contents'], + ] + parent_item = try_get(renderer, parent_item_getters, dict) + if parent_item: + renderer = dict_get(parent_item, [ + 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', + 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', + ]) + if not isinstance(renderer, dict): + return None + return int_or_none(renderer.get('timestampUsec'), 1000) diff --git a/hypervideo_dl/extractor/__init__.py b/hypervideo_dl/extractor/__init__.py index 18d8dbc..198c4ae 100644 --- a/hypervideo_dl/extractor/__init__.py +++ b/hypervideo_dl/extractor/__init__.py @@ -1,13 +1,17 @@ from __future__ import unicode_literals +from ..utils import load_plugins + try: from .lazy_extractors import * from .lazy_extractors import _ALL_CLASSES _LAZY_LOADER = True + _PLUGIN_CLASSES = {} except ImportError: _LAZY_LOADER = False - from .extractors import * +if not _LAZY_LOADER: + from .extractors import * _ALL_CLASSES = [ klass for name, klass in globals().items() @@ -15,6 +19,9 @@ except ImportError: ] _ALL_CLASSES.append(GenericIE) + _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) + _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES + def gen_extractor_classes(): """ Return a list of supported extractors. diff --git a/hypervideo_dl/extractor/abc.py b/hypervideo_dl/extractor/abc.py index 6637f4f..3e20216 100644 --- a/hypervideo_dl/extractor/abc.py +++ b/hypervideo_dl/extractor/abc.py @@ -12,6 +12,7 @@ from ..utils import ( js_to_json, int_or_none, parse_iso8601, + str_or_none, try_get, unescapeHTML, update_url_query, @@ -20,7 +21,7 @@ from ..utils import ( class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' - _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/(?:news|btn)/(?:[^/]+/){1,4}(?P\d{5,})' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', @@ -34,7 +35,7 @@ class ABCIE(InfoExtractor): 'skip': 'this video has expired', }, { 'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326', - 'md5': 'db2a5369238b51f9811ad815b69dc086', + 'md5': '4ebd61bdc82d9a8b722f64f1f4b4d121', 'info_dict': { 'id': 'NvqvPeNZsHU', 'ext': 'mp4', @@ -58,39 +59,102 @@ class ABCIE(InfoExtractor): }, { 'url': 'http://www.abc.net.au/news/2015-10-19/6866214', 'only_matching': True, + }, { + 'url': 'https://www.abc.net.au/btn/classroom/wwi-centenary/10527914', + 'info_dict': { + 'id': '10527914', + 'ext': 'mp4', + 'title': 'WWI Centenary', + 'description': 'md5:c2379ec0ca84072e86b446e536954546', + } + }, { + 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', + 'info_dict': { + 'id': '12342074', + 'ext': 'mp4', + 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', + 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', + } + }, { + 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', + 'info_dict': { + 'id': 'tDL8Ld4dK_8', + 'ext': 'mp4', + 'title': 'Fortnite Banned From Apple and Google App Stores', + 'description': 'md5:a6df3f36ce8f816b74af4bd6462f5651', + 'upload_date': '20200813', + 'uploader': 'Behind the News', + 'uploader_id': 'behindthenews', + } }] def _real_extract(self, url): video_id = self._match_id(url) webpage = 
self._download_webpage(url, video_id) - mobj = re.search( - r'inline(?PVideo|Audio|YouTube)Data\.push\((?P[^)]+)\);', - webpage) + mobj = re.search(r'[^"]+)"\s+data-duration="\d+"\s+title="Download audio directly">', webpage) + if mobj: + urls_info = mobj.groupdict() + youtube = False + video = False + else: + mobj = re.search(r'External Link:', + webpage) + if mobj is None: + mobj = re.search(r'