diff options
| author | pukkandan <pukkandan.ytdlp@gmail.com> | 2023-06-21 09:21:20 +0530 | 
|---|---|---|
| committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2023-06-21 09:21:20 +0530 | 
| commit | ad54c9130e793ce433bf9da334fa80df9f3aee58 (patch) | |
| tree | 859a37aa6647ac90e619ba85aa5db58a0c733ae9 | |
| parent | db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb (diff) | |
| download | hypervideo-pre-ad54c9130e793ce433bf9da334fa80df9f3aee58.tar.lz hypervideo-pre-ad54c9130e793ce433bf9da334fa80df9f3aee58.tar.xz hypervideo-pre-ad54c9130e793ce433bf9da334fa80df9f3aee58.zip | |
[cleanup] Misc
Closes #6288, Closes #7197, Closes #7265, Closes #7353, Closes #5773
Authored by: mikf, freezboltz, pukkandan
| -rw-r--r-- | .github/workflows/potential-duplicates.yml | 2 | ||||
| -rw-r--r-- | README.md | 28 | ||||
| -rw-r--r-- | devscripts/changelog_override.json | 27 | ||||
| -rw-r--r-- | devscripts/cli_to_api.py | 4 | ||||
| -rw-r--r-- | devscripts/make_changelog.py | 14 | ||||
| -rw-r--r-- | test/test_YoutubeDL.py | 16 | ||||
| -rw-r--r-- | test/test_jsinterp.py | 67 | ||||
| -rw-r--r-- | test/test_youtube_signature.py | 2 | ||||
| -rw-r--r-- | yt_dlp/YoutubeDL.py | 20 | ||||
| -rw-r--r-- | yt_dlp/cookies.py | 4 | ||||
| -rw-r--r-- | yt_dlp/downloader/common.py | 1 | ||||
| -rw-r--r-- | yt_dlp/downloader/niconico.py | 4 | ||||
| -rw-r--r-- | yt_dlp/extractor/ciscowebex.py | 4 | ||||
| -rw-r--r-- | yt_dlp/extractor/common.py | 3 | ||||
| -rw-r--r--[-rwxr-xr-x] | yt_dlp/extractor/dumpert.py | 0 | ||||
| -rw-r--r--[-rwxr-xr-x] | yt_dlp/extractor/globalplayer.py | 0 | ||||
| -rw-r--r-- | yt_dlp/extractor/odnoklassniki.py | 6 | ||||
| -rw-r--r-- | yt_dlp/extractor/tvp.py | 4 | ||||
| -rw-r--r-- | yt_dlp/extractor/vidio.py | 2 | ||||
| -rw-r--r-- | yt_dlp/extractor/youtube.py | 10 | ||||
| -rw-r--r-- | yt_dlp/options.py | 6 | ||||
| -rw-r--r-- | yt_dlp/utils/_legacy.py | 6 | ||||
| -rw-r--r-- | yt_dlp/utils/_utils.py | 10 | 
23 files changed, 138 insertions, 102 deletions
| diff --git a/.github/workflows/potential-duplicates.yml b/.github/workflows/potential-duplicates.yml index 1521ae20c..cfc583186 100644 --- a/.github/workflows/potential-duplicates.yml +++ b/.github/workflows/potential-duplicates.yml @@ -12,7 +12,7 @@ jobs:            GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}            label: potential-duplicate            state: all -          threshold: 0.7 +          threshold: 0.3            comment: |              This issue is potentially a duplicate of one of the following issues:              {{#issues}} @@ -152,7 +152,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu  * The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.  * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this  * Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead -* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this +* Some internal metadata such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this  * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. 
`--compat-options no-keep-subs` can be used to revert this  * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`  * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior @@ -251,7 +251,7 @@ gpg --verify SHA2-512SUMS.sig SHA2-512SUMS  ```  <!-- MANPAGE: END EXCLUDED SECTION --> -**Note**: The manpages, shell completion files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) +**Note**: The manpages, shell completion (autocomplete) files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)  ## DEPENDENCIES  Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. @@ -699,9 +699,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git                                      --write-description etc. (default)      --no-write-playlist-metafiles   Do not write playlist metadata when using                                      --write-info-json, --write-description etc. -    --clean-info-json               Remove some private fields such as filenames -                                    from the infojson. Note that it could still -                                    contain some personal information (default) +    --clean-info-json               Remove some internal metadata such as +                                    filenames from the infojson (default)      --no-clean-info-json            Write all fields to the infojson      --write-comments                Retrieve video comments to be placed in the                                      infojson. 
The comments are fetched even @@ -1041,13 +1040,10 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git                                      that of --use-postprocessor (default:                                      after_move). Same syntax as the output                                      template can be used to pass any field as -                                    arguments to the command. After download, an -                                    additional field "filepath" that contains -                                    the final path of the downloaded file is -                                    also available, and if no fields are passed, -                                    %(filepath,_filename|)q is appended to the -                                    end of the command. This option can be used -                                    multiple times +                                    arguments to the command. If no fields are +                                    passed, %(filepath,_filename|)q is appended +                                    to the end of the command. This option can +                                    be used multiple times      --no-exec                       Remove any previously defined --exec      --convert-subs FORMAT           Convert the subtitles to another format                                      (currently supported: ass, lrc, srt, vtt) @@ -1225,8 +1221,7 @@ To activate authentication with the `.netrc` file you should pass `--netrc` to y  The default location of the .netrc file is `~` (see below). -As an alternative to using the `.netrc` file, which has the disadvantage of keeping your passwords in a plain text file, you can configure a custom shell command to provide the credentials for an extractor. This is done by providing the `--netrc-cmd` parameter, it shall output the credentials in the netrc format and return `0` on success, other values will be treated as an error. 
`{}` in the command will be replaced by the name of the extractor to make it possible to select the credentials for the right extractor. -To use braces in the command, they need to be escaped by doubling them. (see example bellow) +As an alternative to using the `.netrc` file, which has the disadvantage of keeping your passwords in a plain text file, you can configure a custom shell command to provide the credentials for an extractor. This is done by providing the `--netrc-cmd` parameter, it shall output the credentials in the netrc format and return `0` on success, other values will be treated as an error. `{}` in the command will be replaced by the name of the extractor to make it possible to select the credentials for the right extractor (To use literal braces, double them like `{{}}`).  E.g. To use an encrypted `.netrc` file stored as `.authinfo.gpg`  ``` @@ -1389,7 +1384,10 @@ Available only when used in `--print`:   - `subtitles_table` (table): The subtitle format table as printed by `--list-subs`   - `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs` + Available only after the video is downloaded (`post_process`/`after_move`): + - `filepath`: Actual path of downloaded video file +  Available only in `--sponsorblock-chapter-title`:   - `start_time` (numeric): Start time of the chapter in seconds @@ -1435,7 +1433,7 @@ $ yt-dlp -o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.y  $ yt-dlp -o "%(upload_date>%Y)s/%(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"  # Prefix playlist index with " - " separator, but only if it is available -$ yt-dlp -o '%(playlist_index|)s%(playlist_index& - |)s%(title)s.%(ext)s' BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists" +$ yt-dlp -o "%(playlist_index&{} - |)s%(title)s.%(ext)s" BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists"  # Download all playlists of YouTube 
channel/user keeping each playlist in separate directory:  $ yt-dlp -o "%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/user/TheLinuxFoundation/playlists" diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index e5c9d1aa2..73225bdb9 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -8,5 +8,32 @@          "action": "add",          "when": "776d1c3f0c9b00399896dd2e40e78e9a43218109",          "short": "[priority] **YouTube throttling fixes!**" +    }, +    { +        "action": "remove", +        "when": "2e023649ea4e11151545a34dc1360c114981a236" +    }, +    { +        "action": "add", +        "when": "01aba2519a0884ef17d5f85608dbd2a455577147", +        "short": "[priority] YouTube: Improved throttling and signature fixes" +    }, +    { +        "action": "change", +        "when": "c86e433c35fe5da6cb29f3539eef97497f84ed38", +        "short": "[extractor/niconico:series] Fix extraction (#6898)", +        "authors": ["sqrtNOT"] +    }, +    { +        "action": "change", +        "when": "69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2", +        "short": "[extractor/youtube:music_search_url] Extract title (#7102)", +        "authors": ["kangalio"] +    }, +    { +        "action": "change", +        "when": "8417f26b8a819cd7ffcd4e000ca3e45033e670fb", +        "short": "Add option `--color` (#6904)", +        "authors": ["Grub4K"]      }  ] diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py index b8b7cbcf1..2aa51eb6e 100644 --- a/devscripts/cli_to_api.py +++ b/devscripts/cli_to_api.py @@ -19,11 +19,11 @@ def parse_patched_options(opts):          'extract_flat': False,          'concat_playlist': 'never',      }) -    yt_dlp.options.__dict__['create_parser'] = lambda: patched_parser +    yt_dlp.options.create_parser = lambda: patched_parser      try:          return yt_dlp.parse_options(opts)      finally: -        
yt_dlp.options.__dict__['create_parser'] = create_parser +        yt_dlp.options.create_parser = create_parser  default_opts = parse_patched_options([]).ydl_opts diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index 1b7e251ee..2fcdc06d7 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -44,7 +44,7 @@ class CommitGroup(enum.Enum):          return {              name: group              for group, names in { -                cls.PRIORITY: {''}, +                cls.PRIORITY: {'priority'},                  cls.CORE: {                      'aes',                      'cache', @@ -68,7 +68,7 @@ class CommitGroup(enum.Enum):                      'misc',                      'test',                  }, -                cls.EXTRACTOR: {'extractor', 'extractors'}, +                cls.EXTRACTOR: {'extractor'},                  cls.DOWNLOADER: {'downloader'},                  cls.POSTPROCESSOR: {'postprocessor'},              }.items() @@ -323,7 +323,7 @@ class CommitRange:                  logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')                  continue -            override_hash = override.get('hash') +            override_hash = override.get('hash') or when              if override['action'] == 'add':                  commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])                  logger.info(f'ADD    {commit}') @@ -337,7 +337,7 @@ class CommitRange:              elif override['action'] == 'change':                  if override_hash not in self._commits:                      continue -                commit = Commit(override_hash, override['short'], override['authors']) +                commit = Commit(override_hash, override['short'], override.get('authors') or [])                  logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')                  self._commits[commit.hash] = commit @@ -348,7 +348,7 @@ class CommitRange:          for 
commit in self:              upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)              if upstream_re: -                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}' +                commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'              match = self.MESSAGE_RE.fullmatch(commit.short)              if not match: @@ -394,10 +394,10 @@ class CommitRange:              return CommitGroup.CORE, None, ()          prefix, _, details = prefix.partition('/') -        prefix = prefix.strip().lower() +        prefix = prefix.strip()          details = details.strip() -        group = CommitGroup.get(prefix) +        group = CommitGroup.get(prefix.lower())          if group is CommitGroup.PRIORITY:              prefix, _, details = details.partition('/') diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index ccc9e36f3..05dd3ed41 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -668,7 +668,7 @@ class TestYoutubeDL(unittest.TestCase):              for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected):                  if callable(expect):                      self.assertTrue(expect(got), f'Wrong {name} from {tmpl}') -                else: +                elif expect is not None:                      self.assertEqual(got, expect, f'Wrong {name} from {tmpl}')          # Side-effects @@ -759,15 +759,17 @@ class TestYoutubeDL(unittest.TestCase):          test('a%(width|b)d', 'ab', outtmpl_na_placeholder='none')          FORMATS = self.outtmpl_info['formats'] -        sanitize = lambda x: x.replace(':', '：').replace('"', "＂").replace('\n', ' ')          # Custom type casting          test('%(formats.:.id)l', 'id 1, id 2, id 3')          test('%(formats.:.id)#l', ('id 1\nid 2\nid 3', 'id 1 id 2 id 3'))          test('%(ext)l', 'mp4')          test('%(formats.:.id) 18l', '  id 1, id 2, id 3') -        test('%(formats)j', (json.dumps(FORMATS), 
sanitize(json.dumps(FORMATS)))) -        test('%(formats)#j', (json.dumps(FORMATS, indent=4), sanitize(json.dumps(FORMATS, indent=4)))) +        test('%(formats)j', (json.dumps(FORMATS), None)) +        test('%(formats)#j', ( +            json.dumps(FORMATS, indent=4), +            json.dumps(FORMATS, indent=4).replace(':', '：').replace('"', "＂").replace('\n', ' ') +        ))          test('%(title5).3B', 'á')          test('%(title5)U', 'áéí 𝐀')          test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀') @@ -792,8 +794,8 @@ class TestYoutubeDL(unittest.TestCase):          test('%(title|%)s %(title|%%)s', '% %%')          test('%(id+1-height+3)05d', '00158')          test('%(width+100)05d', 'NA') -        test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % sanitize(str(FORMATS[0])))) -        test('%(formats.0)r', (repr(FORMATS[0]), sanitize(repr(FORMATS[0])))) +        test('%(formats.0) 15s', ('% 15s' % FORMATS[0], None)) +        test('%(formats.0)r', (repr(FORMATS[0]), None))          test('%(height.0)03d', '001')          test('%(-height.0)04d', '-001')          test('%(formats.-1.id)s', FORMATS[-1]['id']) @@ -805,7 +807,7 @@ class TestYoutubeDL(unittest.TestCase):          out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]}                            if 'height' in f else {'id': f['id']}                            for f in FORMATS]) -        test('%(formats.:.{id,height.:2})j', (out, sanitize(out))) +        test('%(formats.:.{id,height.:2})j', (out, None))          test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS))          test('%(.{id,title})j', ('{"id": "1234"}', '{"id": "1234"}')) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index b01477e6f..e9682ddab 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -12,28 +12,38 @@ import math  from yt_dlp.jsinterp import JS_Undefined, JSInterpreter +class NaN: +    pass + +  class TestJSInterpreter(unittest.TestCase): -    def _test(self, code, 
ret, func='f', args=()): -        self.assertEqual(JSInterpreter(code).call_function(func, *args), ret) +    def _test(self, jsi_or_code, expected, func='f', args=()): +        if isinstance(jsi_or_code, str): +            jsi_or_code = JSInterpreter(jsi_or_code) +        got = jsi_or_code.call_function(func, *args) +        if expected is NaN: +            self.assertTrue(math.isnan(got), f'{got} is not NaN') +        else: +            self.assertEqual(got, expected)      def test_basic(self):          jsi = JSInterpreter('function f(){;}')          self.assertEqual(repr(jsi.extract_function('f')), 'F<f>') -        self.assertEqual(jsi.call_function('f'), None) +        self._test(jsi, None)          self._test('function f(){return 42;}', 42)          self._test('function f(){42}', None)          self._test('var f = function(){return 42;}', 42) -    def test_calc(self): -        self._test('function f(a){return 2*a+1;}', 7, args=[3]) -      def test_div(self):          jsi = JSInterpreter('function f(a, b){return a / b;}') -        self.assertTrue(math.isnan(jsi.call_function('f', 0, 0))) -        self.assertTrue(math.isnan(jsi.call_function('f', JS_Undefined, 1))) -        self.assertTrue(math.isinf(jsi.call_function('f', 2, 0))) -        self.assertEqual(jsi.call_function('f', 0, 3), 0) +        self._test(jsi, NaN, args=(0, 0)) +        self._test(jsi, NaN, args=(JS_Undefined, 1)) +        self._test(jsi, float('inf'), args=(2, 0)) +        self._test(jsi, 0, args=(0, 3)) + +    def test_calc(self): +        self._test('function f(a){return 2*a+1;}', 7, args=[3])      def test_empty_return(self):          self._test('function f(){return; y()}', None) @@ -102,16 +112,15 @@ class TestJSInterpreter(unittest.TestCase):          ''', [20, 20, 30, 40, 50])      def test_builtins(self): -        jsi = JSInterpreter('function f() { return NaN }') -        self.assertTrue(math.isnan(jsi.call_function('f'))) +        self._test('function f() { return NaN }', NaN)      
def test_date(self):          self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000)          jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }') -        self.assertEqual(jsi.call_function('f', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000) -        self.assertEqual(jsi.call_function('f', '12/31/1969 18:01:26 MDT'), 86000)  # m/d/y -        self.assertEqual(jsi.call_function('f', '1 January 1970 00:00:00 UTC'), 0) +        self._test(jsi, 86000, args=['Wednesday 31 December 1969 18:01:26 MDT']) +        self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])  # m/d/y +        self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])      def test_call(self):          jsi = JSInterpreter(''' @@ -119,8 +128,8 @@ class TestJSInterpreter(unittest.TestCase):              function y(a) { return x() + (a?a:0); }              function z() { return y(3); }          ''') -        self.assertEqual(jsi.call_function('z'), 5) -        self.assertEqual(jsi.call_function('y'), 2) +        self._test(jsi, 5, func='z') +        self._test(jsi, 2, func='y')      def test_if(self):          self._test(''' @@ -167,9 +176,9 @@ class TestJSInterpreter(unittest.TestCase):                  default:x=0;              } return x }          ''') -        self.assertEqual(jsi.call_function('f', 1), 7) -        self.assertEqual(jsi.call_function('f', 3), 6) -        self.assertEqual(jsi.call_function('f', 5), 0) +        self._test(jsi, 7, args=[1]) +        self._test(jsi, 6, args=[3]) +        self._test(jsi, 0, args=[5])      def test_switch_default(self):          jsi = JSInterpreter(''' @@ -182,9 +191,9 @@ class TestJSInterpreter(unittest.TestCase):                  case 1: x+=1;              } return x }          ''') -        self.assertEqual(jsi.call_function('f', 1), 2) -        self.assertEqual(jsi.call_function('f', 5), 11) -        self.assertEqual(jsi.call_function('f', 9), 14) +        self._test(jsi, 2, args=[1]) 
+        self._test(jsi, 11, args=[5]) +        self._test(jsi, 14, args=[9])      def test_try(self):          self._test('function f() { try{return 10} catch(e){return 5} }', 10) @@ -312,12 +321,12 @@ class TestJSInterpreter(unittest.TestCase):      def test_char_code_at(self):          jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}') -        self.assertEqual(jsi.call_function('f', 0), 116) -        self.assertEqual(jsi.call_function('f', 1), 101) -        self.assertEqual(jsi.call_function('f', 2), 115) -        self.assertEqual(jsi.call_function('f', 3), 116) -        self.assertEqual(jsi.call_function('f', 4), None) -        self.assertEqual(jsi.call_function('f', 'not_a_number'), 116) +        self._test(jsi, 116, args=[0]) +        self._test(jsi, 101, args=[1]) +        self._test(jsi, 115, args=[2]) +        self._test(jsi, 116, args=[3]) +        self._test(jsi, None, args=[4]) +        self._test(jsi, 116, args=['not_a_number'])      def test_bitwise_operators_overflow(self):          self._test('function f(){return -524999584 << 5}', 379882496) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 6759d2c46..811f70e68 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -67,7 +67,7 @@ _SIG_TESTS = [          'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',          '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',          'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', -    ) +    ),  ]  _NSIG_TESTS = [ diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e51bceef3..7a5e59323 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -259,7 +259,7 @@ class YoutubeDL:      consoletitle:      Display progress in console window's titlebar.      
writedescription:  Write the video description to a .description file      writeinfojson:     Write the video description to a .info.json file -    clean_infojson:    Remove private fields from the infojson +    clean_infojson:    Remove internal metadata from the infojson      getcomments:       Extract video comments. This will not be written to disk                         unless writeinfojson is also given      writeannotations:  Write the video annotations to a .annotations.xml file @@ -1902,7 +1902,7 @@ class YoutubeDL:                  continue              entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip') -            if not lazy and 'playlist-index' in self.params.get('compat_opts', []): +            if not lazy and 'playlist-index' in self.params['compat_opts']:                  playlist_index = ie_result['requested_entries'][i]              entry_copy = collections.ChainMap(entry, { @@ -2959,8 +2959,7 @@ class YoutubeDL:          print_field('url', 'urls')          print_field('thumbnail', optional=True)          print_field('description', optional=True) -        if filename: -            print_field('filename') +        print_field('filename')          if self.params.get('forceduration') and info_copy.get('duration') is not None:              self.to_stdout(formatSeconds(info_copy['duration']))          print_field('format') @@ -3185,7 +3184,6 @@ class YoutubeDL:                          return                  if info_dict.get('requested_formats') is not None: -                    requested_formats = info_dict['requested_formats']                      old_ext = info_dict['ext']                      if self.params.get('merge_output_format') is None:                          if (info_dict['ext'] == 'webm' @@ -3212,6 +3210,7 @@ class YoutubeDL:                      full_filename = correct_ext(full_filename)                      temp_filename = correct_ext(temp_filename)                      dl_filename = 
existing_video_file(full_filename, temp_filename) +                      info_dict['__real_download'] = False                      merger = FFmpegMergerPP(self) @@ -3219,12 +3218,12 @@ class YoutubeDL:                      if dl_filename is not None:                          self.report_file_already_downloaded(dl_filename)                      elif fd: -                        for f in requested_formats if fd != FFmpegFD else []: +                        for f in info_dict['requested_formats'] if fd != FFmpegFD else []:                              f['filepath'] = fname = prepend_extension(                                  correct_ext(temp_filename, info_dict['ext']),                                  'f%s' % f['format_id'], info_dict['ext'])                              downloaded.append(fname) -                        info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) +                        info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])                          success, real_download = self.dl(temp_filename, info_dict)                          info_dict['__real_download'] = real_download                      else: @@ -3248,7 +3247,7 @@ class YoutubeDL:                                  f'You have requested downloading multiple formats to stdout {reason}. 
'                                  'The formats will be streamed one after the other')                              fname = temp_filename -                        for f in requested_formats: +                        for f in info_dict['requested_formats']:                              new_info = dict(info_dict)                              del new_info['requested_formats']                              new_info.update(f) @@ -4109,8 +4108,11 @@ class YoutubeDL:                      ret.append((thumb_filename, thumb_filename_final))                      t['filepath'] = thumb_filename                  except network_exceptions as err: +                    if isinstance(err, urllib.error.HTTPError) and err.code == 404: +                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist') +                    else: +                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')                      thumbnails.pop(idx) -                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')              if ret and not write_all:                  break          return ret diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 8693e0b4a..f21e4f7e7 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -1326,3 +1326,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):          cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))          self.add_cookie_header(cookie_req)          return cookie_req.get_header('Cookie') + +    def clear(self, *args, **kwargs): +        with contextlib.suppress(KeyError): +            return super().clear(*args, **kwargs) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 477ec3c8a..a0219a350 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -49,7 +49,6 @@ class FileDownloader:      verbose:            Print additional info to stdout.      quiet:              Do not print messages to stdout.  
    ratelimit:          Download speed limit, in bytes/sec. -    continuedl:         Attempt to continue downloads if possible      throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)      retries:            Number of times to retry for expected network errors.                          Default is 0 for API, but 10 for CLI diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index cfe739784..7d8575c2a 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -7,9 +7,9 @@ from .common import FileDownloader  from .external import FFmpegFD  from ..utils import (      DownloadError, -    str_or_none, -    sanitized_Request,      WebSocketsWrapper, +    sanitized_Request, +    str_or_none,      try_get,  ) diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 0fcf02282..40430505d 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -49,7 +49,7 @@ class CiscoWebexIE(InfoExtractor):              'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),              video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429)) -        if urlh.status == 403: +        if urlh.getcode() == 403:              if stream['code'] == 53004:                  self.raise_login_required()              if stream['code'] == 53005: @@ -59,7 +59,7 @@ class CiscoWebexIE(InfoExtractor):                      'This video is protected by a password, use the --video-password option', expected=True)              raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True) -        if urlh.status == 429: +        if urlh.getcode() == 429:              self.raise_login_required(                  f'{self.IE_NAME} asks you to solve a CAPTCHA. 
Solve CAPTCHA in browser and',                  method='cookies') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index f11a67358..9662a7ee1 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -17,6 +17,7 @@ import subprocess  import sys  import time  import types +import urllib.error  import urllib.parse  import urllib.request  import xml.etree.ElementTree @@ -58,6 +59,7 @@ from ..utils import (      join_nonempty,      js_to_json,      mimetype2ext, +    netrc_from_content,      network_exceptions,      orderedSet,      parse_bitrate, @@ -72,7 +74,6 @@ from ..utils import (      smuggle_url,      str_or_none,      str_to_int, -    netrc_from_content,      strip_or_none,      traverse_obj,      truncate_string, diff --git a/yt_dlp/extractor/dumpert.py b/yt_dlp/extractor/dumpert.py index 0cf84263c..0cf84263c 100755..100644 --- a/yt_dlp/extractor/dumpert.py +++ b/yt_dlp/extractor/dumpert.py diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py index e0c0d58fd..e0c0d58fd 100755..100644 --- a/yt_dlp/extractor/globalplayer.py +++ b/yt_dlp/extractor/globalplayer.py diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 0d0ad0bb8..e63714e84 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -238,10 +238,8 @@ class OdnoklassnikiIE(InfoExtractor):      def _clear_cookies(self, cdn_url):          # Direct http downloads will fail if CDN cookies are set          # so we need to reset them after each format extraction -        if self._get_cookies('https://notarealsubdomain.mycdn.me/'): -            self.cookiejar.clear(domain='.mycdn.me') -        if self._get_cookies(cdn_url): -            self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname) +        self.cookiejar.clear(domain='.mycdn.me') +        self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname)      @classmethod      def 
_extract_embed_urls(cls, url, webpage): diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index 2aa0dd870..c686044fa 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -488,9 +488,9 @@ class TVPVODBaseIE(InfoExtractor):              f'{self._API_BASE_URL}/{resource}', video_id,              query={'lang': 'pl', 'platform': 'BROWSER', **query},              expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs) -        if is_valid(urlh.status): +        if is_valid(urlh.getcode()):              return document -        raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})') +        raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.getcode()})')      def _parse_video(self, video, with_url=True):          info_dict = traverse_obj(video, { diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 770aa284d..23e1aaf20 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -39,7 +39,7 @@ class VidioBaseIE(InfoExtractor):          login_post, login_post_urlh = self._download_webpage_handle(              self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401]) -        if login_post_urlh.status == 401: +        if login_post_urlh.getcode() == 401:              if get_element_by_class('onboarding-content-register-popup__title', login_post):                  raise ExtractorError(                      'Unable to log in: The provided email has not registered yet.', expected=True) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4daa4f50e..11e47904a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -811,7 +811,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,              'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,              'BADGE_STYLE_TYPE_VERIFIED': 
BadgeType.VERIFIED, -            'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED +            'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,          }          label_map = { @@ -821,7 +821,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              'live': BadgeType.LIVE_NOW,              'premium': BadgeType.AVAILABILITY_PREMIUM,              'verified': BadgeType.VERIFIED, -            'official artist channel': BadgeType.VERIFIED +            'official artist channel': BadgeType.VERIFIED,          }          badges = [] @@ -3935,7 +3935,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))              if f['quality'] == -1 and f.get('height'):                  f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))]) -            if self.get_param('verbose'): +            if self.get_param('verbose') or all_formats:                  f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')              if f.get('fps') and f['fps'] <= 1:                  del f['fps'] @@ -4531,7 +4531,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])          ):              upload_date = strftime_or_none( -                self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date +                self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date          info['upload_date'] = upload_date          for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: @@ -5071,7 +5071,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):          last_updated_unix = self._parse_time_text(              self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued              or self._get_text(playlist_header_renderer, ('byline', 1, 
'playlistBylineRenderer', 'text'))) -        info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d') +        info['modified_date'] = strftime_or_none(last_updated_unix)          info['view_count'] = self._get_count(playlist_stats, 1)          if info['view_count'] is None:  # 0 is allowed diff --git a/yt_dlp/options.py b/yt_dlp/options.py index b174a24af..9d6dbec9f 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1414,8 +1414,7 @@ def create_parser():          '--clean-info-json', '--clean-infojson',          action='store_true', dest='clean_infojson', default=None,          help=( -            'Remove some private fields such as filenames from the infojson. ' -            'Note that it could still contain some personal information (default)')) +            'Remove some internal metadata such as filenames from the infojson (default)'))      filesystem.add_option(          '--no-clean-info-json', '--no-clean-infojson',          action='store_false', dest='clean_infojson', @@ -1678,8 +1677,7 @@ def create_parser():              'Execute a command, optionally prefixed with when to execute it, separated by a ":". '              'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). '              'Same syntax as the output template can be used to pass any field as arguments to the command. ' -            'After download, an additional field "filepath" that contains the final path of the downloaded file ' -            'is also available, and if no fields are passed, %(filepath,_filename|)q is appended to the end of the command. ' +            'If no fields are passed, %(filepath,_filename|)q is appended to the end of the command. 
'              'This option can be used multiple times'))      postproc.add_option(          '--no-exec', diff --git a/yt_dlp/utils/_legacy.py b/yt_dlp/utils/_legacy.py index 1097778f0..96ac468b1 100644 --- a/yt_dlp/utils/_legacy.py +++ b/yt_dlp/utils/_legacy.py @@ -6,7 +6,7 @@ import sys  import urllib.parse  import zlib -from ._utils import decode_base_n, preferredencoding +from ._utils import Popen, decode_base_n, preferredencoding  from .traversal import traverse_obj  from ..dependencies import certifi, websockets @@ -174,3 +174,7 @@ def handle_youtubedl_headers(headers):          del filtered_headers['Youtubedl-no-compression']      return filtered_headers + + +def process_communicate_or_kill(p, *args, **kwargs): +    return Popen.communicate_or_kill(p, *args, **kwargs) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 28c2785cb..bc1bc9116 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -872,12 +872,6 @@ class netrc_from_content(netrc.netrc):              self._parse('-', stream, False) -def process_communicate_or_kill(p, *args, **kwargs): -    deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed ' -                        f'in a future version. 
Use "{__name__}.Popen.communicate_or_kill" instead') -    return Popen.communicate_or_kill(p, *args, **kwargs) - -  class Popen(subprocess.Popen):      if sys.platform == 'win32':          _startupinfo = subprocess.STARTUPINFO() @@ -1662,7 +1656,7 @@ def unified_strdate(date_str, day_first=True):  def unified_timestamp(date_str, day_first=True): -    if date_str is None: +    if not isinstance(date_str, str):          return None      date_str = re.sub(r'\s+', ' ', re.sub( @@ -2454,7 +2448,7 @@ def request_to_url(req):          return req -def strftime_or_none(timestamp, date_format, default=None): +def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):      datetime_object = None      try:          if isinstance(timestamp, (int, float)):  # unix timestamp | 
