From 0bb1bc1b107b9c3d68ea0c887bd09cad75d7714d Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 24 Aug 2021 15:52:40 +1200 Subject: [youtube] Remove annotations and deprecate `--write-annotations` (#765) Closes #692 Authored by: coletdjnz --- README.md | 2 + test/test_write_annotations.py | 81 --------------------------------- test/test_write_annotations.py.disabled | 81 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/youtube.py | 36 +-------------- yt_dlp/options.py | 4 +- 5 files changed, 86 insertions(+), 118 deletions(-) delete mode 100644 test/test_write_annotations.py create mode 100644 test/test_write_annotations.py.disabled diff --git a/README.md b/README.md index 248b7e688..917350bda 100644 --- a/README.md +++ b/README.md @@ -1500,6 +1500,8 @@ These options may no longer work as intended --no-call-home Default --include-ads No longer supported --no-include-ads Default + --write-annotations No supported site has annotations now + --no-write-annotations Default #### Removed These options were deprecated since 2014 and have now been entirely removed diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py deleted file mode 100644 index 7e4d8bc5a..000000000 --- a/test/test_write_annotations.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -# coding: utf-8 -from __future__ import unicode_literals - -# Allow direct execution -import os -import sys -import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from test.helper import get_params, try_rm, is_download_test - - -import io - -import xml.etree.ElementTree - -import yt_dlp.YoutubeDL -import yt_dlp.extractor - - -class YoutubeDL(yt_dlp.YoutubeDL): - def __init__(self, *args, **kwargs): - super(YoutubeDL, self).__init__(*args, **kwargs) - self.to_stderr = self.to_screen - - -params = get_params({ - 'writeannotations': True, - 'skip_download': True, - 'writeinfojson': False, - 'format': 'flv', -}) - - -TEST_ID = 'gr51aVj-mLg' 
-ANNOTATIONS_FILE = TEST_ID + '.annotations.xml' -EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] - - -@is_download_test -class TestAnnotations(unittest.TestCase): - def setUp(self): - # Clear old files - self.tearDown() - - def test_info_json(self): - expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text. - ie = yt_dlp.extractor.YoutubeIE() - ydl = YoutubeDL(params) - ydl.add_info_extractor(ie) - ydl.download([TEST_ID]) - self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) - annoxml = None - with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: - annoxml = xml.etree.ElementTree.parse(annof) - self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') - root = annoxml.getroot() - self.assertEqual(root.tag, 'document') - annotationsTag = root.find('annotations') - self.assertEqual(annotationsTag.tag, 'annotations') - annotations = annotationsTag.findall('annotation') - - # Not all the annotations have TEXT children and the annotations are returned unsorted. - for a in annotations: - self.assertEqual(a.tag, 'annotation') - if a.get('type') == 'text': - textTag = a.find('TEXT') - text = textTag.text - self.assertTrue(text in expected) # assertIn only added in python 2.7 - # remove the first occurrence, there could be more than one annotation with the same text - expected.remove(text) - # We should have seen (and removed) all the expected annotation texts. 
- self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') - - def tearDown(self): - try_rm(ANNOTATIONS_FILE) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/test_write_annotations.py.disabled b/test/test_write_annotations.py.disabled new file mode 100644 index 000000000..7e4d8bc5a --- /dev/null +++ b/test/test_write_annotations.py.disabled @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import get_params, try_rm, is_download_test + + +import io + +import xml.etree.ElementTree + +import yt_dlp.YoutubeDL +import yt_dlp.extractor + + +class YoutubeDL(yt_dlp.YoutubeDL): + def __init__(self, *args, **kwargs): + super(YoutubeDL, self).__init__(*args, **kwargs) + self.to_stderr = self.to_screen + + +params = get_params({ + 'writeannotations': True, + 'skip_download': True, + 'writeinfojson': False, + 'format': 'flv', +}) + + +TEST_ID = 'gr51aVj-mLg' +ANNOTATIONS_FILE = TEST_ID + '.annotations.xml' +EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] + + +@is_download_test +class TestAnnotations(unittest.TestCase): + def setUp(self): + # Clear old files + self.tearDown() + + def test_info_json(self): + expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text. 
+ ie = yt_dlp.extractor.YoutubeIE() + ydl = YoutubeDL(params) + ydl.add_info_extractor(ie) + ydl.download([TEST_ID]) + self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) + annoxml = None + with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: + annoxml = xml.etree.ElementTree.parse(annof) + self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') + root = annoxml.getroot() + self.assertEqual(root.tag, 'document') + annotationsTag = root.find('annotations') + self.assertEqual(annotationsTag.tag, 'annotations') + annotations = annotationsTag.findall('annotation') + + # Not all the annotations have TEXT children and the annotations are returned unsorted. + for a in annotations: + self.assertEqual(a.tag, 'annotation') + if a.get('type') == 'text': + textTag = a.find('TEXT') + text = textTag.text + self.assertTrue(text in expected) # assertIn only added in python 2.7 + # remove the first occurrence, there could be more than one annotation with the same text + expected.remove(text) + # We should have seen (and removed) all the expected annotation texts. 
+ self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') + + def tearDown(self): + try_rm(ANNOTATIONS_FILE) + + +if __name__ == '__main__': + unittest.main() diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 375eca8f8..9ca81e6cb 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -59,7 +59,6 @@ from ..utils import ( unsmuggle_url, update_url_query, url_or_none, - urlencode_postdata, urljoin, variadic, ) @@ -3168,40 +3167,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): needs_auth=info['age_limit'] >= 18, is_unlisted=None if is_private is None else is_unlisted) - # get xsrf for annotations or comments - get_annotations = self.get_param('writeannotations', False) - get_comments = self.get_param('getcomments', False) - if get_annotations or get_comments: - xsrf_token = None - if master_ytcfg: - xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str) - if not xsrf_token: - xsrf_token = self._search_regex( - r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2', - webpage, 'xsrf token', group='xsrf_token', fatal=False) - - # annotations - if get_annotations: - invideo_url = get_first( - player_responses, - ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'), - expected_type=str) - if xsrf_token and invideo_url: - xsrf_field_name = None - if master_ytcfg: - xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str) - if not xsrf_field_name: - xsrf_field_name = self._search_regex( - r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2', - webpage, 'xsrf field name', - group='xsrf_field_name', default='session_token') - info['annotations'] = self._download_webpage( - self._proto_relative_url(invideo_url), - video_id, note='Downloading annotations', - errnote='Unable to download video annotations', fatal=False, - data=urlencode_postdata({xsrf_field_name: xsrf_token})) - - if get_comments: + if self.get_param('getcomments', False): 
info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage) self.mark_watched(video_id, player_responses) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 6bad37d19..86aad3393 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1070,11 +1070,11 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--write-annotations', action='store_true', dest='writeannotations', default=False, - help='Write video annotations to a .annotations.xml file') + help=optparse.SUPPRESS_HELP) filesystem.add_option( '--no-write-annotations', action='store_false', dest='writeannotations', - help='Do not write video annotations (default)') + help=optparse.SUPPRESS_HELP) filesystem.add_option( '--write-playlist-metafiles', action='store_true', dest='allow_playlist_files', default=None, -- cgit v1.2.3