diff options
author | Brett824 <bsheingo@u.rochester.edu> | 2022-06-23 19:30:17 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-23 16:30:17 -0700 |
commit | 06cc8f103b571380b30f03beff94d522930f64e4 (patch) | |
tree | 007ea57822347914e0a4a727c87059de2d4c04aa | |
parent | 34baaced11fc34ab38e39c45d1f6fccc09292404 (diff) | |
download | hypervideo-pre-06cc8f103b571380b30f03beff94d522930f64e4.tar.lz hypervideo-pre-06cc8f103b571380b30f03beff94d522930f64e4.tar.xz hypervideo-pre-06cc8f103b571380b30f03beff94d522930f64e4.zip |
[extractor/youtube] Mark videos as fully watched (#4146)
* Also fixes videos appearing as shorts in watch history
Closes #2555
Authored by: Brett824
-rw-r--r-- | yt_dlp/extractor/youtube.py | 63 |
1 file changed, 39 insertions, 24 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index dee051d05..d168bfff5 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2643,30 +2643,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return sts def _mark_watched(self, video_id, player_responses): - playback_url = get_first( - player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), - expected_type=url_or_none) - if not playback_url: - self.report_warning('Unable to mark watched') - return - parsed_playback_url = compat_urlparse.urlparse(playback_url) - qs = compat_urlparse.parse_qs(parsed_playback_url.query) - - # cpn generation algorithm is reverse engineered from base.js. - # In fact it works even with dummy cpn. - CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' - cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) - - qs.update({ - 'ver': ['2'], - 'cpn': [cpn], - }) - playback_url = compat_urlparse.urlunparse( - parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) - - self._download_webpage( - playback_url, video_id, 'Marking watched', - 'Unable to mark watched', fatal=False) + for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')): + label = 'fully ' if is_full else '' + url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'), + expected_type=url_or_none) + if not url: + self.report_warning(f'Unable to mark {label}watched') + return + parsed_url = compat_urlparse.urlparse(url) + qs = compat_urlparse.parse_qs(parsed_url.query) + + # cpn generation algorithm is reverse engineered from base.js. + # In fact it works even with dummy cpn. 
+ CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' + cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) + + # # more consistent results setting it to right before the end + video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)] + + qs.update({ + 'ver': ['2'], + 'cpn': [cpn], + 'cmt': video_length, + 'el': 'detailpage', # otherwise defaults to "shorts" + }) + + if is_full: + # these seem to mark watchtime "history" in the real world + # they're required, so send in a single value + qs.update({ + 'st': video_length, + 'et': video_length, + }) + + url = compat_urlparse.urlunparse( + parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + + self._download_webpage( + url, video_id, f'Marking {label}watched', + 'Unable to mark watched', fatal=False) @staticmethod def _extract_urls(webpage): |