diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-12-17 20:58:15 -0800 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-12-17 20:58:15 -0800 |
commit | 45a4ab5acedd2fd7531604d3e817e0742a036c4a (patch) | |
tree | a0729f645cc485880b8cb37dfdec9bfa6f68342c /youtube/yt_data_extract.py | |
parent | 81c7ecf161b528ba293678e0bdbf42952cc87386 (diff) | |
download | yt-local-45a4ab5acedd2fd7531604d3e817e0742a036c4a.tar.lz yt-local-45a4ab5acedd2fd7531604d3e817e0742a036c4a.tar.xz yt-local-45a4ab5acedd2fd7531604d3e817e0742a036c4a.zip |
Extraction: Detect limited state and fix false detection as unlisted
Diffstat (limited to 'youtube/yt_data_extract.py')
-rw-r--r-- | youtube/yt_data_extract.py | 12 |
1 file changed, 10 insertions, 2 deletions
```diff
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py
index 96021f1..c7a6604 100644
--- a/youtube/yt_data_extract.py
+++ b/youtube/yt_data_extract.py
@@ -1003,6 +1003,13 @@ def extract_watch_info_mobile(top_level):
         info['comment_count'] = 0
         info['comments_disabled'] = True
 
+    # check for limited state
+    items, _ = extract_items(response, item_types={'limitedStateMessageRenderer'})
+    if items:
+        info['limited_state'] = True
+    else:
+        info['limited_state'] = False
+
     # related videos
     related, _ = extract_items(response)
     info['related_videos'] = [renderer_info(renderer) for renderer in related]
@@ -1015,6 +1022,7 @@ def extract_watch_info_desktop(top_level):
         'comment_count': None,
         'comments_disabled': None,
         'allowed_countries': None,
+        'limited_state': None,
     }
 
     video_info = {}
@@ -1201,7 +1209,7 @@ def extract_watch_info(polymer_json):
     liberal_update(info, 'author', vd.get('author'))
     liberal_update(info, 'author_id', vd.get('channelId'))
     liberal_update(info, 'live', vd.get('isLiveContent'))
-    liberal_update(info, 'unlisted', not vd.get('isCrawlable', True))
+    conservative_update(info, 'unlisted', not vd.get('isCrawlable', True)) #isCrawlable is false on limited state videos even if they aren't unlisted
     liberal_update(info, 'tags', vd.get('keywords', []))
 
     # fallback stuff from microformat
@@ -1213,7 +1221,7 @@ def extract_watch_info(polymer_json):
     conservative_update(info, 'description', extract_str(mf.get('description'), recover_urls=True))
     conservative_update(info, 'author', mf.get('ownerChannelName'))
     conservative_update(info, 'author_id', mf.get('externalChannelId'))
-    conservative_update(info, 'unlisted', mf.get('isUnlisted'))
+    liberal_update(info, 'unlisted', mf.get('isUnlisted'))
     liberal_update(info, 'category', mf.get('category'))
     liberal_update(info, 'published_date', mf.get('publishDate'))
     liberal_update(info, 'uploaded_date', mf.get('uploadDate'))
```