diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-12-17 20:58:15 -0800 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-12-17 20:58:15 -0800 |
commit | 45a4ab5acedd2fd7531604d3e817e0742a036c4a (patch) | |
tree | a0729f645cc485880b8cb37dfdec9bfa6f68342c | |
parent | 81c7ecf161b528ba293678e0bdbf42952cc87386 (diff) | |
download | yt-local-45a4ab5acedd2fd7531604d3e817e0742a036c4a.tar.lz yt-local-45a4ab5acedd2fd7531604d3e817e0742a036c4a.tar.xz yt-local-45a4ab5acedd2fd7531604d3e817e0742a036c4a.zip |
Extraction: Detect limited state and fix false detection as unlisted
-rw-r--r-- | youtube/templates/watch.html | 3 | ||||
-rw-r--r-- | youtube/watch.py | 1 | ||||
-rw-r--r-- | youtube/yt_data_extract.py | 12 |
3 files changed, 14 insertions, 2 deletions
```diff
diff --git a/youtube/templates/watch.html b/youtube/templates/watch.html
index 27150d4..eaa3786 100644
--- a/youtube/templates/watch.html
+++ b/youtube/templates/watch.html
@@ -252,6 +252,9 @@
                 {%- if age_restricted -%}
                     <li class="age-restricted">Age-restricted</li>
                 {%- endif -%}
+                {%- if limited_state -%}
+                    <li>Limited state</li>
+                {%- endif -%}
             </ul>
             <address>Uploaded by <a href="{{ uploader_channel_url }}">{{ uploader }}</a></address>
             <span class="views">{{ views }} views</span>
diff --git a/youtube/watch.py b/youtube/watch.py
index 092885d..fca794e 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -429,6 +429,7 @@ def get_watch_page():
         uploader = info['author'],
         description = info['description'],
         unlisted = info['unlisted'],
+        limited_state = info['limited_state'],
         age_restricted = info['age_restricted'],
         playability_error = info['playability_error'],
     )
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py
index 96021f1..c7a6604 100644
--- a/youtube/yt_data_extract.py
+++ b/youtube/yt_data_extract.py
@@ -1003,6 +1003,13 @@ def extract_watch_info_mobile(top_level):
         info['comment_count'] = 0
         info['comments_disabled'] = True
 
+    # check for limited state
+    items, _ = extract_items(response, item_types={'limitedStateMessageRenderer'})
+    if items:
+        info['limited_state'] = True
+    else:
+        info['limited_state'] = False
+
     # related videos
     related, _ = extract_items(response)
     info['related_videos'] = [renderer_info(renderer) for renderer in related]
@@ -1015,6 +1022,7 @@ def extract_watch_info_desktop(top_level):
         'comment_count': None,
         'comments_disabled': None,
         'allowed_countries': None,
+        'limited_state': None,
     }
 
     video_info = {}
@@ -1201,7 +1209,7 @@ def extract_watch_info(polymer_json):
     liberal_update(info, 'author', vd.get('author'))
     liberal_update(info, 'author_id', vd.get('channelId'))
     liberal_update(info, 'live', vd.get('isLiveContent'))
-    liberal_update(info, 'unlisted', not vd.get('isCrawlable', True))
+    conservative_update(info, 'unlisted', not vd.get('isCrawlable', True)) #isCrawlable is false on limited state videos even if they aren't unlisted
     liberal_update(info, 'tags', vd.get('keywords', []))
 
     # fallback stuff from microformat
@@ -1213,7 +1221,7 @@ def extract_watch_info(polymer_json):
     conservative_update(info, 'description', extract_str(mf.get('description'), recover_urls=True))
     conservative_update(info, 'author', mf.get('ownerChannelName'))
     conservative_update(info, 'author_id', mf.get('externalChannelId'))
-    conservative_update(info, 'unlisted', mf.get('isUnlisted'))
+    liberal_update(info, 'unlisted', mf.get('isUnlisted'))
     liberal_update(info, 'category', mf.get('category'))
     liberal_update(info, 'published_date', mf.get('publishDate'))
     liberal_update(info, 'uploaded_date', mf.get('uploadDate'))
```