aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/yt_data_extract.py
diff options
context:
space:
mode:
author: James Taylor <user234683@users.noreply.github.com> 2019-12-17 20:58:15 -0800
committer: James Taylor <user234683@users.noreply.github.com> 2019-12-17 20:58:15 -0800
commit: 45a4ab5acedd2fd7531604d3e817e0742a036c4a (patch)
tree: a0729f645cc485880b8cb37dfdec9bfa6f68342c /youtube/yt_data_extract.py
parent: 81c7ecf161b528ba293678e0bdbf42952cc87386 (diff)
download: yt-local-45a4ab5acedd2fd7531604d3e817e0742a036c4a.tar.lz
yt-local-45a4ab5acedd2fd7531604d3e817e0742a036c4a.tar.xz
yt-local-45a4ab5acedd2fd7531604d3e817e0742a036c4a.zip
Extraction: Detect limited state and fix false detection as unlisted
Diffstat (limited to 'youtube/yt_data_extract.py')
-rw-r--r-- youtube/yt_data_extract.py 12
1 files changed, 10 insertions, 2 deletions
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py
index 96021f1..c7a6604 100644
--- a/youtube/yt_data_extract.py
+++ b/youtube/yt_data_extract.py
@@ -1003,6 +1003,13 @@ def extract_watch_info_mobile(top_level):
info['comment_count'] = 0
info['comments_disabled'] = True
+ # check for limited state
+ items, _ = extract_items(response, item_types={'limitedStateMessageRenderer'})
+ if items:
+ info['limited_state'] = True
+ else:
+ info['limited_state'] = False
+
# related videos
related, _ = extract_items(response)
info['related_videos'] = [renderer_info(renderer) for renderer in related]
@@ -1015,6 +1022,7 @@ def extract_watch_info_desktop(top_level):
'comment_count': None,
'comments_disabled': None,
'allowed_countries': None,
+ 'limited_state': None,
}
video_info = {}
@@ -1201,7 +1209,7 @@ def extract_watch_info(polymer_json):
liberal_update(info, 'author', vd.get('author'))
liberal_update(info, 'author_id', vd.get('channelId'))
liberal_update(info, 'live', vd.get('isLiveContent'))
- liberal_update(info, 'unlisted', not vd.get('isCrawlable', True))
+ conservative_update(info, 'unlisted', not vd.get('isCrawlable', True)) #isCrawlable is false on limited state videos even if they aren't unlisted
liberal_update(info, 'tags', vd.get('keywords', []))
# fallback stuff from microformat
@@ -1213,7 +1221,7 @@ def extract_watch_info(polymer_json):
conservative_update(info, 'description', extract_str(mf.get('description'), recover_urls=True))
conservative_update(info, 'author', mf.get('ownerChannelName'))
conservative_update(info, 'author_id', mf.get('externalChannelId'))
- conservative_update(info, 'unlisted', mf.get('isUnlisted'))
+ liberal_update(info, 'unlisted', mf.get('isUnlisted'))
liberal_update(info, 'category', mf.get('category'))
liberal_update(info, 'published_date', mf.get('publishDate'))
liberal_update(info, 'uploaded_date', mf.get('uploadDate'))