about | summary | refs | log | tree | commit | diff | stats
path: root/hypervideo_dl/extractor/vlive.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/vlive.py')
-rw-r--r-- hypervideo_dl/extractor/vlive.py 256
1 files changed, 117 insertions, 139 deletions
diff --git a/hypervideo_dl/extractor/vlive.py b/hypervideo_dl/extractor/vlive.py
index 84f51a5..ae35c97 100644
--- a/hypervideo_dl/extractor/vlive.py
+++ b/hypervideo_dl/extractor/vlive.py
@@ -12,22 +12,65 @@ from ..compat import (
from ..utils import (
ExtractorError,
int_or_none,
+ LazyList,
merge_dicts,
str_or_none,
strip_or_none,
try_get,
urlencode_postdata,
+ url_or_none,
)
class VLiveBaseIE(NaverBaseIE):
- _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+ _NETRC_MACHINE = 'vlive'
+ _logged_in = False
+
+ def _perform_login(self, username, password):
+ if self._logged_in:
+ return
+ LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+ self._request_webpage(
+ LOGIN_URL, None, note='Downloading login cookies')
+
+ self._download_webpage(
+ LOGIN_URL, None, note='Logging in',
+ data=urlencode_postdata({'email': username, 'pwd': password}),
+ headers={
+ 'Referer': LOGIN_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+
+ login_info = self._download_json(
+ 'https://www.vlive.tv/auth/loginInfo', None,
+ note='Checking login status',
+ headers={'Referer': 'https://www.vlive.tv/home'})
+
+ if not try_get(login_info, lambda x: x['message']['login'], bool):
+ raise ExtractorError('Unable to log in', expected=True)
+ VLiveBaseIE._logged_in = True
+
+ def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None):
+ if note is None:
+ note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0]
+ query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 'platformType': 'PC'}
+ if fields:
+ query['fields'] = fields
+ if query_add:
+ query.update(query_add)
+ try:
+ return self._download_json(
+ 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
+ note, headers={'Referer': 'https://www.vlive.tv/'}, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
+ raise
class VLiveIE(VLiveBaseIE):
IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
- _NETRC_MACHINE = 'vlive'
_TESTS = [{
'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
@@ -38,6 +81,12 @@ class VLiveIE(VLiveBaseIE):
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
+ 'upload_date': '20150817',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1439816449,
+ },
+ 'params': {
+ 'skip_download': True,
},
}, {
'url': 'http://www.vlive.tv/video/16937',
@@ -49,6 +98,9 @@ class VLiveIE(VLiveBaseIE):
'view_count': int,
'subtitles': 'mincount:12',
'uploader_id': 'muploader_j',
+ 'upload_date': '20161112',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1478923074,
},
'params': {
'skip_download': True,
@@ -81,53 +133,6 @@ class VLiveIE(VLiveBaseIE):
'playlist_mincount': 120
}]
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- email, password = self._get_login_info()
- if None in (email, password):
- return
-
- def is_logged_in():
- login_info = self._download_json(
- 'https://www.vlive.tv/auth/loginInfo', None,
- note='Downloading login info',
- headers={'Referer': 'https://www.vlive.tv/home'})
- return try_get(
- login_info, lambda x: x['message']['login'], bool) or False
-
- LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
- self._request_webpage(
- LOGIN_URL, None, note='Downloading login cookies')
-
- self._download_webpage(
- LOGIN_URL, None, note='Logging in',
- data=urlencode_postdata({'email': email, 'pwd': password}),
- headers={
- 'Referer': LOGIN_URL,
- 'Content-Type': 'application/x-www-form-urlencoded'
- })
-
- if not is_logged_in():
- raise ExtractorError('Unable to log in', expected=True)
-
- def _call_api(self, path_template, video_id, fields=None, limit=None):
- query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
- if fields:
- query['fields'] = fields
- if limit:
- query['limit'] = limit
- try:
- return self._download_json(
- 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
- 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
- headers={'Referer': 'https://www.vlive.tv/'}, query=query)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
- raise
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -135,30 +140,24 @@ class VLiveIE(VLiveBaseIE):
'post/v1.0/officialVideoPost-%s', video_id,
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId},playlist{playlistSeq,totalCount,name}')
- playlist = post.get('playlist')
- if not playlist or self.get_param('noplaylist'):
- if playlist:
- self.to_screen(
- 'Downloading just video %s because of --no-playlist'
- % video_id)
-
+ playlist_id = str_or_none(try_get(post, lambda x: x['playlist']['playlistSeq']))
+ if not self._yes_playlist(playlist_id, video_id):
video = post['officialVideo']
return self._get_vlive_info(post, video, video_id)
- else:
- playlist_name = playlist.get('name')
- playlist_id = str_or_none(playlist.get('playlistSeq'))
- playlist_count = str_or_none(playlist.get('totalCount'))
- playlist = self._call_api(
- 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', limit=playlist_count)
+ playlist_name = str_or_none(try_get(post, lambda x: x['playlist']['name']))
+ playlist_count = str_or_none(try_get(post, lambda x: x['playlist']['totalCount']))
- entries = []
- for video_data in playlist['data']:
- video = video_data.get('officialVideo')
- video_id = str_or_none(video.get('videoSeq'))
- entries.append(self._get_vlive_info(video_data, video, video_id))
+ playlist = self._call_api(
+ 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})
- return self.playlist_result(entries, playlist_id, playlist_name)
+ entries = []
+ for video_data in playlist['data']:
+ video = video_data.get('officialVideo')
+ video_id = str_or_none(video.get('videoSeq'))
+ entries.append(self._get_vlive_info(video_data, video, video_id))
+
+ return self.playlist_result(entries, playlist_id, playlist_name)
def _get_vlive_info(self, post, video, video_id):
def get_common_fields():
@@ -172,6 +171,8 @@ class VLiveIE(VLiveBaseIE):
'view_count': int_or_none(video.get('playCount')),
'like_count': int_or_none(video.get('likeCount')),
'comment_count': int_or_none(video.get('commentCount')),
+ 'timestamp': int_or_none(video.get('createdAt'), scale=1000),
+ 'thumbnail': video.get('thumb'),
}
video_type = video.get('type')
@@ -197,7 +198,7 @@ class VLiveIE(VLiveBaseIE):
self._sort_formats(formats)
info = get_common_fields()
info.update({
- 'title': self._live_title(video['title']),
+ 'title': video['title'],
'id': video_id,
'formats': formats,
'is_live': True,
@@ -216,7 +217,7 @@ class VLiveIE(VLiveBaseIE):
raise ExtractorError('Unknown status ' + status)
-class VLivePostIE(VLiveIE):
+class VLivePostIE(VLiveBaseIE):
IE_NAME = 'vlive:post'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
_TESTS = [{
@@ -238,8 +239,6 @@ class VLivePostIE(VLiveIE):
'playlist_count': 1,
}]
_FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
- _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
- _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
def _real_extract(self, url):
post_id = self._match_id(url)
@@ -266,7 +265,7 @@ class VLivePostIE(VLiveIE):
entry = None
if upload_type == 'SOS':
download = self._call_api(
- self._SOS_TMPL, video_id)['videoUrl']['download']
+ self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download']
formats = []
for f_id, f_url in download.items():
formats.append({
@@ -284,7 +283,7 @@ class VLivePostIE(VLiveIE):
vod_id = upload_info.get('videoId')
if not vod_id:
continue
- inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
+ inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey']
entry = self._extract_video_info(video_id, vod_id, inkey)
if entry:
entry['title'] = '%s_part%s' % (title, idx)
@@ -295,7 +294,7 @@ class VLivePostIE(VLiveIE):
class VLiveChannelIE(VLiveBaseIE):
IE_NAME = 'vlive:channel'
- _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
+ _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<channel_id>[0-9A-Z]+)(?:/board/(?P<posts_id>\d+))?'
_TESTS = [{
'url': 'http://channels.vlive.tv/FCD4B',
'info_dict': {
@@ -306,78 +305,57 @@ class VLiveChannelIE(VLiveBaseIE):
}, {
'url': 'https://www.vlive.tv/channel/FCD4B',
'only_matching': True,
+ }, {
+ 'url': 'https://www.vlive.tv/channel/FCD4B/board/3546',
+ 'info_dict': {
+ 'id': 'FCD4B-3546',
+ 'title': 'MAMAMOO - Star Board',
+ },
+ 'playlist_mincount': 880
}]
- def _call_api(self, path, channel_key_suffix, channel_value, note, query):
- q = {
- 'app_id': self._APP_ID,
- 'channel' + channel_key_suffix: channel_value,
- }
- q.update(query)
- return self._download_json(
- 'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
- channel_value, note='Downloading ' + note, query=q)['result']
-
- def _real_extract(self, url):
- channel_code = self._match_id(url)
-
- channel_seq = self._call_api(
- 'decodeChannelCode', 'Code', channel_code,
- 'decode channel code', {})['channelSeq']
-
- channel_name = None
- entries = []
+ def _entries(self, posts_id, board_name):
+ if board_name:
+ posts_path = 'post/v1.0/board-%s/posts'
+ query_add = {'limit': 100, 'sortType': 'LATEST'}
+ else:
+ posts_path = 'post/v1.0/channel-%s/starPosts'
+ query_add = {'limit': 100}
for page_num in itertools.count(1):
video_list = self._call_api(
- 'getChannelVideoList', 'Seq', channel_seq,
- 'channel list page #%d' % page_num, {
- # Large values of maxNumOfRows (~300 or above) may cause
- # empty responses (see [1]), e.g. this happens for [2] that
- # has more than 300 videos.
- # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
- # 2. http://channels.vlive.tv/EDBF.
- 'maxNumOfRows': 100,
- 'pageNo': page_num
- }
- )
-
- if not channel_name:
- channel_name = try_get(
- video_list,
- lambda x: x['channelInfo']['channelName'],
- compat_str)
+ posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add,
+ note=f'Downloading playlist page {page_num}')
+
+ for video in try_get(video_list, lambda x: x['data'], list) or []:
+ video_id = str(video.get('postId'))
+ video_title = str_or_none(video.get('title'))
+ video_url = url_or_none(video.get('url'))
+ if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO':
+ continue
+ channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str)
+ yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name)
- videos = try_get(
- video_list, lambda x: x['videoList'], list)
- if not videos:
+ after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str)
+ if not after:
break
+ query_add['after'] = after
+
+ def _real_extract(self, url):
+ channel_id, posts_id = self._match_valid_url(url).groups()
- for video in videos:
- video_id = video.get('videoSeq')
- video_type = video.get('videoType')
+ board_name = None
+ if posts_id:
+ board = self._call_api(
+ 'board/v1.0/board-%s', posts_id, 'title,boardType')
+ board_name = board.get('title') or 'Unknown'
+ if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'):
+ raise ExtractorError(f'Board {board_name!r} is not supported', expected=True)
- if not video_id or not video_type:
- continue
- video_id = compat_str(video_id)
-
- if video_type in ('PLAYLIST'):
- first_video_id = try_get(
- video,
- lambda x: x['videoPlaylist']['videoList'][0]['videoSeq'], int)
-
- if not first_video_id:
- continue
-
- entries.append(
- self.url_result(
- 'http://www.vlive.tv/video/%s' % first_video_id,
- ie=VLiveIE.ie_key(), video_id=first_video_id))
- else:
- entries.append(
- self.url_result(
- 'http://www.vlive.tv/video/%s' % video_id,
- ie=VLiveIE.ie_key(), video_id=video_id))
+ entries = LazyList(self._entries(posts_id or channel_id, board_name))
+ channel_name = entries[0]['channel']
return self.playlist_result(
- entries, channel_code, channel_name)
+ entries,
+ f'{channel_id}-{posts_id}' if posts_id else channel_id,
+ f'{channel_name} - {board_name}' if channel_name and board_name else channel_name)