diff options
author | Tom-Oliver Heidel <github@tom-oliver.eu> | 2020-10-31 08:58:51 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-31 08:58:51 +0100 |
commit | 90c3f039e2a051c509e4f37fb47cae92153fb059 (patch) | |
tree | 15b91d4a4c03929a5fb06f3c1ad08c1baef57943 | |
parent | fa57af1ef333b11630ba6ae4353a94ea118883d4 (diff) | |
parent | 576d233fe67641da24b15751fc2a0e5c1e787034 (diff) | |
download | hypervideo-pre-90c3f039e2a051c509e4f37fb47cae92153fb059.tar.lz hypervideo-pre-90c3f039e2a051c509e4f37fb47cae92153fb059.tar.xz hypervideo-pre-90c3f039e2a051c509e4f37fb47cae92153fb059.zip |
Merge pull request #45 from insaneracist/xtube-fix
[xtube] fix extractor (#17)
-rw-r--r-- | youtube_dlc/extractor/xtube.py | 47 |
1 files changed, 20 insertions, 27 deletions
diff --git a/youtube_dlc/extractor/xtube.py b/youtube_dlc/extractor/xtube.py index 01b253dcb..081c5e2e7 100644 --- a/youtube_dlc/extractor/xtube.py +++ b/youtube_dlc/extractor/xtube.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, js_to_json, orderedSet, @@ -33,28 +34,12 @@ class XTubeIE(InfoExtractor): 'title': 'strange erotica', 'description': 'contains:an ET kind of thing', 'uploader': 'greenshowers', - 'duration': 450, + 'duration': 449, 'view_count': int, 'comment_count': int, 'age_limit': 18, } }, { - # FLV videos with duplicated formats - 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752', - 'md5': 'a406963eb349dd43692ec54631efd88b', - 'info_dict': { - 'id': '9299752', - 'display_id': 'A-Super-Run-Part-1-YT', - 'ext': 'flv', - 'title': 'A Super Run - Part 1 (YT)', - 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616', - 'uploader': 'tshirtguy59', - 'duration': 579, - 'view_count': int, - 'comment_count': int, - 'age_limit': 18, - }, - }, { # new URL schema 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837', 'only_matching': True, @@ -89,16 +74,24 @@ class XTubeIE(InfoExtractor): title, thumbnail, duration = [None] * 3 - config = self._parse_json(self._search_regex( - r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config', - default='{}'), video_id, transform_source=js_to_json, fatal=False) - if config: - config = config.get('mainRoll') - if isinstance(config, dict): - title = config.get('title') - thumbnail = config.get('poster') - duration = int_or_none(config.get('duration')) - sources = config.get('sources') or config.get('format') + json_config_string = self._search_regex( + r'playerConf=({.+?}),loaderConf', + webpage, 'config', default=None) + if not json_config_string: + raise ExtractorError("Could not extract video player data") + + json_config_string = json_config_string.replace("!0", "true").replace("!1", "false") + + config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False) + if not config: + raise ExtractorError("Could not extract video player data") + + config = config.get('mainRoll') + if isinstance(config, dict): + title = config.get('title') + thumbnail = config.get('poster') + duration = int_or_none(config.get('duration')) + sources = config.get('sources') or config.get('format') if not isinstance(sources, dict): sources = self._parse_json(self._search_regex( |