diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 19 | ||||
| -rw-r--r-- | youtube_dl/extractor/fc2.py | 60 | ||||
| -rw-r--r-- | youtube_dl/extractor/hentaistigma.py | 42 | ||||
| -rw-r--r-- | youtube_dl/extractor/mixcloud.py | 52 | ||||
| -rw-r--r-- | youtube_dl/extractor/nuvid.py | 48 | ||||
| -rw-r--r-- | youtube_dl/extractor/slutload.py | 47 | ||||
| -rw-r--r-- | youtube_dl/extractor/videott.py | 58 | ||||
| -rw-r--r-- | youtube_dl/extractor/vine.py | 31 | ||||
| -rw-r--r-- | youtube_dl/extractor/wdr.py | 28 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
12 files changed, 381 insertions, 19 deletions
| diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7ed8d1970..4e657e297 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -54,6 +54,8 @@ __authors__  = (      'phaer',      'Sainyam Kapoor',      'Nicolas Évrard', +    'Jason Normore', +    'Hoje Lee',  )  __license__ = 'Public Domain' diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 23b7cbd8f..3503c76b7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -80,6 +80,7 @@ from .exfm import ExfmIE  from .extremetube import ExtremeTubeIE  from .facebook import FacebookIE  from .faz import FazIE +from .fc2 import FC2IE  from .firstpost import FirstpostIE  from .firsttv import FirstTVIE  from .fivemin import FiveMinIE @@ -110,6 +111,7 @@ from .googleplus import GooglePlusIE  from .googlesearch import GoogleSearchIE  from .hark import HarkIE  from .helsinki import HelsinkiIE +from .hentaistigma import HentaiStigmaIE  from .hotnewhiphop import HotNewHipHopIE  from .howcast import HowcastIE  from .huffpost import HuffPostIE @@ -195,6 +197,7 @@ from .nowvideo import NowVideoIE  from .nrk import NRKIE  from .ntv import NTVIE  from .nytimes import NYTimesIE +from .nuvid import NuvidIE  from .oe1 import OE1IE  from .ooyala import OoyalaIE  from .orf import ORFIE @@ -231,6 +234,7 @@ from .scivee import SciVeeIE  from .servingsys import ServingSysIE  from .sina import SinaIE  from .slideshare import SlideshareIE +from .slutload import SlutloadIE  from .smotri import (      SmotriIE,      SmotriCommunityIE, @@ -296,6 +300,7 @@ from .videodetective import VideoDetectiveIE  from .videolecturesnet import VideoLecturesNetIE  from .videofyme import VideofyMeIE  from .videopremium import VideoPremiumIE +from .videott import VideoTtIE  from .videoweed import VideoWeedIE  from .vimeo import (      VimeoIE, @@ -306,7 +311,10 @@ from .vimeo import (      VimeoReviewIE,      VimeoWatchLaterIE,  ) -from .vine import VineIE +from .vine import ( +    VineIE, +    VineUserIE, +)  from .viki import VikiIE  from .vk import VKIE  from .vube import VubeIE @@ -315,6 +323,7 @@ from .washingtonpost import WashingtonPostIE  from .wat import WatIE  from .wdr import (      WDRIE, +    WDRMobileIE,      WDRMausIE,  )  from .weibo import WeiboIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 10b0cbe69..11b31db88 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -242,10 +242,11 @@ class InfoExtractor(object):                  url = url_or_request.get_full_url()              except AttributeError:                  url = url_or_request -            if len(url) > 200: -                h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest() -                url = url[:200 - len(h)] + h -            raw_filename = ('%s_%s.dump' % (video_id, url)) +            basen = video_id + '_' + url +            if len(basen) > 240: +                h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest() +                basen = basen[:240 - len(h)] + h +            raw_filename = basen + '.dump'              filename = sanitize_filename(raw_filename, restricted=True)              self.to_screen(u'Saving request to ' + filename)              with open(filename, 'wb') as outf: @@ -555,6 +556,16 @@ class InfoExtractor(object):              if self._downloader.params.get('prefer_insecure', False)              else 'https:') +    def _proto_relative_url(self, url, scheme=None): +        if url is None: +            return url +        if url.startswith('//'): +            if scheme is None: +                scheme = self.http_scheme() +            return scheme + url +        else: +            return url +  class SearchInfoExtractor(InfoExtractor):      """ diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py new file mode 100644 index 000000000..ca8993241 --- /dev/null +++ b/youtube_dl/extractor/fc2.py @@ -0,0 +1,60 @@ +#! -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import hashlib + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    compat_urllib_request, +    compat_urlparse, +) + + +class FC2IE(InfoExtractor): +    _VALID_URL = r'^http://video\.fc2\.com/(?P<lang>[^/]+)/content/(?P<id>[^/]+)' +    IE_NAME = 'fc2' +    _TEST = { +        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', +        'md5': 'a6ebe8ebe0396518689d963774a54eb7', +        'info_dict': { +            'id': '20121103kUan1KHs', +            'ext': 'flv', +            'title': 'Boxing again with Puff', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) +        self._downloader.cookiejar.clear_session_cookies()  # must clear + +        title = self._og_search_title(webpage) +        thumbnail = self._og_search_thumbnail(webpage) +        refer = url.replace('/content/', '/a/content/') + +        mimi = hashlib.md5(video_id + '_gGddgPfeaf_gzyr').hexdigest() + +        info_url = ( +            "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&". +            format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.','%2E'))) + +        info_webpage = self._download_webpage( +            info_url, video_id, note='Downloading info page') +        info = compat_urlparse.parse_qs(info_webpage) + +        if 'err_code' in info: +            raise ExtractorError('Error code: %s' % info['err_code'][0]) + +        video_url = info['filepath'][0] + '?mid=' + info['mid'][0] + +        return { +            'id': video_id, +            'title': info['title'][0], +            'url': video_url, +            'ext': 'flv', +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dl/extractor/hentaistigma.py new file mode 100644 index 000000000..63d87b74c --- /dev/null +++ b/youtube_dl/extractor/hentaistigma.py @@ -0,0 +1,42 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class HentaiStigmaIE(InfoExtractor): +    _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)' +    _TEST = { +        'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/', +        'md5': '4e3d07422a68a4cc363d8f57c8bf0d23', +        'info_dict': { +            'id': 'inyouchuu-etsu-bonus', +            'ext': 'mp4', +            "title": "Inyouchuu Etsu Bonus", +            "age_limit": 18, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) + +        title = self._html_search_regex( +            r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>', +            webpage, 'title') +        wrap_url = self._html_search_regex( +            r'<iframe src="([^"]+mp4)"', webpage, 'wrapper url') +        wrap_webpage = self._download_webpage(wrap_url, video_id) + +        video_url = self._html_search_regex( +            r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, 'video url') + +        return { +            'id': video_id, +            'url': video_url, +            'title': title, +            'age_limit': 18, +        } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index c4bd53fe7..5f64e7bd0 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -4,9 +4,10 @@ import re  from .common import InfoExtractor  from ..utils import ( -    unified_strdate,      compat_urllib_parse,      ExtractorError, +    int_or_none, +    parse_iso8601,  ) @@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor):              'uploader': 'Daniel Holbach',              'uploader_id': 'dholbach',              'upload_date': '20111115', +            'timestamp': 1321359578, +            'thumbnail': 're:https?://.*\.jpg', +            'view_count': int, +            'like_count': int,          },      } @@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor):          webpage = self._download_webpage(url, track_id) -        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) -        info = self._download_json( -            api_url, track_id, 'Downloading cloudcast info') -          preview_url = self._search_regex(              r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')          song_url = preview_url.replace('/previews/', '/c/originals/') @@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor):              template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')              final_song_url = self._get_url(template_url)          if final_song_url is None: -            raise ExtractorError(u'Unable to extract track url') +            raise ExtractorError('Unable to extract track url') + +        PREFIX = ( +            r'<div class="cloudcast-play-button-container"' +            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') +        title = self._html_search_regex( +            PREFIX + r'm-title="([^"]+)"', webpage, 'title') +        thumbnail = self._proto_relative_url(self._html_search_regex( +            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', +            fatal=False)) +        uploader = self._html_search_regex( +            PREFIX + r'm-owner-name="([^"]+)"', +            webpage, 'uploader', fatal=False) +        uploader_id = self._search_regex( +            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) +        description = self._og_search_description(webpage) +        like_count = int_or_none(self._search_regex( +            r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"', +            webpage, 'like count', fatal=False)) +        view_count = int_or_none(self._search_regex( +            r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', +            webpage, 'play count', fatal=False)) +        timestamp = parse_iso8601(self._search_regex( +            r'<time itemprop="dateCreated" datetime="([^"]+)">', +            webpage, 'upload date'))          return {              'id': track_id, -            'title': info['name'], +            'title': title,              'url': final_song_url, -            'description': info.get('description'), -            'thumbnail': info['pictures'].get('extra_large'), -            'uploader': info['user']['name'], -            'uploader_id': info['user']['username'], -            'upload_date': unified_strdate(info['created_time']), -            'view_count': info['play_count'], +            'description': description, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'uploader_id': uploader_id, +            'timestamp': timestamp, +            'view_count': view_count, +            'like_count': like_count,          } diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py new file mode 100644 index 000000000..f0befa116 --- /dev/null +++ b/youtube_dl/extractor/nuvid.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class NuvidIE(InfoExtractor): +    _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)' +    _TEST = { +        'url': 'http://m.nuvid.com/video/1310741/', +        'md5': 'eab207b7ac4fccfb4e23c86201f11277', +        'info_dict': { +            'id': '1310741', +            'ext': 'mp4', +            "title": "Horny babes show their awesome bodeis and", +            "age_limit": 18, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        murl = url.replace('://www.', '://m.') +        webpage = self._download_webpage(murl, video_id) + +        title = self._html_search_regex( +            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', +            webpage, 'title').strip() + +        url_end = self._html_search_regex( +            r'href="(/mp4/[^"]+)"[^>]*data-link_type="mp4"', +            webpage, 'video_url') +        video_url = 'http://m.nuvid.com' + url_end + +        thumbnail = self._html_search_regex( +            r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"', +            webpage, 'thumbnail URL', fatal=False) + +        return { +            'id': video_id, +            'url': video_url, +            'ext': 'mp4', +            'title': title, +            'thumbnail': thumbnail, +            'age_limit': 18, +        } diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py new file mode 100644 index 000000000..ecc0abfda --- /dev/null +++ b/youtube_dl/extractor/slutload.py @@ -0,0 +1,47 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +) + + +class SlutloadIE(InfoExtractor): +    _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$' +    _TEST = { +        'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', +        'md5': '0cf531ae8006b530bd9df947a6a0df77', +        'info_dict': { +            'id': 'TD73btpBqSxc', +            'ext': 'mp4', +            "title": "virginie baisee en cam", +            "age_limit": 18, +            'thumbnail': 're:https?://.*?\.jpg' +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) + +        video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>', +            webpage, 'title').strip() + +        video_url = self._html_search_regex( +            r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"', +            webpage, 'video URL') +        thumbnail = self._html_search_regex( +            r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"', +            webpage, 'thumbnail', fatal=False) + +        return { +            'id': video_id, +            'url': video_url, +            'title': video_title, +            'thumbnail': thumbnail, +            'age_limit': 18 +        } diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py new file mode 100644 index 000000000..b5034b02f --- /dev/null +++ b/youtube_dl/extractor/videott.py @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re +import base64 + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class VideoTtIE(InfoExtractor): +    ID_NAME = 'video.tt' +    IE_DESC = 'video.tt - Your True Tube' +    _VALID_URL = r'http://(?:www\.)?video\.tt/(?:video/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})' + +    _TEST = { +        'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8', +        'md5': 'b13aa9e2f267effb5d1094443dff65ba', +        'info_dict': { +            'id': 'amd5YujV8', +            'ext': 'flv', +            'title': 'Motivational video Change your mind in just 2.50 mins', +            'description': '', +            'upload_date': '20130827', +            'uploader': 'joseph313', +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        settings = self._download_json( +            'http://www.video.tt/player_control/settings.php?v=%s' % video_id, video_id, +            'Downloading video JSON')['settings'] + +        video = settings['video_details']['video'] + +        formats = [ +            { +                'url': base64.b64decode(res['u']).decode('utf-8'), +                'ext': 'flv', +                'format_id': res['l'], +            } for res in settings['res'] if res['u'] +        ] + +        return { +            'id': video_id, +            'title': video['title'], +            'description': video['description'], +            'thumbnail': settings['config']['thumbnail'], +            'upload_date': unified_strdate(video['added']), +            'uploader': video['owner'], +            'view_count': int(video['view_count']), +            'comment_count': int(video['comment_count']), +            'like_count': int(video['liked']), +            'dislike_count': int(video['disliked']), +            'formats': formats, +        }
\ No newline at end of file diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 5136ec466..076c87119 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals  import re  import json +import itertools  from .common import InfoExtractor  from ..utils import unified_strdate @@ -58,3 +59,33 @@ class VineIE(InfoExtractor):              'repost_count': data['reposts']['count'],              'formats': formats,          } + + +class VineUserIE(InfoExtractor): +    IE_NAME = 'vine:user' +    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$' +    _VINE_BASE_URL = "https://vine.co/" + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        user = mobj.group('user') + +        profile_url = "%sapi/users/profiles/vanity/%s" % ( +            self._VINE_BASE_URL, user) +        profile_data = self._download_json( +            profile_url, user, note='Downloading user profile data') + +        user_id = profile_data['data']['userId'] +        timeline_data = [] +        for pagenum in itertools.count(1): +            timeline_url = "%sapi/timelines/users/%s?page=%s" % ( +                self._VINE_BASE_URL, user_id, pagenum) +            timeline_page = self._download_json( +                timeline_url, user, note='Downloading page %d' % pagenum) +            timeline_data.extend(timeline_page['data']['records']) +            if timeline_page['data']['nextPage'] is None: +                break + +        entries = [ +            self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data] +        return self.playlist_result(entries, user) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 63691aa67..feeb44b45 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -115,6 +115,34 @@ class WDRIE(InfoExtractor):          } +class WDRMobileIE(InfoExtractor): +    _VALID_URL = r'''(?x) +        https?://mobile-ondemand\.wdr\.de/ +        .*?/fsk(?P<age_limit>[0-9]+) +        /[0-9]+/[0-9]+/ +        (?P<id>[0-9]+)_(?P<title>[0-9]+)''' +    IE_NAME = 'wdr:mobile' +    _TEST = { +        'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4', +        'info_dict': { +            'title': '4283021', +            'id': '421735', +            'age_limit': 0, +        }, +        '_skip': 'Will be depublicized shortly' +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        return { +            'id': mobj.group('id'), +            'title': mobj.group('title'), +            'age_limit': int(mobj.group('age_limit')), +            'url': url, +            'user_agent': 'mobile', +        } + +  class WDRMausIE(InfoExtractor):      _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'      IE_DESC = 'Sendung mit der Maus' diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d6d606913..89a2f72dc 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.05' +__version__ = '2014.05.12' | 
