diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/cspan.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/ellentv.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/instagram.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/mtv.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/orf.py | 14 | ||||
-rw-r--r-- | youtube_dl/extractor/southpark.py | 15 |
7 files changed, 42 insertions, 17 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a64afa1da..ab80fd5e0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -478,6 +478,7 @@ from .soundgasm import ( ) from .southpark import ( SouthParkIE, + SouthParkEsIE, SouthparkDeIE, ) from .space import SpaceIE diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index d516b1402..fbefd37d0 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -8,6 +8,7 @@ from ..utils import ( unescapeHTML, find_xpath_attr, smuggle_url, + determine_ext, ) from .senateisvp import SenateISVPIE @@ -87,6 +88,10 @@ class CSpanIE(InfoExtractor): return self.url_result(surl, 'SenateISVP', video_id, title) files = data['video']['files'] + try: + capfile = data['video']['capfile']['#text'] + except KeyError: + capfile = None entries = [{ 'id': '%s_%d' % (video_id, partnum + 1), @@ -97,6 +102,12 @@ class CSpanIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'duration': int_or_none(f.get('length', {}).get('#text')), + 'subtitles': { + 'en': [{ + 'url': capfile, + 'ext': determine_ext(capfile, 'dfxp') + }], + } if capfile else None, } for partnum, f in enumerate(files)] if len(entries) == 1: diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 74b50bca2..02c6a4615 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -6,7 +6,6 @@ import json from .common import InfoExtractor from ..utils import ( ExtractorError, - parse_iso8601, ) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index b020e2621..65f6ca103 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,13 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - int_or_none, -) +from ..utils import int_or_none class InstagramIE(InfoExtractor): - _VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/' + _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)' _TEST = { 'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -23,8 +21,8 @@ class InstagramIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', webpage, 'uploader id', fatal=False) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 4430b3416..b48fac5e3 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -25,6 +25,7 @@ def _media_xml_tag(tag): class MTVServicesInfoExtractor(InfoExtractor): _MOBILE_TEMPLATE = None + _LANG = None @staticmethod def _id_from_uri(uri): @@ -169,8 +170,12 @@ class MTVServicesInfoExtractor(InfoExtractor): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) data = compat_urllib_parse.urlencode({'uri': uri}) + info_url = feed_url + '?' + if self._LANG: + info_url += 'lang=%s&' % self._LANG + info_url += data idoc = self._download_xml( - feed_url + '?' + data, video_id, + info_url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) return self.playlist_result( [self._get_video_info(item) for item in idoc.findall('.//item')]) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index ca1a5bb3c..2e6c9872b 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -210,16 +210,16 @@ class ORFIPTVIE(InfoExtractor): _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' _TEST = { - 'url': 'http://iptv.orf.at/stories/2267952', - 'md5': '26ffa4bab6dbce1eee78bbc7021016cd', + 'url': 'http://iptv.orf.at/stories/2275236/', + 'md5': 'c8b22af4718a4b4af58342529453e3e5', 'info_dict': { - 'id': '339775', + 'id': '350612', 'ext': 'flv', - 'title': 'Kreml-Kritiker Nawalny wieder frei', - 'description': 'md5:6f24e7f546d364dacd0e616a9e409236', - 'duration': 84.729, + 'title': 'Weitere Evakuierungen um Vulkan Calbuco', + 'description': 'md5:d689c959bdbcf04efeddedbf2299d633', + 'duration': 68.197, 'thumbnail': 're:^https?://.*\.jpg$', - 'upload_date': '20150306', + 'upload_date': '20150425', }, } diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index c20397b3d..77758bbed 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' @@ -20,9 +20,20 @@ class SouthParkIE(MTVServicesInfoExtractor): }] +class SouthParkEsIE(SouthParkIE): + IE_NAME = 'southpark.cc.com:espanol' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))' + _LANG = 'es' + + _TESTS = [{ + 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', + 'playlist_count': 4, + }] + + class SouthparkDeIE(SouthParkIE): IE_NAME = 'southpark.de' - _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' _TESTS = [{ |