about | summary | refs | log | tree | commit | diff | stats
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/cspan.py 11
-rw-r--r-- youtube_dl/extractor/ellentv.py 1
-rw-r--r-- youtube_dl/extractor/instagram.py 10
-rw-r--r-- youtube_dl/extractor/mtv.py 7
-rw-r--r-- youtube_dl/extractor/orf.py 14
-rw-r--r-- youtube_dl/extractor/southpark.py 15
7 files changed, 42 insertions, 17 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a64afa1da..ab80fd5e0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -478,6 +478,7 @@ from .soundgasm import (
)
from .southpark import (
SouthParkIE,
+ SouthParkEsIE,
SouthparkDeIE,
)
from .space import SpaceIE
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index d516b1402..fbefd37d0 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -8,6 +8,7 @@ from ..utils import (
unescapeHTML,
find_xpath_attr,
smuggle_url,
+ determine_ext,
)
from .senateisvp import SenateISVPIE
@@ -87,6 +88,10 @@ class CSpanIE(InfoExtractor):
return self.url_result(surl, 'SenateISVP', video_id, title)
files = data['video']['files']
+ try:
+ capfile = data['video']['capfile']['#text']
+ except KeyError:
+ capfile = None
entries = [{
'id': '%s_%d' % (video_id, partnum + 1),
@@ -97,6 +102,12 @@ class CSpanIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'duration': int_or_none(f.get('length', {}).get('#text')),
+ 'subtitles': {
+ 'en': [{
+ 'url': capfile,
+ 'ext': determine_ext(capfile, 'dfxp')
+ }],
+ } if capfile else None,
} for partnum, f in enumerate(files)]
if len(entries) == 1:
diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py
index 74b50bca2..02c6a4615 100644
--- a/youtube_dl/extractor/ellentv.py
+++ b/youtube_dl/extractor/ellentv.py
@@ -6,7 +6,6 @@ import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
- parse_iso8601,
)
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index b020e2621..65f6ca103 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
-)
+from ..utils import int_or_none
class InstagramIE(InfoExtractor):
- _VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/'
+ _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
_TEST = {
'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
@@ -23,8 +21,8 @@ class InstagramIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
webpage, 'uploader id', fatal=False)
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 4430b3416..b48fac5e3 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -25,6 +25,7 @@ def _media_xml_tag(tag):
class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
+ _LANG = None
@staticmethod
def _id_from_uri(uri):
@@ -169,8 +170,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
+ info_url = feed_url + '?'
+ if self._LANG:
+ info_url += 'lang=%s&' % self._LANG
+ info_url += data
idoc = self._download_xml(
- feed_url + '?' + data, video_id,
+ info_url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')])
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index ca1a5bb3c..2e6c9872b 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -210,16 +210,16 @@ class ORFIPTVIE(InfoExtractor):
_VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
_TEST = {
- 'url': 'http://iptv.orf.at/stories/2267952',
- 'md5': '26ffa4bab6dbce1eee78bbc7021016cd',
+ 'url': 'http://iptv.orf.at/stories/2275236/',
+ 'md5': 'c8b22af4718a4b4af58342529453e3e5',
'info_dict': {
- 'id': '339775',
+ 'id': '350612',
'ext': 'flv',
- 'title': 'Kreml-Kritiker Nawalny wieder frei',
- 'description': 'md5:6f24e7f546d364dacd0e616a9e409236',
- 'duration': 84.729,
+ 'title': 'Weitere Evakuierungen um Vulkan Calbuco',
+ 'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
+ 'duration': 68.197,
'thumbnail': 're:^https?://.*\.jpg$',
- 'upload_date': '20150306',
+ 'upload_date': '20150425',
},
}
diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py
index c20397b3d..77758bbed 100644
--- a/youtube_dl/extractor/southpark.py
+++ b/youtube_dl/extractor/southpark.py
@@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor
class SouthParkIE(MTVServicesInfoExtractor):
IE_NAME = 'southpark.cc.com'
- _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
@@ -20,9 +20,20 @@ class SouthParkIE(MTVServicesInfoExtractor):
}]
+class SouthParkEsIE(SouthParkIE):
+ IE_NAME = 'southpark.cc.com:espanol'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))'
+ _LANG = 'es'
+
+ _TESTS = [{
+ 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
+ 'playlist_count': 4,
+ }]
+
+
class SouthparkDeIE(SouthParkIE):
IE_NAME = 'southpark.de'
- _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
_TESTS = [{