aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py8
-rw-r--r--youtube_dl/extractor/byutv.py2
-rw-r--r--youtube_dl/extractor/canalplus.py14
-rw-r--r--youtube_dl/extractor/common.py3
-rw-r--r--youtube_dl/extractor/dumpert.py2
-rw-r--r--youtube_dl/extractor/kanalplay.py5
-rw-r--r--youtube_dl/extractor/nrk.py13
-rw-r--r--youtube_dl/extractor/nytimes.py2
-rw-r--r--youtube_dl/extractor/odnoklassniki.py5
-rw-r--r--youtube_dl/extractor/southpark.py11
-rw-r--r--youtube_dl/extractor/tmz.py28
-rw-r--r--youtube_dl/extractor/vine.py2
-rw-r--r--youtube_dl/extractor/zingmp3.py10
13 files changed, 71 insertions, 34 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e808f2734..de19dfd7a 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -489,8 +489,9 @@ from .soundgasm import (
)
from .southpark import (
SouthParkIE,
- SouthParkEsIE,
SouthParkDeIE,
+ SouthParkDkIE,
+ SouthParkEsIE,
SouthParkNlIE
)
from .space import SpaceIE
@@ -543,7 +544,10 @@ from .thesixtyone import TheSixtyOneIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .tlc import TlcIE, TlcDeIE
-from .tmz import TMZIE
+from .tmz import (
+ TMZIE,
+ TMZArticleIE,
+)
from .tnaflix import TNAFlixIE
from .thvideo import (
THVideoIE,
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index 6252be05b..3b2de517e 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -16,7 +16,7 @@ class BYUtvIE(InfoExtractor):
'ext': 'mp4',
'description': 'md5:5438d33774b6bdc662f9485a340401cc',
'title': 'Season 5 Episode 5',
- 'thumbnail': 're:^https?://.*promo.*'
+ 'thumbnail': 're:^https?://.*\.jpg$'
},
'params': {
'skip_download': True,
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 1b14471e5..699b4f7d0 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -25,14 +25,14 @@ class CanalplusIE(InfoExtractor):
}
_TESTS = [{
- 'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
- 'md5': '3db39fb48b9685438ecf33a1078023e4',
+ 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
+ 'md5': 'b3481d7ca972f61e37420798d0a9d934',
'info_dict': {
- 'id': '922470',
+ 'id': '1263092',
'ext': 'flv',
- 'title': 'Zapping - 26/08/13',
- 'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
- 'upload_date': '20130826',
+ 'title': 'Le Zapping - 13/05/15',
+ 'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
+ 'upload_date': '20150513',
},
}, {
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
@@ -56,7 +56,7 @@ class CanalplusIE(InfoExtractor):
'skip': 'videos get deleted after a while',
}, {
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
- 'md5': '65aa83ad62fe107ce29e564bb8712580',
+ 'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4',
'info_dict': {
'id': '1213714',
'ext': 'flv',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 981e34bc7..65bb77086 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1072,9 +1072,6 @@ class InfoExtractor(object):
def _get_automatic_captions(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
- def _subtitles_timecode(self, seconds):
- return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
-
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
index 9c594b757..999fb5620 100644
--- a/youtube_dl/extractor/dumpert.py
+++ b/youtube_dl/extractor/dumpert.py
@@ -26,7 +26,7 @@ class DumpertIE(InfoExtractor):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
- req.add_header('Cookie', 'nsfw=1')
+ req.add_header('Cookie', 'nsfw=1; cpc=10')
webpage = self._download_webpage(req, video_id)
files_base64 = self._search_regex(
diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py
index 2bb078036..4597d1b96 100644
--- a/youtube_dl/extractor/kanalplay.py
+++ b/youtube_dl/extractor/kanalplay.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
+ srt_subtitles_timecode,
)
@@ -39,8 +40,8 @@ class KanalPlayIE(InfoExtractor):
'%s\r\n%s --> %s\r\n%s'
% (
num,
- self._subtitles_timecode(item['startMillis'] / 1000.0),
- self._subtitles_timecode(item['endMillis'] / 1000.0),
+ srt_subtitles_timecode(item['startMillis'] / 1000.0),
+ srt_subtitles_timecode(item['endMillis'] / 1000.0),
item['text'],
) for num, item in enumerate(subs, 1))
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index e91d3a248..cc70c2950 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
@@ -200,20 +199,10 @@ class NRKTVIE(InfoExtractor):
url = "%s%s" % (baseurl, subtitlesurl)
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
captions = self._download_xml(
- url, video_id, 'Downloading subtitles',
- transform_source=lambda s: s.replace(r'<br />', '\r\n'))
+ url, video_id, 'Downloading subtitles')
lang = captions.get('lang', 'no')
- ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
- srt = ''
- for pos, p in enumerate(ps):
- begin = parse_duration(p.get('begin'))
- duration = parse_duration(p.get('dur'))
- starttime = self._subtitles_timecode(begin)
- endtime = self._subtitles_timecode(begin + duration)
- srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
return {lang: [
{'ext': 'ttml', 'url': url},
- {'ext': 'srt', 'data': srt},
]}
def _extract_f4m(self, manifest_url, video_id):
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py
index 6ffbe3863..7f254b867 100644
--- a/youtube_dl/extractor/nytimes.py
+++ b/youtube_dl/extractor/nytimes.py
@@ -89,7 +89,7 @@ class NYTimesIE(NYTimesBaseIE):
class NYTimesArticleIE(NYTimesBaseIE):
- _VALID_URL = r'https?://(?:www)?\.nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
+ _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
_TESTS = [{
'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py
index 155d0ee6a..fbc521d1a 100644
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -6,6 +6,7 @@ from ..utils import (
unified_strdate,
int_or_none,
qualities,
+ unescapeHTML,
)
@@ -36,8 +37,8 @@ class OdnoklassnikiIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
player = self._parse_json(
- self._search_regex(
- r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
+ unescapeHTML(self._search_regex(
+ r'data-attributes="([^"]+)"', webpage, 'player')),
video_id)
metadata = self._parse_json(player['flashvars']['metadata'], video_id)
diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py
index 59e31198c..7fb165a87 100644
--- a/youtube_dl/extractor/southpark.py
+++ b/youtube_dl/extractor/southpark.py
@@ -57,3 +57,14 @@ class SouthParkNlIE(SouthParkIE):
'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
'playlist_count': 4,
}]
+
+
+class SouthParkDkIE(SouthParkIE):
+ IE_NAME = 'southparkstudios.dk'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.dk/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+ _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
+ 'playlist_count': 4,
+ }]
diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py
index c5c6fdc51..7dbe68b5c 100644
--- a/youtube_dl/extractor/tmz.py
+++ b/youtube_dl/extractor/tmz.py
@@ -30,3 +30,31 @@ class TMZIE(InfoExtractor):
'description': self._og_search_description(webpage),
'thumbnail': self._html_search_meta('ThumbURL', webpage),
}
+
+
+class TMZArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
+ _TEST = {
+ 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
+ 'md5': 'e482a414a38db73087450e3a6ce69d00',
+ 'info_dict': {
+ 'id': '0_6snoelag',
+ 'ext': 'mp4',
+ 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
+ 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ embedded_video_info_str = self._html_search_regex(
+ r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info')
+
+ embedded_video_info = self._parse_json(
+ embedded_video_info_str, video_id,
+ transform_source=lambda s: s.replace('\\', ''))
+
+ return self.url_result(
+ 'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 65c459fad..c733a48fa 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -75,7 +75,7 @@ class VineIE(InfoExtractor):
return {
'id': video_id,
'title': self._og_search_title(webpage),
- 'alt_title': self._og_search_description(webpage),
+ 'alt_title': self._og_search_description(webpage, default=None),
'description': data['description'],
'thumbnail': data['thumbnailUrl'],
'upload_date': unified_strdate(data['created']),
diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py
index 1afbe68ed..7dc1e2f2b 100644
--- a/youtube_dl/extractor/zingmp3.py
+++ b/youtube_dl/extractor/zingmp3.py
@@ -4,12 +4,18 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import ExtractorError
class ZingMp3BaseInfoExtractor(InfoExtractor):
- @staticmethod
- def _extract_item(item):
+ def _extract_item(self, item):
+ error_message = item.find('./errormessage').text
+ if error_message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message),
+ expected=True)
+
title = item.find('./title').text.strip()
source = item.find('./source').text
extension = item.attrib['type']