diff options
41 files changed, 804 insertions, 303 deletions
| diff --git a/.travis.yml b/.travis.yml index e78a2fa76..cc21fae8f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ python:    - "3.2"    - "3.3"    - "3.4" +  - "3.5"  sudo: false  script: nosetests test --verbose  notifications: @@ -143,3 +143,4 @@ Shaun Walbridge  Lee Jenkins  Anssi Hannula  Lukáš Lalinský +Qijiang Fan @@ -281,6 +281,7 @@ The `-o` option allows users to indicate a template for the output file names. T   - `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.   - `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.   - `format_id`: The sequence will be replaced by the format code specified by `--format`. + - `duration`: The sequence will be replaced by the length of the video in seconds.  The current default template is `%(title)s-%(id)s.%(ext)s`. diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index cd26cc089..ce68f26f9 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -5,7 +5,7 @@ import os  from os.path import dirname as dirn  import sys -sys.path.append(dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))  import youtube_dl  BASH_COMPLETION_FILE = "youtube-dl.bash-completion" diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index c2f238798..41629d87d 100755 --- a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -6,7 +6,7 @@ import os  from os.path import dirname as dirn  import sys -sys.path.append(dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))  import youtube_dl  from youtube_dl.utils import shell_quote diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index d3ef5f0b5..503c1372f 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -6,7 +6,7 @@ import os  import textwrap  # We must be able to import youtube_dl -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))  import youtube_dl diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 3df4385a6..8cb4a4638 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -9,7 +9,7 @@ import sys  # Import youtube_dl  ROOT_DIR = os.path.join(os.path.dirname(__file__), '..') -sys.path.append(ROOT_DIR) +sys.path.insert(0, ROOT_DIR)  import youtube_dl diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 7ece37754..776e6556e 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -8,6 +8,35 @@ import re  ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  README_FILE = os.path.join(ROOT_DIR, 'README.md') + +def filter_options(readme): +    ret = '' +    in_options = False +    for line in readme.split('\n'): +        if line.startswith('# '): +            if line[2:].startswith('OPTIONS'): +                in_options = True +            else: +                in_options = False + +        if in_options: +            if line.lstrip().startswith('-'): +                option, description = re.split(r'\s{2,}', line.lstrip()) +                split_option = option.split(' ') + +                if not split_option[-1].startswith('-'):  # metavar +                    option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]]) + +                # Pandoc's definition_lists. See http://pandoc.org/README.html +                # for more information. +                ret += '\n%s\n:   %s\n' % (option, description) +            else: +                ret += line.lstrip() + '\n' +        else: +            ret += line + '\n' + +    return ret +  with io.open(README_FILE, encoding='utf-8') as f:      readme = f.read() @@ -26,6 +55,8 @@ readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)  readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)  readme = PREFIX + readme +readme = filter_options(readme) +  if sys.version_info < (3, 0):      print(readme.encode('utf-8'))  else: diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index f200f2c80..04728e8e2 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -5,7 +5,7 @@ import os  from os.path import dirname as dirn  import sys -sys.path.append(dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))  import youtube_dl  ZSH_COMPLETION_FILE = "youtube-dl.zsh" diff --git a/docs/supportedsites.md b/docs/supportedsites.md index ab153af6b..fa83b68ad 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -101,7 +101,7 @@   - **ComCarCoff**   - **ComedyCentral**   - **ComedyCentralShows**: The Daily Show / The Colbert Report - - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED + - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED   - **Cracked**   - **Criterion**   - **CrooksAndLiars** @@ -158,7 +158,6 @@   - **faz.net**   - **fc2**   - **fernsehkritik.tv** - - **fernsehkritik.tv:postecke**   - **Firstpost**   - **FiveTV**   - **Flickr** @@ -208,7 +207,6 @@   - **hitbox**   - **hitbox:live**   - **HornBunny** - - **HostingBulk**   - **HotNewHipHop**   - **Howcast**   - **HowStuffWorks** diff --git a/test/helper.py b/test/helper.py index cb6eec8d9..bdd7acca4 100644 --- a/test/helper.py +++ b/test/helper.py @@ -89,66 +89,81 @@ def gettestcases(include_onlymatching=False):  md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() -def expect_info_dict(self, got_dict, expected_dict): +def expect_value(self, got, expected, field): +    if isinstance(expected, compat_str) and expected.startswith('re:'): +        match_str = expected[len('re:'):] +        match_rex = re.compile(match_str) + +        self.assertTrue( +            isinstance(got, compat_str), +            'Expected a %s object, but got %s for field %s' % ( +                compat_str.__name__, type(got).__name__, field)) +        self.assertTrue( +            match_rex.match(got), +            'field %s (value: %r) should match %r' % (field, got, match_str)) +    elif isinstance(expected, compat_str) and expected.startswith('startswith:'): +        start_str = expected[len('startswith:'):] +        self.assertTrue( +            isinstance(got, compat_str), +            'Expected a %s object, but got %s for field %s' % ( +                compat_str.__name__, type(got).__name__, field)) +        self.assertTrue( +            got.startswith(start_str), +            'field %s (value: %r) should start with %r' % (field, got, start_str)) +    elif isinstance(expected, compat_str) and expected.startswith('contains:'): +        contains_str = expected[len('contains:'):] +        self.assertTrue( +            isinstance(got, compat_str), +            'Expected a %s object, but got %s for field %s' % ( +                compat_str.__name__, type(got).__name__, field)) +        self.assertTrue( +            contains_str in got, +            'field %s (value: %r) should contain %r' % (field, got, contains_str)) +    elif isinstance(expected, type): +        self.assertTrue( +            isinstance(got, expected), +            'Expected type %r for field %s, but got value %r of type %r' % (expected, field, got, type(got))) +    elif isinstance(expected, dict) and isinstance(got, dict): +        expect_dict(self, got, expected) +    elif isinstance(expected, list) and isinstance(got, list): +        self.assertEqual( +            len(expected), len(got), +            'Expect a list of length %d, but got a list of length %d for field %s' % ( +                len(expected), len(got), field)) +        for index, (item_got, item_expected) in enumerate(zip(got, expected)): +            type_got = type(item_got) +            type_expected = type(item_expected) +            self.assertEqual( +                type_expected, type_got, +                'Type mismatch for list item at index %d for field %s, expected %r, got %r' % ( +                    index, field, type_expected, type_got)) +            expect_value(self, item_got, item_expected, field) +    else: +        if isinstance(expected, compat_str) and expected.startswith('md5:'): +            got = 'md5:' + md5(got) +        elif isinstance(expected, compat_str) and expected.startswith('mincount:'): +            self.assertTrue( +                isinstance(got, (list, dict)), +                'Expected field %s to be a list or a dict, but it is of type %s' % ( +                    field, type(got).__name__)) +            expected_num = int(expected.partition(':')[2]) +            assertGreaterEqual( +                self, len(got), expected_num, +                'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got))) +            return +        self.assertEqual( +            expected, got, +            'Invalid value for field %s, expected %r, got %r' % (field, expected, got)) + + +def expect_dict(self, got_dict, expected_dict):      for info_field, expected in expected_dict.items(): -        if isinstance(expected, compat_str) and expected.startswith('re:'): -            got = got_dict.get(info_field) -            match_str = expected[len('re:'):] -            match_rex = re.compile(match_str) +        got = got_dict.get(info_field) +        expect_value(self, got, expected, info_field) -            self.assertTrue( -                isinstance(got, compat_str), -                'Expected a %s object, but got %s for field %s' % ( -                    compat_str.__name__, type(got).__name__, info_field)) -            self.assertTrue( -                match_rex.match(got), -                'field %s (value: %r) should match %r' % (info_field, got, match_str)) -        elif isinstance(expected, compat_str) and expected.startswith('startswith:'): -            got = got_dict.get(info_field) -            start_str = expected[len('startswith:'):] -            self.assertTrue( -                isinstance(got, compat_str), -                'Expected a %s object, but got %s for field %s' % ( -                    compat_str.__name__, type(got).__name__, info_field)) -            self.assertTrue( -                got.startswith(start_str), -                'field %s (value: %r) should start with %r' % (info_field, got, start_str)) -        elif isinstance(expected, compat_str) and expected.startswith('contains:'): -            got = got_dict.get(info_field) -            contains_str = expected[len('contains:'):] -            self.assertTrue( -                isinstance(got, compat_str), -                'Expected a %s object, but got %s for field %s' % ( -                    compat_str.__name__, type(got).__name__, info_field)) -            self.assertTrue( -                contains_str in got, -                'field %s (value: %r) should contain %r' % (info_field, got, contains_str)) -        elif isinstance(expected, type): -            got = got_dict.get(info_field) -            self.assertTrue(isinstance(got, expected), -                            'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got))) -        else: -            if isinstance(expected, compat_str) and expected.startswith('md5:'): -                got = 'md5:' + md5(got_dict.get(info_field)) -            elif isinstance(expected, compat_str) and expected.startswith('mincount:'): -                got = got_dict.get(info_field) -                self.assertTrue( -                    isinstance(got, (list, dict)), -                    'Expected field %s to be a list or a dict, but it is of type %s' % ( -                        info_field, type(got).__name__)) -                expected_num = int(expected.partition(':')[2]) -                assertGreaterEqual( -                    self, len(got), expected_num, -                    'Expected %d items in field %s, but only got %d' % ( -                        expected_num, info_field, len(got) -                    ) -                ) -                continue -            else: -                got = got_dict.get(info_field) -            self.assertEqual(expected, got, -                             'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) +def expect_info_dict(self, got_dict, expected_dict): +    expect_dict(self, got_dict, expected_dict)      # Check for the presence of mandatory fields      if got_dict.get('_type') not in ('playlist', 'multi_video'):          for key in ('id', 'url', 'title', 'ext'): @@ -1,5 +1,5 @@  [tox] -envlist = py26,py27,py33,py34 +envlist = py26,py27,py33,py34,py35  [testenv]  deps =     nose diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d65253882..adf70d658 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1232,13 +1232,20 @@ class YoutubeDL(object):              except (ValueError, OverflowError, OSError):                  pass +        subtitles = info_dict.get('subtitles') +        if subtitles: +            for _, subtitle in subtitles.items(): +                for subtitle_format in subtitle: +                    if 'ext' not in subtitle_format: +                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() +          if self.params.get('listsubtitles', False):              if 'automatic_captions' in info_dict:                  self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') -            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles') +            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')              return          info_dict['requested_subtitles'] = self.process_subtitles( -            info_dict['id'], info_dict.get('subtitles'), +            info_dict['id'], subtitles,              info_dict.get('automatic_captions'))          # We now pick which formats have to be downloaded diff --git a/youtube_dl/__main__.py b/youtube_dl/__main__.py index 65a0f891c..42a0f8c6f 100755 --- a/youtube_dl/__main__.py +++ b/youtube_dl/__main__.py @@ -11,7 +11,7 @@ if __package__ is None and not hasattr(sys, "frozen"):      # direct call of __main__.py      import os.path      path = os.path.realpath(os.path.abspath(__file__)) -    sys.path.append(os.path.dirname(os.path.dirname(path))) +    sys.path.insert(0, os.path.dirname(os.path.dirname(path)))  import youtube_dl diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 1ff42d94b..c36c9c23f 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -416,7 +416,7 @@ if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3  else:      _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) -    def compat_get_terminal_size(): +    def compat_get_terminal_size(fallback=(80, 24)):          columns = compat_getenv('COLUMNS', None)          if columns:              columns = int(columns) @@ -428,14 +428,20 @@ else:          else:              lines = None -        try: -            sp = subprocess.Popen( -                ['stty', 'size'], -                stdout=subprocess.PIPE, stderr=subprocess.PIPE) -            out, err = sp.communicate() -            lines, columns = map(int, out.split()) -        except Exception: -            pass +        if columns <= 0 or lines <= 0: +            try: +                sp = subprocess.Popen( +                    ['stty', 'size'], +                    stdout=subprocess.PIPE, stderr=subprocess.PIPE) +                out, err = sp.communicate() +                _columns, _lines = map(int, out.split()) +            except Exception: +                _columns, _lines = _terminal_size(*fallback) + +            if columns <= 0: +                columns = _columns +            if lines <= 0: +                lines = _lines          return _terminal_size(columns, lines)  try: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7272859db..3ace1cc2c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -158,6 +158,7 @@ from .eroprofile import EroProfileIE  from .escapist import EscapistIE  from .espn import ESPNIE  from .esri import EsriVideoIE +from .europa import EuropaIE  from .everyonesmixtape import EveryonesMixtapeIE  from .exfm import ExfmIE  from .expotv import ExpoTVIE @@ -169,10 +170,7 @@ from .firstpost import FirstpostIE  from .firsttv import FirstTVIE  from .fivemin import FiveMinIE  from .fivetv import FiveTVIE -from .fktv import ( -    FKTVIE, -    FKTVPosteckeIE, -) +from .fktv import FKTVIE  from .flickr import FlickrIE  from .folketinget import FolketingetIE  from .footyroom import FootyRoomIE @@ -297,6 +295,11 @@ from .lifenews import (      LifeNewsIE,      LifeEmbedIE,  ) +from .limelight import ( +    LimelightMediaIE, +    LimelightChannelIE, +    LimelightChannelListIE, +)  from .liveleak import LiveLeakIE  from .livestream import (      LivestreamIE, diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 4327c2f61..27de07587 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -5,6 +5,7 @@ import re  from .common import InfoExtractor  from ..utils import ( +    determine_ext,      ExtractorError,      float_or_none,      xpath_text, @@ -123,7 +124,6 @@ class AdultSwimIE(InfoExtractor):          else:              collections = bootstrapped_data['show']['collections']              collection, video_info = self.find_collection_containing_video(collections, episode_path) -              # Video wasn't found in the collections, let's try `slugged_video`.              if video_info is None:                  if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: @@ -133,7 +133,9 @@ class AdultSwimIE(InfoExtractor):              show = bootstrapped_data['show']              show_title = show['title'] -            segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] +            stream = video_info.get('stream') +            clips = [stream] if stream else video_info['clips'] +            segment_ids = [clip['videoPlaybackID'] for clip in clips]          episode_id = video_info['id']          episode_title = video_info['title'] @@ -142,7 +144,7 @@ class AdultSwimIE(InfoExtractor):          entries = []          for part_num, segment_id in enumerate(segment_ids): -            segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id +            segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id              segment_title = '%s - %s' % (show_title, episode_title)              if len(segment_ids) > 1: @@ -158,17 +160,30 @@ class AdultSwimIE(InfoExtractor):              formats = []              file_els = idoc.findall('.//files/file') or idoc.findall('./files/file') +            unique_urls = [] +            unique_file_els = []              for file_el in file_els: +                media_url = file_el.text +                if not media_url or determine_ext(media_url) == 'f4m': +                    continue +                if file_el.text not in unique_urls: +                    unique_urls.append(file_el.text) +                    unique_file_els.append(file_el) + +            for file_el in unique_file_els:                  bitrate = file_el.attrib.get('bitrate')                  ftype = file_el.attrib.get('type') - -                formats.append({ -                    'format_id': '%s_%s' % (bitrate, ftype), -                    'url': file_el.text.strip(), -                    # The bitrate may not be a number (for example: 'iphone') -                    'tbr': int(bitrate) if bitrate.isdigit() else None, -                    'quality': 1 if ftype == 'hd' else -1 -                }) +                media_url = file_el.text +                if determine_ext(media_url) == 'm3u8': +                    formats.extend(self._extract_m3u8_formats( +                        media_url, segment_title, 'mp4', 'm3u8_native', preference=0, m3u8_id='hls')) +                else: +                    formats.append({ +                        'format_id': '%s_%s' % (bitrate, ftype), +                        'url': file_el.text.strip(), +                        # The bitrate may not be a number (for example: 'iphone') +                        'tbr': int(bitrate) if bitrate.isdigit() else None, +                    })              self._sort_formats(formats) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 42526357a..cc2f6fed2 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -21,6 +21,9 @@ class BBCCoUkIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'      _MEDIASELECTOR_URLS = [ +        # Provides HQ HLS streams with even better quality that pc mediaset but fails +        # with geolocation in some cases when it's even not geo restricted at all (e.g. +        # http://www.bbc.co.uk/programmes/b06bp7lf)          'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',          'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',      ] @@ -154,6 +157,21 @@ class BBCCoUkIE(InfoExtractor):              },              'skip': 'geolocation',          }, { +            # iptv-all mediaset fails with geolocation however there is no geo restriction +            # for this programme at all +            'url': 'http://www.bbc.co.uk/programmes/b06bp7lf', +            'info_dict': { +                'id': 'b06bp7kf', +                'ext': 'flv', +                'title': "Annie Mac's Friday Night, B.Traits sits in for Annie", +                'description': 'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.', +                'duration': 10800, +            }, +            'params': { +                # rtmp download +                'skip_download': True, +            }, +        }, {              'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',              'only_matching': True,          }, { @@ -294,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):                  return self._download_media_selector_url(                      mediaselector_url % programme_id, programme_id)              except BBCCoUkIE.MediaSelectionError as e: -                if e.id == 'notukerror': +                if e.id in ('notukerror', 'geolocation'):                      last_exception = e                      continue                  self._raise_extractor_error(e) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 91ebb0ce5..3e4bd10b6 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -151,12 +151,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):          mobj = re.match(self._VALID_URL, url)          if mobj.group('shortname'): -            if mobj.group('shortname') in ('tds', 'thedailyshow'): -                url = 'http://thedailyshow.cc.com/full-episodes/' -            else: -                url = 'http://thecolbertreport.cc.com/full-episodes/' -            mobj = re.match(self._VALID_URL, url, re.VERBOSE) -            assert mobj is not None +            return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes')          if mobj.group('clip'):              if mobj.group('videotitle'): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1e7db8a9b..dbae75406 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -39,6 +39,7 @@ from ..utils import (      RegexNotFoundError,      sanitize_filename,      unescapeHTML, +    unified_strdate,      url_basename,      xpath_text,      xpath_with_ns, @@ -152,6 +153,7 @@ class InfoExtractor(object):      description:    Full video description.      uploader:       Full name of the video uploader.      creator:        The main artist who created the video. +    release_date:   The date (YYYYMMDD) when the video was released.      timestamp:      UNIX timestamp of the moment the video became available.      upload_date:    Video upload date (YYYYMMDD).                      If not explicitly set, calculated from timestamp. @@ -163,6 +165,7 @@ class InfoExtractor(object):                      with the "ext" entry and one of:                          * "data": The subtitles file contents                          * "url": A URL pointing to the subtitles file +                    "ext" will be calculated from URL if missing      automatic_captions: Like 'subtitles', used by the YoutubeIE for                      automatically generated captions      duration:       Length of the video in seconds, as an integer. @@ -868,13 +871,18 @@ class InfoExtractor(object):          time.sleep(timeout)      def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, -                             transform_source=lambda s: fix_xml_ampersands(s).strip()): +                             transform_source=lambda s: fix_xml_ampersands(s).strip(), +                             fatal=True):          manifest = self._download_xml(              manifest_url, video_id, 'Downloading f4m manifest',              'Unable to download f4m manifest',              # Some manifests may be malformed, e.g. prosiebensat1 generated manifests              # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244) -            transform_source=transform_source) +            transform_source=transform_source, +            fatal=fatal) + +        if manifest is False: +            return manifest          formats = []          manifest_version = '1.0' @@ -895,7 +903,10 @@ class InfoExtractor(object):                  # may differ leading to inability to resolve the format by requested                  # bitrate in f4m downloader                  if determine_ext(manifest_url) == 'f4m': -                    formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id)) +                    f4m_formats = self._extract_f4m_formats( +                        manifest_url, video_id, preference, f4m_id, fatal=fatal) +                    if f4m_formats: +                        formats.extend(f4m_formats)                      continue              tbr = int_or_none(media_el.attrib.get('bitrate'))              formats.append({ @@ -1043,6 +1054,7 @@ class InfoExtractor(object):          video_id = os.path.splitext(url_basename(smil_url))[0]          title = None          description = None +        upload_date = None          for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):              name = meta.attrib.get('name')              content = meta.attrib.get('content') @@ -1052,11 +1064,22 @@ class InfoExtractor(object):                  title = content              elif not description and name in ('description', 'abstract'):                  description = content +            elif not upload_date and name == 'date': +                upload_date = unified_strdate(content) + +        thumbnails = [{ +            'id': image.get('type'), +            'url': image.get('src'), +            'width': int_or_none(image.get('width')), +            'height': int_or_none(image.get('height')), +        } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]          return {              'id': video_id,              'title': title or video_id,              'description': description, +            'upload_date': upload_date, +            'thumbnails': thumbnails,              'formats': formats,              'subtitles': subtitles,          } @@ -1083,7 +1106,7 @@ class InfoExtractor(object):              if not src:                  continue -            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) +            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)              filesize = int_or_none(video.get('size') or video.get('fileSize'))              width = int_or_none(video.get('width'))              height = int_or_none(video.get('height')) @@ -1115,8 +1138,10 @@ class InfoExtractor(object):              src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)              if proto == 'm3u8' or src_ext == 'm3u8': -                formats.extend(self._extract_m3u8_formats( -                    src_url, video_id, ext or 'mp4', m3u8_id='hls')) +                m3u8_formats = self._extract_m3u8_formats( +                    src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False) +                if m3u8_formats: +                    formats.extend(m3u8_formats)                  continue              if src_ext == 'f4m': @@ -1128,10 +1153,12 @@ class InfoExtractor(object):                      }                  f4m_url += '&' if '?' in f4m_url else '?'                  f4m_url += compat_urllib_parse.urlencode(f4m_params) -                formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds')) +                f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False) +                if f4m_formats: +                    formats.extend(f4m_formats)                  continue -            if src_url.startswith('http'): +            if src_url.startswith('http') and self._is_valid_url(src, video_id):                  http_count += 1                  formats.append({                      'url': src_url, diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index d6949ca28..6f92ae2ed 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -11,6 +11,7 @@ from ..compat import (  )  from ..utils import (      orderedSet, +    remove_end,  ) @@ -44,12 +45,12 @@ class CondeNastIE(InfoExtractor):          'wmagazine': 'W Magazine',      } -    _VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys()) +    _VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())      IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) -    EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys()) +    EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys()) -    _TEST = { +    _TESTS = [{          'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',          'md5': '1921f713ed48aabd715691f774c451f7',          'info_dict': { @@ -58,7 +59,16 @@ class CondeNastIE(InfoExtractor):              'title': '3D Printed Speakers Lit With LED',              'description': 'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',          } -    } +    }, { +        # JS embed +        'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js', +        'md5': 'f1a6f9cafb7083bab74a710f65d08999', +        'info_dict': { +            'id': '55f9cf8b61646d1acf00000c', +            'ext': 'mp4', +            'title': '3D printed TSA Travel Sentry keys really do open TSA locks', +        } +    }]      def _extract_series(self, url, webpage):          title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>', @@ -122,6 +132,13 @@ class CondeNastIE(InfoExtractor):          url_type = mobj.group('type')          item_id = mobj.group('id') +        # Convert JS embed to regular embed +        if url_type == 'embedjs': +            parsed_url = compat_urlparse.urlparse(url) +            url = compat_urlparse.urlunparse(parsed_url._replace( +                path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/'))) +            url_type = 'embed' +          self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])          webpage = self._download_webpage(url, item_id) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index a1ee51568..e529b9b96 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -21,7 +21,7 @@ class EaglePlatformIE(InfoExtractor):      _TESTS = [{          # http://lenta.ru/news/2015/03/06/navalny/          'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', -        'md5': '0b7994faa2bd5c0f69a3db6db28d078d', +        'md5': '70f5187fb620f2c1d503b3b22fd4efe3',          'info_dict': {              'id': '227304',              'ext': 'mp4', @@ -36,7 +36,7 @@ class EaglePlatformIE(InfoExtractor):          # http://muz-tv.ru/play/7129/          # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true          'url': 'eagleplatform:media.clipyou.ru:12820', -        'md5': '6c2ebeab03b739597ce8d86339d5a905', +        'md5': '90b26344ba442c8e44aa4cf8f301164a',          'info_dict': {              'id': '12820',              'ext': 'mp4', @@ -48,7 +48,8 @@ class EaglePlatformIE(InfoExtractor):          'skip': 'Georestricted',      }] -    def _handle_error(self, response): +    @staticmethod +    def _handle_error(response):          status = int_or_none(response.get('status', 200))          if status != 200:              raise ExtractorError(' '.join(response['errors']), expected=True) @@ -58,6 +59,9 @@ class EaglePlatformIE(InfoExtractor):          self._handle_error(response)          return response +    def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): +        return self._download_json(url_or_request, video_id, note)['data'][0] +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') @@ -69,7 +73,7 @@ class EaglePlatformIE(InfoExtractor):          title = media['title']          description = media.get('description') -        thumbnail = media.get('snapshot') +        thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')          duration = int_or_none(media.get('duration'))          view_count = int_or_none(media.get('views')) @@ -78,13 +82,20 @@ class EaglePlatformIE(InfoExtractor):          if age_restriction:              age_limit = 0 if age_restriction == 'allow_all' else 18 -        m3u8_data = self._download_json( -            self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:'), -            video_id, 'Downloading m3u8 JSON') +        secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:') +        m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')          formats = self._extract_m3u8_formats( -            m3u8_data['data'][0], video_id, +            m3u8_url, video_id,              'mp4', entry_protocol='m3u8_native') + +        mp4_url = self._get_video_url( +            # Secure mp4 URL is constructed according to Player.prototype.mp4 from +            # http://lentaru.media.eagleplatform.com/player/player.js +            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8), +            video_id, 'Downloading mp4 JSON') +        formats.append({'url': mp4_url, 'format_id': 'mp4'}) +          self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py index 4ea37ebd9..e4180701d 100644 --- a/youtube_dl/extractor/engadget.py +++ b/youtube_dl/extractor/engadget.py @@ -10,7 +10,7 @@ from ..utils import (  class EngadgetIE(InfoExtractor):      _VALID_URL = r'''(?x)https?://www.engadget.com/ -        (?:video/5min/(?P<id>\d+)| +        (?:video(?:/5min)?/(?P<id>\d+)|              [\d/]+/.*?)          ''' diff --git a/youtube_dl/extractor/europa.py b/youtube_dl/extractor/europa.py new file mode 100644 index 000000000..adc43919e --- /dev/null +++ b/youtube_dl/extractor/europa.py @@ -0,0 +1,93 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( +    int_or_none, +    orderedSet, +    parse_duration, +    qualities, +    unified_strdate, +    xpath_text +) + + +class EuropaIE(InfoExtractor): +    _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)' +    _TESTS = [{ +        'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758', +        'md5': '574f080699ddd1e19a675b0ddf010371', +        'info_dict': { +            'id': 'I107758', +            'ext': 'mp4', +            'title': 'TRADE - Wikileaks on TTIP', +            'description': 'NEW  LIVE EC Midday press briefing of 11/08/2015', +            'thumbnail': 're:^https?://.*\.jpg$', +            'upload_date': '20150811', +            'duration': 34, +            'view_count': int, +            'formats': 'mincount:3', +        } +    }, { +        'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786', +        'only_matching': True, +    }, { +        'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        playlist = self._download_xml( +            'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id) + +        def get_item(type_, preference): +            items = {} +            for item in playlist.findall('./info/%s/item' % type_): +                lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None) +                if lang and label: +                    items[lang] = label.strip() +            for p in preference: +                if items.get(p): +                    return items[p] + +        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) +        preferred_lang = query.get('sitelang', ('en', ))[0] + +        preferred_langs = orderedSet((preferred_lang, 'en', 'int')) + +        title = get_item('title', preferred_langs) or video_id +        description = get_item('description', preferred_langs) +        thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail') +        upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) +        duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) +        view_count = int_or_none(xpath_text(playlist, './info/views', 'views')) + +        language_preference = qualities(preferred_langs[::-1]) + +        formats = [] +        for file_ in playlist.findall('./files/file'): +            video_url = xpath_text(file_, './url') +            if not video_url: +                continue +            lang = xpath_text(file_, './lg') +            formats.append({ +                'url': video_url, +                'format_id': lang, +                'format_note': xpath_text(file_, './lglabel'), +                'language_preference': language_preference(lang) +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnmail, +            'upload_date': upload_date, +            'duration': duration, +            'view_count': view_count, +            'formats': formats +        } diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 190d9f9ad..40ea27895 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -1,13 +1,12 @@  from __future__ import unicode_literals  import re -import random -import json  from .common import InfoExtractor  from ..utils import ( -    get_element_by_id,      clean_html, +    determine_ext, +    ExtractorError,  ) @@ -17,66 +16,40 @@ class FKTVIE(InfoExtractor):      _TEST = {          'url': 'http://fernsehkritik.tv/folge-1', +        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',          'info_dict': { -            'id': '00011', -            'ext': 'flv', +            'id': '1', +            'ext': 'mp4',              'title': 'Folge 1 vom 10. April 2007', -            'description': 'md5:fb4818139c7cfe6907d4b83412a6864f', +            'thumbnail': 're:^https?://.*\.jpg$',          },      }      def _real_extract(self, url): -        episode = int(self._match_id(url)) - -        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode -        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode, -                                               episode) -        playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, -                                      'playlist', flags=re.DOTALL) -        files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) - -        videos = [] -        for i, _ in enumerate(files, 1): -            video_id = '%04d%d' % (episode, i) -            video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i) -            videos.append({ -                'ext': 'flv', -                'id': video_id, -                'url': video_url, -                'title': clean_html(get_element_by_id('eptitle', start_webpage)), -                'description': clean_html(get_element_by_id('contentlist', start_webpage)), -                'thumbnail': video_thumbnail -            }) -        return { -            '_type': 'multi_video', -            'entries': videos, -            'id': 'folge-%s' % episode, -        } - - -class FKTVPosteckeIE(InfoExtractor): -    IE_NAME = 'fernsehkritik.tv:postecke' -    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)' -    _TEST = { -        'url': 'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', -        'md5': '262f0adbac80317412f7e57b4808e5c4', -        'info_dict': { -            'id': '0120', -            'ext': 'flv', -            'title': 'Postecke 120', -        } -    } - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        episode = int(mobj.group('ep')) - -        server = random.randint(2, 4) -        video_id = '%04d' % episode -        video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode) -        video_title = 'Postecke %d' % episode +        episode = self._match_id(url) + +        webpage = self._download_webpage( +            'http://fernsehkritik.tv/folge-%s/play' % episode, episode) +        title = clean_html(self._html_search_regex( +            '<h3>([^<]+)</h3>', webpage, 'title')) +        matches = re.search( +            r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>', +            webpage) +        if matches is None: +            raise ExtractorError('Unable to extract the video') + +        poster, sources = matches.groups() +        if poster is None: +            self.report_warning('unable to extract thumbnail') + +        urls = re.findall(r'<source[^>]+src="([^"]+)"', sources) +        formats = [{ +            'url': furl, +            'format_id': determine_ext(furl), +        } for furl in urls]          return { -            'id': video_id, -            'url': video_url, -            'title': video_title, +            'id': episode, +            'title': title, +            'formats': formats, +            'thumbnail': poster,          } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8881a8a23..ca5fbafb2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -50,6 +50,7 @@ from .dailymotion import DailymotionCloudIE  from .onionstudios import OnionStudiosIE  from .snagfilms import SnagFilmsEmbedIE  from .screenwavemedia import ScreenwaveMediaIE +from .mtv import MTVServicesEmbeddedIE  class GenericIE(InfoExtractor): @@ -1611,12 +1612,9 @@ class GenericIE(InfoExtractor):              return self.url_result(url, ie='Vulture')          # Look for embedded mtvservices player -        mobj = re.search( -            r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"', -            webpage) -        if mobj is not None: -            url = unescapeHTML(mobj.group('url')) -            return self.url_result(url, ie='MTVServicesEmbedded') +        mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) +        if mtvservices_url: +            return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')          # Look for embedded yahoo player          mobj = re.search( @@ -1655,7 +1653,7 @@ class GenericIE(InfoExtractor):              return self.url_result(mobj.group('url'), 'MLB')          mobj = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL, +            r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,              webpage)          if mobj is not None:              return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index c0956ba09..94a03d277 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -1,46 +1,39 @@ +# coding: utf-8  from __future__ import unicode_literals  from .common import InfoExtractor  class KeekIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<id>\w+)' +    _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'      IE_NAME = 'keek'      _TEST = { -        'url': 'https://www.keek.com/ytdl/keeks/NODfbab', -        'md5': '09c5c109067536c1cec8bac8c21fea05', +        'url': 'https://www.keek.com/keek/NODfbab', +        'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',          'info_dict': {              'id': 'NODfbab',              'ext': 'mp4', -            'uploader': 'youtube-dl project', -            'uploader_id': 'ytdl', -            'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .', +            'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896', +            'uploader': 'ytdl', +            'uploader_id': 'eGT5bab',          },      }      def _real_extract(self, url):          video_id = self._match_id(url) -        video_url = 'http://cdn.keek.com/keek/video/%s' % video_id -        thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id          webpage = self._download_webpage(url, video_id) -        raw_desc = self._html_search_meta('description', webpage) -        if raw_desc: -            uploader = self._html_search_regex( -                r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False) -            uploader_id = self._html_search_regex( -                r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False) -        else: -            uploader = None -            uploader_id = None -          return {              'id': video_id, -            'url': video_url, +            'url': self._og_search_video_url(webpage),              'ext': 'mp4', -            'title': self._og_search_title(webpage), -            'thumbnail': thumbnail, -            'uploader': uploader, -            'uploader_id': uploader_id, +            'title': self._og_search_description(webpage).strip(), +            'thumbnail': self._og_search_thumbnail(webpage), +            'uploader': self._search_regex( +                r'data-username=(["\'])(?P<uploader>.+?)\1', webpage, +                'uploader', fatal=False, group='uploader'), +            'uploader_id': self._search_regex( +                r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', webpage, +                'uploader id', fatal=False, group='uploader_id'),          } diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index fa233377d..0c8ed5d07 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -57,6 +57,7 @@ class KuwoIE(KuwoBaseIE):              'upload_date': '20080122',              'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'          }, +        'skip': 'this song has been offline because of copyright issues',      }, {          'url': 'http://www.kuwo.cn/yinyue/6446136/',          'info_dict': { @@ -76,9 +77,11 @@ class KuwoIE(KuwoBaseIE):          webpage = self._download_webpage(              url, song_id, note='Download song detail info',              errnote='Unable to get song detail info') +        if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: +            raise ExtractorError('this song has been offline because of copyright issues', expected=True)          song_name = self._html_search_regex( -            r'<h1[^>]+title="([^"]+)">', webpage, 'song name') +            r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage, 'song name')          singer_name = self._html_search_regex(              r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',              webpage, 'singer name', fatal=False) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py new file mode 100644 index 000000000..fb03dd527 --- /dev/null +++ b/youtube_dl/extractor/limelight.py @@ -0,0 +1,229 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    determine_ext, +    float_or_none, +    int_or_none, +) + + +class LimelightBaseIE(InfoExtractor): +    _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' +    _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' + +    def _call_playlist_service(self, item_id, method, fatal=True): +        return self._download_json( +            self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), +            item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal) + +    def _call_api(self, organization_id, item_id, method): +        return self._download_json( +            self._API_URL % (organization_id, self._API_PATH, item_id, method), +            item_id, 'Downloading API %s JSON' % method) + +    def _extract(self, item_id, pc_method, mobile_method, meta_method): +        pc = self._call_playlist_service(item_id, pc_method) +        metadata = self._call_api(pc['orgId'], item_id, meta_method) +        mobile = self._call_playlist_service(item_id, mobile_method, fatal=False) +        return pc, mobile, metadata + +    def _extract_info(self, streams, mobile_urls, properties): +        video_id = properties['media_id'] +        formats = [] + +        for stream in streams: +            stream_url = stream.get('url') +            if not stream_url: +                continue +            if '.f4m' in stream_url: +                formats.extend(self._extract_f4m_formats(stream_url, video_id)) +            else: +                fmt = { +                    'url': stream_url, +                    'abr': float_or_none(stream.get('audioBitRate')), +                    'vbr': float_or_none(stream.get('videoBitRate')), +                    'fps': float_or_none(stream.get('videoFrameRate')), +                    'width': int_or_none(stream.get('videoWidthInPixels')), +                    'height': int_or_none(stream.get('videoHeightInPixels')), +                    'ext': determine_ext(stream_url) +                } +                rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url) +                if rtmp: +                    format_id = 'rtmp' +                    if stream.get('videoBitRate'): +                        format_id += '-%d' % int_or_none(stream['videoBitRate']) +                    fmt.update({ +                        'url': rtmp.group('url'), +                        'play_path': rtmp.group('playpath'), +                        'app': rtmp.group('app'), +                        'ext': 'flv', +                        'format_id': format_id, +                    }) +                formats.append(fmt) + +        for mobile_url in mobile_urls: +            media_url = mobile_url.get('mobileUrl') +            if not media_url: +                continue +            format_id = mobile_url.get('targetMediaPlatform') +            if determine_ext(media_url) == 'm3u8': +                formats.extend(self._extract_m3u8_formats( +                    media_url, video_id, 'mp4', entry_protocol='m3u8_native', +                    preference=-1, m3u8_id=format_id)) +            else: +                formats.append({ +                    'url': media_url, +                    'format_id': format_id, +                    'preference': -1, +                }) + +        self._sort_formats(formats) + +        title = properties['title'] +        description = properties.get('description') +        timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date')) +        duration = float_or_none(properties.get('duration_in_milliseconds'), 1000) +        filesize = int_or_none(properties.get('total_storage_in_bytes')) +        categories = [properties.get('category')] +        tags = properties.get('tags', []) +        thumbnails = [{ +            'url': thumbnail['url'], +            'width': int_or_none(thumbnail.get('width')), +            'height': int_or_none(thumbnail.get('height')), +        } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')] + +        subtitles = {} +        for caption in properties.get('captions', {}): +            lang = caption.get('language_code') +            subtitles_url = caption.get('url') +            if lang and subtitles_url: +                subtitles[lang] = [{ +                    'url': subtitles_url, +                }] + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'formats': formats, +            'timestamp': timestamp, +            'duration': duration, +            'filesize': filesize, +            'categories': categories, +            'tags': tags, +            'thumbnails': thumbnails, +            'subtitles': subtitles, +        } + + +class LimelightMediaIE(LimelightBaseIE): +    IE_NAME = 'limelight' +    _VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})' +    _TESTS = [{ +        'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', +        'info_dict': { +            'id': '3ffd040b522b4485b6d84effc750cd86', +            'ext': 'flv', +            'title': 'HaP and the HB Prince Trailer', +            'description': 'md5:8005b944181778e313d95c1237ddb640', +            'thumbnail': 're:^https?://.*\.jpeg$', +            'duration': 144.23, +            'timestamp': 1244136834, +            'upload_date': '20090604', +        }, +        'params': { +            # rtmp download +            'skip_download': True, +        }, +    }, { +        # video with subtitles +        'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335', +        'info_dict': { +            'id': 'a3e00274d4564ec4a9b29b9466432335', +            'ext': 'flv', +            'title': '3Play Media Overview Video', +            'description': '', +            'thumbnail': 're:^https?://.*\.jpeg$', +            'duration': 78.101, +            'timestamp': 1338929955, +            'upload_date': '20120605', +            'subtitles': 'mincount:9', +        }, +        'params': { +            # rtmp download +            'skip_download': True, +        }, +    }] +    _PLAYLIST_SERVICE_PATH = 'media' +    _API_PATH = 'media' + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        pc, mobile, metadata = self._extract( +            video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties') + +        return self._extract_info( +            pc['playlistItems'][0].get('streams', []), +            mobile['mediaList'][0].get('mobileUrls', []) if mobile else [], +            metadata) + + +class LimelightChannelIE(LimelightBaseIE): +    IE_NAME = 'limelight:channel' +    _VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})' +    _TEST = { +        'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', +        'info_dict': { +            'id': 'ab6a524c379342f9b23642917020c082', +            'title': 'Javascript Sample Code', +        }, +        'playlist_mincount': 3, +    } +    _PLAYLIST_SERVICE_PATH = 'channel' +    _API_PATH = 'channels' + +    def _real_extract(self, url): +        channel_id = self._match_id(url) + +        pc, mobile, medias = self._extract( +            channel_id, 'getPlaylistByChannelId', +            'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media') + +        entries = [ +            self._extract_info( +                pc['playlistItems'][i].get('streams', []), +                mobile['mediaList'][i].get('mobileUrls', []) if mobile else [], +                medias['media_list'][i]) +            for i in range(len(medias['media_list']))] + +        return self.playlist_result(entries, channel_id, pc['title']) + + +class LimelightChannelListIE(LimelightBaseIE): +    IE_NAME = 'limelight:channel_list' +    _VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})' +    _TEST = { +        'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', +        'info_dict': { +            'id': '301b117890c4465c8179ede21fd92e2b', +            'title': 'Website - Hero Player', +        }, +        'playlist_mincount': 2, +    } +    _PLAYLIST_SERVICE_PATH = 'channel_list' + +    def _real_extract(self, url): +        channel_list_id = self._match_id(url) + +        channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById') + +        entries = [ +            self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel') +            for channel in channel_list['channelList']] + +        return self.playlist_result(entries, channel_list_id, channel_list['title']) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index a597714e9..302c9bf35 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -200,7 +200,13 @@ class MTVServicesInfoExtractor(InfoExtractor):          if mgid is None or ':' not in mgid:              mgid = self._search_regex(                  [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], -                webpage, 'mgid') +                webpage, 'mgid', default=None) + +        if not mgid: +            sm4_embed = self._html_search_meta( +                'sm4:video:embed', webpage, 'sm4 embed', default='') +            mgid = self._search_regex( +                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')          videos_info = self._get_videos_info(mgid)          return videos_info @@ -222,6 +228,13 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):          },      } +    @staticmethod +    def _extract_url(webpage): +        mobj = re.search( +            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage) +        if mobj: +            return mobj.group('url') +      def _get_feed_url(self, uri):          video_id = self._id_from_uri(uri)          site_id = uri.replace(video_id, '') diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 925967753..1f5fc2145 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -10,7 +10,6 @@ from ..compat import (  )  from ..utils import (      ExtractorError, -    clean_html,  ) @@ -46,11 +45,11 @@ class NaverIE(InfoExtractor):          m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',                           webpage)          if m_id is None: -            m_error = re.search( -                r'(?s)<div class="(?:nation_error|nation_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>', -                webpage) -            if m_error: -                raise ExtractorError(clean_html(m_error.group('msg')), expected=True) +            error = self._html_search_regex( +                r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>', +                webpage, 'error', default=None) +            if error: +                raise ExtractorError(error, expected=True)              raise ExtractorError('couldn\'t extract vid and key')          vid = m_id.group(1)          key = m_id.group(2) diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index 55dc6107d..200874d68 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -108,6 +108,20 @@ class NFLIE(InfoExtractor):              'upload_date': '20150918',          },      }, { +        # lowercase data-contentid +        'url': 'http://www.steelers.com/news/article-1/Tomlin-on-Ben-getting-Vick-ready/56399c96-4160-48cf-a7ad-1d17d4a3aef7', +        'info_dict': { +            'id': '12693586-6ea9-4743-9c1c-02c59e4a5ef2', +            'ext': 'mp4', +            'title': 'Tomlin looks ahead to Ravens on a short week', +            'description': 'md5:32f3f7b139f43913181d5cbb24ecad75', +            'timestamp': 1443459651, +            'upload_date': '20150928', +        }, +        'params': { +            'skip_download': True, +        }, +    }, {          'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',          'only_matching': True,      }, { @@ -151,7 +165,7 @@ class NFLIE(InfoExtractor):              group='config'))          # For articles, the id in the url is not the video id          video_id = self._search_regex( -            r'(?:<nflcs:avplayer[^>]+data-contentId\s*=\s*|contentId\s*:\s*)(["\'])(?P<id>.+?)\1', +            r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>.+?)\1',              webpage, 'video id', default=video_id, group='id')          config = self._download_json(config_url, video_id, 'Downloading player config')          url_template = NFLIE.prepend_host( diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index d066a96db..8ac38a174 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_urlparse  from ..utils import (      ExtractorError,      float_or_none, @@ -49,7 +50,7 @@ class NRKIE(InfoExtractor):          if data['usageRights']['isGeoBlocked']:              raise ExtractorError( -                'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', +                'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',                  expected=True)          video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81' @@ -196,20 +197,6 @@ class NRKTVIE(InfoExtractor):          }      ] -    def _debug_print(self, txt): -        if self._downloader.params.get('verbose', False): -            self.to_screen('[debug] %s' % txt) - -    def _get_subtitles(self, subtitlesurl, video_id, baseurl): -        url = "%s%s" % (baseurl, subtitlesurl) -        self._debug_print('%s: Subtitle url: %s' % (video_id, url)) -        captions = self._download_xml( -            url, video_id, 'Downloading subtitles') -        lang = captions.get('lang', 'no') -        return {lang: [ -            {'ext': 'ttml', 'url': url}, -        ]} -      def _extract_f4m(self, manifest_url, video_id):          return self._extract_f4m_formats(              manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds') @@ -218,7 +205,7 @@ class NRKTVIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id')          part_id = mobj.group('part_id') -        baseurl = mobj.group('baseurl') +        base_url = mobj.group('baseurl')          webpage = self._download_webpage(url, video_id) @@ -278,11 +265,14 @@ class NRKTVIE(InfoExtractor):          self._sort_formats(formats)          subtitles_url = self._html_search_regex( -            r'data-subtitlesurl[ ]*=[ ]*"([^"]+)"', -            webpage, 'subtitle URL', default=None) -        subtitles = None +            r'data-subtitlesurl\s*=\s*(["\'])(?P<url>.+?)\1', +            webpage, 'subtitle URL', default=None, group='url') +        subtitles = {}          if subtitles_url: -            subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl) +            subtitles['no'] = [{ +                'ext': 'ttml', +                'url': compat_urlparse.urljoin(base_url, subtitles_url), +            }]          return {              'id': video_id, diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 683c81de3..6923c6094 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -134,6 +134,24 @@ class PBSIE(InfoExtractor):              'params': {                  'skip_download': True,  # requires ffmpeg              }, +        }, +        { +            # Video embedded in iframe containing angle brackets as attribute's value (e.g. +            # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see +            # https://github.com/rg3/youtube-dl/issues/7059) +            'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/', +            'info_dict': { +                'id': '2365546844', +                'display_id': 'a-chefs-life-season-3-episode-5-prickly-business', +                'ext': 'mp4', +                'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business", +                'description': 'md5:61db2ddf27c9912f09c241014b118ed1', +                'duration': 1480, +                'thumbnail': 're:^https?://.*\.jpg$', +            }, +            'params': { +                'skip_download': True,  # requires ffmpeg +            },          }      ] @@ -167,7 +185,7 @@ class PBSIE(InfoExtractor):                  return media_id, presumptive_id, upload_date              url = self._search_regex( -                r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']', +                r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']',                  webpage, 'player URL')              mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 1654a641f..c98539f6a 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -25,7 +25,7 @@ class QQMusicIE(InfoExtractor):              'id': '004295Et37taLD',              'ext': 'mp3',              'title': '可惜没如果', -            'upload_date': '20141227', +            'release_date': '20141227',              'creator': '林俊杰',              'description': 'md5:d327722d0361576fde558f1ac68a7065',              'thumbnail': 're:^https?://.*\.jpg$', @@ -38,11 +38,26 @@ class QQMusicIE(InfoExtractor):              'id': '004MsGEo3DdNxV',              'ext': 'mp3',              'title': '如果', -            'upload_date': '20050626', +            'release_date': '20050626',              'creator': '李季美',              'description': 'md5:46857d5ed62bc4ba84607a805dccf437',              'thumbnail': 're:^https?://.*\.jpg$',          } +    }, { +        'note': 'lyrics not in .lrc format', +        'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6', +        'info_dict': { +            'id': '001JyApY11tIp6', +            'ext': 'mp3', +            'title': 'Shadows Over Transylvania', +            'release_date': '19970225', +            'creator': 'Dark Funeral', +            'description': 'md5:ed14d5bd7ecec19609108052c25b2c11', +            'thumbnail': 're:^https?://.*\.jpg$', +        }, +        'params': { +            'skip_download': True, +        },      }]      _FORMATS = { @@ -112,15 +127,27 @@ class QQMusicIE(InfoExtractor):          self._check_formats(formats, mid)          self._sort_formats(formats) -        return { +        actual_lrc_lyrics = ''.join( +            line + '\n' for line in re.findall( +                r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content)) + +        info_dict = {              'id': mid,              'formats': formats,              'title': song_name, -            'upload_date': publish_time, +            'release_date': publish_time,              'creator': singer,              'description': lrc_content, -            'thumbnail': thumbnail_url, +            'thumbnail': thumbnail_url          } +        if actual_lrc_lyrics: +            info_dict['subtitles'] = { +                'origin': [{ +                    'ext': 'lrc', +                    'data': actual_lrc_lyrics, +                }] +            } +        return info_dict  class QQPlaylistBaseIE(InfoExtractor): diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index c67ad25ce..a16b73ff4 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -74,7 +74,7 @@ class RuutuIE(InfoExtractor):                          preference = -1 if proto == 'rtmp' else 1                          label = child.get('label')                          tbr = int_or_none(child.get('bitrate')) -                        width, height = [int_or_none(x) for x in child.get('resolution', '').split('x')] +                        width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]                          formats.append({                              'format_id': '%s-%s' % (proto, label if label else tbr),                              'url': video_url, diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py index f1f43d0a7..744f9db38 100644 --- a/youtube_dl/extractor/tapely.py +++ b/youtube_dl/extractor/tapely.py @@ -16,7 +16,7 @@ from ..utils import (  class TapelyIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?tape\.ly/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?' +    _VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'      _API_URL = 'http://tape.ly/showtape?id={0:}'      _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'      _SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}' @@ -42,6 +42,10 @@ class TapelyIE(InfoExtractor):                  'ext': 'm4a',              },          }, +        { +            'url': 'https://tapely.com/my-grief-as-told-by-water', +            'only_matching': True, +        },      ]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py index ef2da5632..649ac9433 100644 --- a/youtube_dl/extractor/videolecturesnet.py +++ b/youtube_dl/extractor/videolecturesnet.py @@ -3,11 +3,13 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import ( +    compat_HTTPError, +    compat_urlparse, +)  from ..utils import ( -    find_xpath_attr, -    int_or_none, +    ExtractorError,      parse_duration, -    unified_strdate,  ) @@ -15,7 +17,7 @@ class VideoLecturesNetIE(InfoExtractor):      _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'      IE_NAME = 'videolectures.net' -    _TEST = { +    _TESTS = [{          'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',          'info_dict': {              'id': 'promogram_igor_mekjavic_eng', @@ -26,61 +28,55 @@ class VideoLecturesNetIE(InfoExtractor):              'duration': 565,              'thumbnail': 're:http://.*\.jpg',          }, -    } +    }, { +        # video with invalid direct format links (HTTP 403) +        'url': 'http://videolectures.net/russir2010_filippova_nlp/', +        'info_dict': { +            'id': 'russir2010_filippova_nlp', +            'ext': 'flv', +            'title': 'NLP at Google', +            'description': 'md5:fc7a6d9bf0302d7cc0e53f7ca23747b3', +            'duration': 5352, +            'thumbnail': 're:http://.*\.jpg', +        }, +        'params': { +            # rtmp download +            'skip_download': True, +        }, +    }, { +        'url': 'http://videolectures.net/deeplearning2015_montreal/', +        'info_dict': { +            'id': 'deeplearning2015_montreal', +            'title': 'Deep Learning Summer School, Montreal 2015', +            'description': 'md5:90121a40cc6926df1bf04dcd8563ed3b', +        }, +        'playlist_count': 30, +    }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id -        smil = self._download_xml(smil_url, video_id) -        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content'] -        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract') -        description = ( -            None if description_el is None -            else description_el.attrib['content']) -        upload_date = unified_strdate( -            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content']) +        try: +            smil = self._download_smil(smil_url, video_id) +        except ExtractorError as e: +            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: +                # Probably a playlist +                webpage = self._download_webpage(url, video_id) +                entries = [ +                    self.url_result(compat_urlparse.urljoin(url, video_url), 'VideoLecturesNet') +                    for _, video_url in re.findall(r'<a[^>]+href=(["\'])(.+?)\1[^>]+id=["\']lec=\d+', webpage)] +                playlist_title = self._html_search_meta('title', webpage, 'title', fatal=True) +                playlist_description = self._html_search_meta('description', webpage, 'description') +                return self.playlist_result(entries, video_id, playlist_title, playlist_description) -        switch = smil.find('.//switch') -        duration = parse_duration(switch.attrib.get('dur')) -        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail') -        thumbnail = ( -            None if thumbnail_el is None else thumbnail_el.attrib.get('src')) +        info = self._parse_smil(smil, smil_url, video_id) -        formats = [] -        for v in switch.findall('./video'): -            proto = v.attrib.get('proto') -            if proto not in ['http', 'rtmp']: -                continue -            f = { -                'width': int_or_none(v.attrib.get('width')), -                'height': int_or_none(v.attrib.get('height')), -                'filesize': int_or_none(v.attrib.get('size')), -                'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0, -                'ext': v.attrib.get('ext'), -            } -            src = v.attrib['src'] -            if proto == 'http': -                if self._is_valid_url(src, video_id): -                    f['url'] = src -                    formats.append(f) -            elif proto == 'rtmp': -                f.update({ -                    'url': v.attrib['streamer'], -                    'play_path': src, -                    'rtmp_real_time': True, -                }) -                formats.append(f) -        self._sort_formats(formats) +        info['id'] = video_id + +        switch = smil.find('.//switch') +        if switch is not None: +            info['duration'] = parse_duration(switch.attrib.get('dur')) -        return { -            'id': video_id, -            'title': title, -            'description': description, -            'upload_date': upload_date, -            'duration': duration, -            'thumbnail': thumbnail, -            'formats': formats, -        } +        return info diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index c30c5a8e5..765e9e6fd 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -17,6 +17,7 @@ from ..utils import (      unescapeHTML,      unified_strdate,  ) +from .vimeo import VimeoIE  class VKIE(InfoExtractor): @@ -249,6 +250,10 @@ class VKIE(InfoExtractor):          if youtube_url:              return self.url_result(youtube_url, 'Youtube') +        vimeo_url = VimeoIE._extract_vimeo_url(url, info_page) +        if vimeo_url is not None: +            return self.url_result(vimeo_url) +          m_rutube = re.search(              r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)          if m_rutube is not None: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5eccc0a70..3dd6d290b 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -276,7 +276,7 @@ def parseOpts(overrideArguments=None):              'For example, to only match videos that have been liked more than '              '100 times and disliked less than 50 times (or the dislike '              'functionality is not available at the given service), but who ' -            'also have a description, use  --match-filter ' +            'also have a description, use --match-filter '              '"like_count > 100 & dislike_count <? 50 & description" .'          ))      selection.add_option( @@ -602,7 +602,7 @@ def parseOpts(overrideArguments=None):      filesystem.add_option(          '-A', '--auto-number',          action='store_true', dest='autonumber', default=False, -        help='[deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000') +        help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000')      filesystem.add_option(          '-t', '--title',          action='store_true', dest='usetitle', default=False, diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7ef4f2755..8f0977849 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.09.22' +__version__ = '2015.09.28' | 
