aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor/screencast.py
blob: d52d46cc3dc16fcb572d2804b30a1203effdd5c6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_urllib_request,
)
from ..utils import (
    ExtractorError,
)


class ScreencastIE(InfoExtractor):
    """Extractor for screencast.com ``/t/<id>`` pages.

    The video URL is taken from the oEmbed-style JSON API when it provides
    one; otherwise it is scraped from the webpage. The webpage is always
    downloaded because the description and thumbnail are read from it.
    """

    _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
    _API_URL = 'https://www.screencast.com/api/external/oembed?url=%s&format=json'

    _TESTS = [{
        'url': 'http://www.screencast.com/t/3ZEjQXlT',
        'md5': '917df1c13798a3e96211dd1561fded83',
        'info_dict': {
            'id': '3ZEjQXlT',
            'ext': 'm4v',
            'title': 'Color Measurement with Ocean Optics Spectrometers',
            'description': 'md5:240369cde69d8bed61349a199c5fb153',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
        'md5': 'e8e4b375a7660a9e7e35c33973410d34',
        'info_dict': {
            'id': 'V2uXehPJa1ZI',
            'ext': 'mov',
            'title': 'The Amadeus Spectrometer',
            'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/aAB3iowa',
        'md5': 'dedb2734ed00c9755761ccaee88527cd',
        'info_dict': {
            'id': 'aAB3iowa',
            'ext': 'mp4',
            'title': 'Google Earth Export',
            'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/X3ddTrYh',
        'md5': '669ee55ff9c51988b4ebc0877cc8b159',
        'info_dict': {
            'id': 'X3ddTrYh',
            'ext': 'wmv',
            'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
            'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://screencast.com/t/aAB3iowa',
        'only_matching': True,
    }]

    @staticmethod
    def _quote_video_url(raw_url):
        """Percent-encode ``raw_url`` while keeping its scheme separator.

        Quoting turns the ``:`` after ``http``/``https`` into ``%3A``; that
        one leading occurrence is converted back so the result is still an
        absolute URL.
        """
        quoted = compat_urllib_request.quote(raw_url)
        return re.sub(
            r'^(?P<proto>https?)%3A',
            lambda match: '%s:' % match.group('proto'),
            quoted)

    def _extract_flash_vars_url(self, webpage):
        """Return the quoted ``content`` URL from flashVars/initParams.

        Returns None when neither ``<param>`` is present or when the
        parameters carry no ``content`` entry.
        """
        flash_vars_s = self._html_search_regex(
            r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
            default=None)
        if not flash_vars_s:
            flash_vars_s = self._html_search_regex(
                r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
                default=None)
            if flash_vars_s:
                # Commas are turned into '&' so that the value can be parsed
                # like a query string below.
                flash_vars_s = flash_vars_s.replace(',', '&')
        if not flash_vars_s:
            return None

        flash_vars = compat_parse_qs(flash_vars_s)
        # A missing 'content' key must not abort extraction with a bare
        # KeyError; the caller still has further fallbacks to try.
        content = flash_vars.get('content')
        if not content:
            return None
        return self._quote_video_url(content[0])

    def _extract_webpage_video_url(self, webpage):
        """Scrape the video URL from ``webpage``; None if nothing matches.

        Sources are tried in this order: QuickTime ``<embed>``,
        flashVars/initParams, the ``src=`` parameter inside the ``og:video``
        meta value, a ``MediaContentUrl`` assignment, and finally the raw
        ``og:video`` meta value.
        """
        video_url = self._html_search_regex(
            r'<embed name="Video".*?src="([^"]+)"', webpage,
            'QuickTime embed', default=None)
        if video_url is not None:
            return video_url

        video_url = self._extract_flash_vars_url(webpage)
        if video_url is not None:
            return video_url

        video_meta = self._html_search_meta(
            'og:video', webpage, default=None)
        if video_meta:
            video_url = self._search_regex(
                r'src=(.*?)(?:$|&)', video_meta,
                'meta tag video URL', default=None)
            if video_url is not None:
                return video_url

        video_url = self._html_search_regex(
            r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'video url', default=None, group='url')
        if video_url is not None:
            return video_url

        # Last resort: use the og:video value as-is.
        return video_meta

    def _real_extract(self, url):
        """Build the info dict for the screencast at ``url``.

        Raises:
            ExtractorError: if no video URL can be found in either the API
                response or the webpage.
        """
        video_id = self._match_id(url)

        # The info JSON given by the API has a thumbnail URL,
        # but it's inferior to the webpage's thumbnail.
        # It also has no video description, so we
        # definitely still need to get the webpage.

        # NOTE(review): `url` is interpolated into the query string
        # unescaped - confirm the API copes with input URLs that carry
        # their own query parameters.
        info = self._download_json(
            self._API_URL % url, video_id,
            'Downloading video info JSON')

        video_url = info.get('url')
        if video_url is not None:
            video_url = self._quote_video_url(video_url)

        title = info.get('title')
        webpage = self._download_webpage(url, video_id)

        if video_url is None:
            video_url = self._extract_webpage_video_url(webpage)

        if video_url is None:
            raise ExtractorError('Cannot find video')

        if title is None:
            title = self._og_search_title(webpage, default=None)

        if title is None:
            title = self._html_search_regex(
                [r'<b>Title:</b> ([^<]+)</div>',
                 r'class="tabSeperator">></span><span class="tabText">(.+?)<',
                 r'<title>([^<]+)</title>'],
                webpage, 'title')

        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage, default=None)
        if description is None:
            description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }