diff options
| author | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-08-02 01:00:55 +0530 | 
|---|---|---|
| committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-08-02 01:08:16 +0530 | 
| commit | 5fff2e576f5a36ba253e53880566db932b9b7621 (patch) | |
| tree | aff515c640d4a12c6580ac0890e9342aad85c413 | |
| parent | f2e8dbcc0067fb16b632de1984e622a8e99d9d8f (diff) | |
| download | hypervideo-pre-5fff2e576f5a36ba253e53880566db932b9b7621.tar.lz hypervideo-pre-5fff2e576f5a36ba253e53880566db932b9b7621.tar.xz hypervideo-pre-5fff2e576f5a36ba253e53880566db932b9b7621.zip | |
[extractor/camtasia] Separate into own extractor (#4307)
Authored by: coletdjnz
| -rw-r--r-- | yt_dlp/extractor/_extractors.py | 1 | ||||
| -rw-r--r-- | yt_dlp/extractor/camtasia.py | 71 | ||||
| -rw-r--r-- | yt_dlp/extractor/generic.py | 67 | 
3 files changed, 72 insertions, 67 deletions
| diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b105437c3..b62b8113c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -219,6 +219,7 @@ from .camdemy import (      CamdemyFolderIE  )  from .cammodels import CamModelsIE +from .camtasia import CamtasiaEmbedIE  from .camwithher import CamWithHerIE  from .canalalpha import CanalAlphaIE  from .canalplus import CanalplusIE diff --git a/yt_dlp/extractor/camtasia.py b/yt_dlp/extractor/camtasia.py new file mode 100644 index 000000000..70ab6c62a --- /dev/null +++ b/yt_dlp/extractor/camtasia.py @@ -0,0 +1,71 @@ +import os +import urllib.parse + +from .common import InfoExtractor +from ..utils import float_or_none + + +class CamtasiaEmbedIE(InfoExtractor): +    _VALID_URL = False +    _WEBPAGE_TESTS = [ +        { +            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/', +            'playlist': [{ +                'md5': '0c5e352edabf715d762b0ad4e6d9ee67', +                'info_dict': { +                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', +                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1', +                    'ext': 'flv', +                    'duration': 2235.90, +                } +            }, { +                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63', +                'info_dict': { +                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP', +                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip', +                    'ext': 'flv', +                    'duration': 2235.93, +                } +            }], +            'info_dict': { +                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', +            }, +            'skip': 'webpage dead' +        }, + +    ] + +    def _extract_from_webpage(self, url, webpage): +        camtasia_cfg = self._search_regex( +            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);', +            webpage, 'camtasia configuration file', default=None) +        if camtasia_cfg is None: +            return None + +        title = self._html_search_meta('DC.title', webpage, fatal=True) + +        camtasia_url = urllib.parse.urljoin(url, camtasia_cfg) +        camtasia_cfg = self._download_xml( +            camtasia_url, self._generic_id(url), +            note='Downloading camtasia configuration', +            errnote='Failed to download camtasia configuration') +        fileset_node = camtasia_cfg.find('./playlist/array/fileset') + +        entries = [] +        for n in fileset_node.getchildren(): +            url_n = n.find('./uri') +            if url_n is None: +                continue + +            entries.append({ +                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0], +                'title': f'{title} - {n.tag}', +                'url': urllib.parse.urljoin(url, url_n.text), +                'duration': float_or_none(n.find('./duration').text), +            }) + +        return { +            '_type': 'playlist', +            'entries': entries, +            'title': title, +        } diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 0dc9ae0da..3d574cd02 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -933,30 +933,6 @@ class GenericIE(InfoExtractor):                  'skip_download': True,              }          }, -        # Camtasia studio -        { -            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/', -            'playlist': [{ -                'md5': '0c5e352edabf715d762b0ad4e6d9ee67', -                'info_dict': { -                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', -                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1', -                    'ext': 'flv', -                    'duration': 2235.90, -                } -            }, { -                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63', -                'info_dict': { -                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP', -                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip', -                    'ext': 'flv', -                    'duration': 2235.93, -                } -            }], -            'info_dict': { -                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', -            } -        },          # Flowplayer          {              'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html', @@ -2680,43 +2656,6 @@ class GenericIE(InfoExtractor):              'entries': entries,          } -    def _extract_camtasia(self, url, video_id, webpage): -        """ Returns None if no camtasia video can be found. """ - -        camtasia_cfg = self._search_regex( -            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);', -            webpage, 'camtasia configuration file', default=None) -        if camtasia_cfg is None: -            return None - -        title = self._html_search_meta('DC.title', webpage, fatal=True) - -        camtasia_url = urllib.parse.urljoin(url, camtasia_cfg) -        camtasia_cfg = self._download_xml( -            camtasia_url, video_id, -            note='Downloading camtasia configuration', -            errnote='Failed to download camtasia configuration') -        fileset_node = camtasia_cfg.find('./playlist/array/fileset') - -        entries = [] -        for n in fileset_node.getchildren(): -            url_n = n.find('./uri') -            if url_n is None: -                continue - -            entries.append({ -                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0], -                'title': f'{title} - {n.tag}', -                'url': urllib.parse.urljoin(url, url_n.text), -                'duration': float_or_none(n.find('./duration').text), -            }) - -        return { -            '_type': 'playlist', -            'entries': entries, -            'title': title, -        } -      def _kvs_getrealurl(self, video_url, license_code):          if not video_url.startswith('function/0/'):              return video_url  # not obfuscated @@ -2920,12 +2859,6 @@ class GenericIE(InfoExtractor):          except xml.etree.ElementTree.ParseError:              pass -        # Is it a Camtasia project? -        camtasia_res = self._extract_camtasia(url, video_id, webpage) -        if camtasia_res is not None: -            self.report_detected('Camtasia video') -            return camtasia_res -          info_dict.update({              # it's tempting to parse this further, but you would              # have to take into account all the variations like | 
