diff options
-rw-r--r-- | .github/workflows/python-publish.yml | 33 | ||||
-rw-r--r-- | .travis.yml | 12 | ||||
-rw-r--r-- | .travis.yml.original | 50 | ||||
-rw-r--r-- | setup.py | 127 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/hrfensehen.py | 102 | ||||
-rw-r--r-- | youtube_dl/extractor/soundcloud.py | 97 | ||||
-rw-r--r-- | youtube_dl/extractor/viki.py | 4 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
9 files changed, 317 insertions, 111 deletions
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 000000000..0fa2d1857 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,33 @@ +# This workflows will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + pull_request: + branches: + - release + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + rm -rf dist/* + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.travis.yml b/.travis.yml index 51afd469a..fb499845e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,29 +12,18 @@ python: dist: trusty env: - YTDL_TEST_SET=core - - YTDL_TEST_SET=download jobs: include: - python: 3.7 dist: xenial env: YTDL_TEST_SET=core - - python: 3.7 - dist: xenial - env: YTDL_TEST_SET=download - python: 3.8 dist: xenial env: YTDL_TEST_SET=core - - python: 3.8 - dist: xenial - env: YTDL_TEST_SET=download - python: 3.8-dev dist: xenial env: YTDL_TEST_SET=core - - python: 3.8-dev - dist: xenial - env: YTDL_TEST_SET=download - env: JYTHON=true; YTDL_TEST_SET=core - - env: JYTHON=true; YTDL_TEST_SET=download - name: flake8 python: 3.8 dist: xenial @@ -44,7 +33,6 @@ jobs: allow_failures: - env: YTDL_TEST_SET=download - env: JYTHON=true; YTDL_TEST_SET=core - - env: JYTHON=true; YTDL_TEST_SET=download before_install: - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi script: ./devscripts/run_tests.sh diff --git a/.travis.yml.original b/.travis.yml.original new file mode 100644 index 000000000..51afd469a --- /dev/null +++ b/.travis.yml.original @@ -0,0 +1,50 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.2" + - "3.3" + - "3.4" + - "3.5" + - "3.6" + - "pypy" + - "pypy3" +dist: trusty +env: + - YTDL_TEST_SET=core + - YTDL_TEST_SET=download +jobs: + include: + - python: 3.7 + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.7 + dist: xenial + env: YTDL_TEST_SET=download + - python: 3.8 + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.8 + dist: xenial + env: YTDL_TEST_SET=download + - python: 3.8-dev + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.8-dev + dist: xenial + env: YTDL_TEST_SET=download + - env: JYTHON=true; YTDL_TEST_SET=core + - env: JYTHON=true; YTDL_TEST_SET=download + - name: flake8 + python: 3.8 + dist: xenial + install: pip install flake8 + script: flake8 . + fast_finish: true + allow_failures: + - env: YTDL_TEST_SET=download + - env: JYTHON=true; YTDL_TEST_SET=core + - env: JYTHON=true; YTDL_TEST_SET=download +before_install: + - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi +script: ./devscripts/run_tests.sh @@ -1,62 +1,21 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function - +from setuptools import setup, Command import os.path import warnings import sys - -try: - from setuptools import setup, Command - setuptools_available = True -except ImportError: - from distutils.core import setup, Command - setuptools_available = False from distutils.spawn import spawn -try: - # This will create an exe that needs Microsoft Visual C++ 2008 - # Redistributable Package - import py2exe -except ImportError: - if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': - print('Cannot import py2exe', file=sys.stderr) - exit(1) - -py2exe_options = { - 'bundle_files': 1, - 'compressed': 1, - 'optimize': 2, - 'dist_dir': '.', - 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], -} - # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) -DESCRIPTION = 'YouTube video downloader' -LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites' - -py2exe_console = [{ - 'script': './youtube_dl/__main__.py', - 'dest_base': 'youtube-dl', - 'version': __version__, - 'description': DESCRIPTION, - 'comments': LONG_DESCRIPTION, - 'product_name': 'youtube-dl', - 'product_version': __version__, -}] - -py2exe_params = { - 'console': py2exe_console, - 'options': {'py2exe': py2exe_options}, - 'zipfile': None -} +DESCRIPTION = 'Media downloader supporting various sites such as youtube' +LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites. Based on a more active community fork.' if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': - params = py2exe_params + print("inv") else: files_spec = [ ('etc/bash_completion.d', ['youtube-dl.bash-completion']), @@ -78,10 +37,10 @@ else: params = { 'data_files': data_files, } - if setuptools_available: - params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']} - else: - params['scripts'] = ['bin/youtube-dl'] + #if setuptools_available: + params['entry_points'] = {'console_scripts': ['youtube-dlc = youtube_dl:main']} + #else: + # params['scripts'] = ['bin/youtube-dlc'] class build_lazy_extractors(Command): description = 'Build the extractor lazy loading module' @@ -100,49 +59,45 @@ class build_lazy_extractors(Command): ) setup( - name='youtube_dl', + name="youtube_dlc", version=__version__, + maintainer="Tom-Oliver Heidel", + maintainer_email="theidel@uni-bremen.de", description=DESCRIPTION, long_description=LONG_DESCRIPTION, - url='https://github.com/ytdl-org/youtube-dl', - author='Ricardo Garcia', - author_email='ytdl@yt-dl.org', - maintainer='Sergey M.', - maintainer_email='dstftw@gmail.com', - license='Unlicense', - packages=[ + # long_description_content_type="text/markdown", + url="https://github.com/blackjack4494/youtube-dlc", + # packages=setuptools.find_packages(), + packages=[ 'youtube_dl', 'youtube_dl.extractor', 'youtube_dl.downloader', 'youtube_dl.postprocessor'], - - # Provokes warning on most systems (why?!) - # test_suite = 'nose.collector', - # test_requires = ['nosetest'], - classifiers=[ - 'Topic :: Multimedia :: Video', - 'Development Status :: 5 - Production/Stable', - 'Environment :: Console', - 'License :: Public Domain', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.2', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: Implementation', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: IronPython', - 'Programming Language :: Python :: Implementation :: Jython', - 'Programming Language :: Python :: Implementation :: PyPy', + "Topic :: Multimedia :: Video", + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Programming Language :: Python", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.2", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: Implementation", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: IronPython", + "Programming Language :: Python :: Implementation :: Jython", + "Programming Language :: Python :: Implementation :: PyPy", + "License :: Public Domain", + "Operating System :: OS Independent", ], - - cmdclass={'build_lazy_extractors': build_lazy_extractors}, + python_requires='>=2.6', + + cmdclass={'build_lazy_extractors': build_lazy_extractors}, **params -) +)
\ No newline at end of file diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4b3092028..5ae0a34aa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -440,6 +440,7 @@ from .hotstar import ( ) from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE +from .hrfensehen import HRFernsehenIE from .hrti import ( HRTiIE, HRTiPlaylistIE, diff --git a/youtube_dl/extractor/hrfensehen.py b/youtube_dl/extractor/hrfensehen.py new file mode 100644 index 000000000..2beadef2c --- /dev/null +++ b/youtube_dl/extractor/hrfensehen.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from youtube_dl.utils import int_or_none, unified_timestamp, unescapeHTML +from .common import InfoExtractor + + +class HRFernsehenIE(InfoExtractor): + IE_NAME = 'hrfernsehen' + _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html' + + _TESTS = [{ + 'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html', + 'md5': '5c4e0ba94677c516a2f65a84110fc536', + 'info_dict': { + 'id': '130546', + 'ext': 'mp4', + 'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / ' + 'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / ' + 'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music', + 'subtitles': {'de': [{ + 'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt' + }]}, + 'timestamp': 1598470200, + 'upload_date': '20200826', + 'thumbnails': [{ + 'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg', + 'id': '0' + }, { + 'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg', + 'id': '1' + }], + 'title': 'hessenschau vom 26.08.2020' + } + }, { + 'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html', + 'only_matching': True + }] + + _GEO_COUNTRIES = ['DE'] + + def extract_airdate(self, loader_data): + airdate_str = loader_data.get('mediaMetadata', {}).get('agf', {}).get('airdate') + + if airdate_str is None: + return None + + return unified_timestamp(airdate_str) + + def extract_formats(self, loader_data): + stream_formats = [] + for stream_obj in loader_data["videoResolutionLevels"]: + stream_format = { + 'format_id': str(stream_obj['verticalResolution']) + "p", + 'height': stream_obj['verticalResolution'], + 'url': stream_obj['url'], + } + + quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit', + stream_obj['url']) + if quality_information: + stream_format['width'] = int_or_none(quality_information.group(1)) + stream_format['height'] = int_or_none(quality_information.group(2)) + stream_format['fps'] = int_or_none(quality_information.group(3)) + stream_format['tbr'] = int_or_none(quality_information.group(4)) + + stream_formats.append(stream_format) + + self._sort_formats(stream_formats) + return stream_formats + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_meta( + ['og:title', 'twitter:title', 'name'], webpage) + description = self._html_search_meta( + ['description'], webpage) + + loader_str = unescapeHTML(self._search_regex(r"data-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader")) + loader_data = json.loads(loader_str) + + info = { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': self.extract_formats(loader_data), + 'timestamp': self.extract_airdate(loader_data) + } + + if "subtitle" in loader_data: + info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]} + + thumbnails = list(set([t for t in loader_data.get("previewImageUrl", {}).values()])) + if len(thumbnails) > 0: + info["thumbnails"] = [{"url": t} for t in thumbnails] + + return info diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index d37c52543..ac09cb5e6 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals import itertools import re +import json +import random from .common import ( InfoExtractor, @@ -28,6 +30,7 @@ from ..utils import ( update_url_query, url_or_none, urlhandle_detect_ext, + sanitized_Request, ) @@ -309,7 +312,81 @@ class SoundcloudIE(InfoExtractor): raise def _real_initialize(self): - self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk' + self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or "T5R4kgWS2PRf6lzLyIravUMnKlbIxQag" # 'EXLwg5lHTO2dslU5EePe3xkw0m1h86Cd' # 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk' + self._login() + + _USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36" + _API_AUTH_QUERY_TEMPLATE = '?client_id=%s' + _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s' + _access_token = None + _HEADERS = {} + _NETRC_MACHINE = 'soundcloud' + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + def genDevId(): + def genNumBlock(): + return ''.join([str(random.randrange(10)) for i in range(6)]) + return '-'.join([genNumBlock() for i in range(4)]) + + payload = { + 'client_id': self._CLIENT_ID, + 'recaptcha_pubkey': 'null', + 'recaptcha_response': 'null', + 'credentials': { + 'identifier': username, + 'password': password + }, + 'signature': self.sign(username, password, self._CLIENT_ID), + 'device_id': genDevId(), + 'user_agent': self._USER_AGENT + } + + query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID + login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8')) + response = self._download_json(login, None) + self._access_token = response.get('session').get('access_token') + if not self._access_token: + self.report_warning('Unable to get access token, login may has failed') + else: + self._HEADERS = {'Authorization': 'OAuth ' + self._access_token} + + # signature generation + def sign(self, user, pw, clid): + a = 33 + i = 1 + s = 440123 + w = 117 + u = 1800000 + l = 1042 + b = 37 + k = 37 + c = 5 + n = "0763ed7314c69015fd4a0dc16bbf4b90" # _KEY + y = "8" # _REV + r = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36" # _USER_AGENT + e = user # _USERNAME + t = clid # _CLIENT_ID + + d = '-'.join([str(mInt) for mInt in [a, i, s, w, u, l, b, k]]) + p = n + y + d + r + e + t + d + n + h = p + + m = 8011470 + f = 0 + + for f in range(f, len(h)): + m = (m >> 1) + ((1 & m) << 23) + m += ord(h[f]) + m &= 16777215 + + # c is not even needed + out = str(y) + ':' + str(d) + ':' + format(m, 'x') + ':' + str(c) + + return out @classmethod def _resolv_url(cls, url): @@ -389,7 +466,7 @@ class SoundcloudIE(InfoExtractor): if not format_url: continue stream = self._download_json( - format_url, track_id, query=query, fatal=False) + format_url, track_id, query=query, fatal=False, headers=self._HEADERS) if not isinstance(stream, dict): continue stream_url = url_or_none(stream.get('url')) @@ -487,7 +564,7 @@ class SoundcloudIE(InfoExtractor): info_json_url = self._resolv_url(self._BASE_URL + resolve_title) info = self._download_json( - info_json_url, full_title, 'Downloading info JSON', query=query) + info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS) return self._extract_info_dict(info, full_title, token) @@ -503,7 +580,7 @@ class SoundcloudPlaylistBaseIE(SoundcloudIE): 'ids': ','.join([compat_str(t['id']) for t in tracks]), 'playlistId': playlist_id, 'playlistSecretToken': token, - }) + }, headers=self._HEADERS) entries = [] for track in tracks: track_id = str_or_none(track.get('id')) @@ -547,7 +624,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): full_title += '/' + token info = self._download_json(self._resolv_url( - self._BASE_URL + full_title), full_title) + self._BASE_URL + full_title), full_title, headers=self._HEADERS) if 'errors' in info: msgs = (compat_str(err['error_message']) for err in info['errors']) @@ -572,7 +649,7 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): for i in itertools.count(): response = self._download_json( next_href, playlist_id, - 'Downloading track page %s' % (i + 1), query=query) + 'Downloading track page %s' % (i + 1), query=query, headers=self._HEADERS) collection = response['collection'] @@ -694,7 +771,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): user = self._download_json( self._resolv_url(self._BASE_URL + uploader), - uploader, 'Downloading user info') + uploader, 'Downloading user info', headers=self._HEADERS) resource = mobj.group('rsrc') or 'all' @@ -719,7 +796,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): track_name = self._match_id(url) - track = self._download_json(self._resolv_url(url), track_name) + track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS) track_id = self._search_regex( r'soundcloud:track-stations:(\d+)', track['id'], 'track id') @@ -752,7 +829,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): data = self._download_json( self._API_V2_BASE + 'playlists/' + playlist_id, - playlist_id, 'Downloading playlist', query=query) + playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS) return self._extract_set(data, token) @@ -789,7 +866,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): for i in itertools.count(1): response = self._download_json( next_url, collection_id, 'Downloading page {0}'.format(i), - 'Unable to download API page') + 'Unable to download API page', headers=self._HEADERS) collection = response.get('collection', []) if not collection: diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index b0dcdc0e6..9e4171237 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -56,14 +56,14 @@ class VikiBaseIE(InfoExtractor): def _call_api(self, path, video_id, note, timestamp=None, post_data=None): resp = self._download_json( - self._prepare_call(path, timestamp, post_data), video_id, note) + self._prepare_call(path, timestamp, post_data), video_id, note, headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404]) error = resp.get('error') if error: if error == 'invalid timestamp': resp = self._download_json( self._prepare_call(path, int(resp['current_timestamp']), post_data), - video_id, '%s (retry)' % note) + video_id, '%s (retry)' % note, headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404]) error = resp.get('error') if error: self._raise_error(resp['error']) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 17101fa47..b50bd2b3b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.07.28' +__version__ = '2020.08.31' |