about | summary | refs | log | tree | commit | diff | stats
path: root/hypervideo_dl/extractor/bandcamp.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/bandcamp.py')
-rw-r--r-- hypervideo_dl/extractor/bandcamp.py | 23
1 file changed, 10 insertions, 13 deletions
diff --git a/hypervideo_dl/extractor/bandcamp.py b/hypervideo_dl/extractor/bandcamp.py
index 745055e..de81e0d 100644
--- a/hypervideo_dl/extractor/bandcamp.py
+++ b/hypervideo_dl/extractor/bandcamp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import random
import re
import time
@@ -8,23 +5,24 @@ import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ KNOWN_EXTENSIONS,
ExtractorError,
float_or_none,
int_or_none,
- KNOWN_EXTENSIONS,
parse_filesize,
str_or_none,
try_get,
- update_url_query,
unified_strdate,
unified_timestamp,
+ update_url_query,
url_or_none,
urljoin,
)
class BandcampIE(InfoExtractor):
- _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?P<uploader>[^/]+)\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
+ _EMBED_REGEX = [r'<meta property="og:url"[^>]*?content="(?P<url>.*?bandcamp\.com.*?)"']
_TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439',
@@ -87,7 +85,7 @@ class BandcampIE(InfoExtractor):
attr + ' data', group=2), video_id, fatal=fatal)
def _real_extract(self, url):
- title = self._match_id(url)
+ title, uploader = self._match_valid_url(url).group('id', 'uploader')
webpage = self._download_webpage(url, title)
tralbum = self._extract_data_attr(webpage, title)
thumbnail = self._og_search_thumbnail(webpage)
@@ -186,8 +184,6 @@ class BandcampIE(InfoExtractor):
'acodec': format_id.split('-')[0],
})
- self._sort_formats(formats)
-
title = '%s - %s' % (artist, track) if artist else track
if not duration:
@@ -199,6 +195,8 @@ class BandcampIE(InfoExtractor):
'title': title,
'thumbnail': thumbnail,
'uploader': artist,
+ 'uploader_id': uploader,
+ 'uploader_url': f'https://{uploader}.bandcamp.com',
'timestamp': timestamp,
'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
'duration': duration,
@@ -211,7 +209,7 @@ class BandcampIE(InfoExtractor):
}
-class BandcampAlbumIE(BandcampIE):
+class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'
@@ -314,7 +312,7 @@ class BandcampAlbumIE(BandcampIE):
}
-class BandcampWeeklyIE(BandcampIE):
+class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'Bandcamp:weekly'
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{
@@ -363,7 +361,6 @@ class BandcampWeeklyIE(BandcampIE):
'ext': ext,
'vcodec': 'none',
})
- self._sort_formats(formats)
title = show.get('audio_title') or 'Bandcamp Weekly'
subtitle = show.get('subtitle')
@@ -439,7 +436,7 @@ class BandcampUserIE(InfoExtractor):
uploader = self._match_id(url)
webpage = self._download_webpage(url, uploader)
- discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
+ discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
return self.playlist_from_matches(