diff options
Diffstat (limited to 'youtube_dlc/utils.py')
-rw-r--r-- | youtube_dlc/utils.py | 158 |
1 files changed, 132 insertions, 26 deletions
diff --git a/youtube_dlc/utils.py b/youtube_dlc/utils.py index 54a4ea2aa..6a04b710e 100644 --- a/youtube_dlc/utils.py +++ b/youtube_dlc/utils.py @@ -60,6 +60,9 @@ from .compat import ( compat_urllib_parse, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, + compat_urllib_parse_urlunparse, + compat_urllib_parse_quote, + compat_urllib_parse_quote_plus, compat_urllib_parse_unquote_plus, compat_urllib_request, compat_urlparse, @@ -2282,11 +2285,11 @@ def decodeOption(optval): return optval -def formatSeconds(secs): +def formatSeconds(secs, delim=':'): if secs > 3600: - return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60) + return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60) elif secs > 60: - return '%d:%02d' % (secs // 60, secs % 60) + return '%d%s%02d' % (secs // 60, delim, secs % 60) else: return '%d' % secs @@ -2320,8 +2323,8 @@ def bug_reports_message(): if ytdl_is_updateable(): update_cmd = 'type youtube-dlc -U to update' else: - update_cmd = 'see https://yt-dl.org/update on how to update' - msg = '; please report this issue on https://yt-dl.org/bug .' + update_cmd = 'see https://github.com/pukkandan/yt-dlc on how to update' + msg = '; please report this issue on https://github.com/pukkandan/yt-dlc .' msg += ' Make sure you are using the latest version; %s.' % update_cmd msg += ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.' return msg @@ -2460,7 +2463,7 @@ class XAttrMetadataError(YoutubeDLError): # Parsing code and msg if (self.code in (errno.ENOSPC, errno.EDQUOT) - or 'No space left' in self.msg or 'Disk quota excedded' in self.msg): + or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg): self.reason = 'NO_SPACE' elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: self.reason = 'VALUE_TOO_LONG' @@ -3647,7 +3650,7 @@ def url_or_none(url): if not url or not isinstance(url, compat_str): return None url = url.strip() - return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None + return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None def parse_duration(s): @@ -4085,7 +4088,7 @@ def js_to_json(code): v = m.group(0) if v in ('true', 'false', 'null'): return v - elif v.startswith('/*') or v.startswith('//') or v == ',': + elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',': return "" if v[0] in ("'", '"'): @@ -4095,12 +4098,12 @@ def js_to_json(code): '\\\n': '', '\\x': '\\u00', }.get(m.group(0), m.group(0)), v[1:-1]) - - for regex, base in INTEGER_TABLE: - im = re.match(regex, v) - if im: - i = int(im.group(1), base) - return '"%d":' % i if v.endswith(':') else '%d' % i + else: + for regex, base in INTEGER_TABLE: + im = re.match(regex, v) + if im: + i = int(im.group(1), base) + return '"%d":' % i if v.endswith(':') else '%d' % i return '"%s"' % v @@ -4110,7 +4113,8 @@ def js_to_json(code): {comment}|,(?={skip}[\]}}])| (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?| - [0-9]+(?={skip}:) + [0-9]+(?={skip}:)| + !+ '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code) @@ -4124,7 +4128,7 @@ def qualities(quality_ids): return q -DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s' +DEFAULT_OUTTMPL = '%(title)s [%(id)s].%(ext)s' def limit_length(s, length): @@ -4152,6 +4156,8 @@ def is_outdated_version(version, limit, assume_new=True): def ytdl_is_updateable(): """ Returns if youtube-dlc can be updated with -U """ + return False + from zipimport import zipimporter return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen') @@ -4214,10 +4220,10 @@ def parse_codecs(codecs_str): # http://tools.ietf.org/html/rfc6381 if not codecs_str: return {} - splited_codecs = list(filter(None, map( + split_codecs = list(filter(None, map( lambda str: str.strip(), codecs_str.strip().strip(',').split(',')))) vcodec, acodec = None, None - for full_codec in splited_codecs: + for full_codec in split_codecs: codec = full_codec.split('.')[0] if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): if not vcodec: @@ -4228,10 +4234,10 @@ def parse_codecs(codecs_str): else: write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) if not vcodec and not acodec: - if len(splited_codecs) == 2: + if len(split_codecs) == 2: return { - 'vcodec': splited_codecs[0], - 'acodec': splited_codecs[1], + 'vcodec': split_codecs[0], + 'acodec': split_codecs[1], } else: return { @@ -4311,11 +4317,25 @@ def determine_protocol(info_dict): return compat_urllib_parse_urlparse(url).scheme -def render_table(header_row, data): +def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): """ Render a list of rows, each as a list of values """ + + def get_max_lens(table): + return [max(len(compat_str(v)) for v in col) for col in zip(*table)] + + def filter_using_list(row, filterArray): + return [col for (take, col) in zip(filterArray, row) if take] + + if hideEmpty: + max_lens = get_max_lens(data) + header_row = filter_using_list(header_row, max_lens) + data = [filter_using_list(row, max_lens) for row in data] + table = [header_row] + data - max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)] - format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s' + max_lens = get_max_lens(table) + if delim: + table = [header_row] + [['-' * ml for ml in max_lens]] + data + format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s' return '\n'.join(format_str % tuple(row) for row in table) @@ -5470,7 +5490,7 @@ def encode_base_n(num, n, table=None): def decode_packed_codes(code): mobj = re.search(PACKED_CODES_RE, code) - obfucasted_code, base, count, symbols = mobj.groups() + obfuscated_code, base, count, symbols = mobj.groups() base = int(base) count = int(count) symbols = symbols.split('|') @@ -5483,7 +5503,7 @@ def decode_packed_codes(code): return re.sub( r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], - obfucasted_code) + obfuscated_code) def caesar(s, alphabet, shift): @@ -5713,3 +5733,89 @@ def random_birthday(year_field, month_field, day_field): month_field: str(random_date.month), day_field: str(random_date.day), } + + +# Templates for internet shortcut files, which are plain text files. +DOT_URL_LINK_TEMPLATE = ''' +[InternetShortcut] +URL=%(url)s +'''.lstrip() + +DOT_WEBLOC_LINK_TEMPLATE = ''' +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> +\t<key>URL</key> +\t<string>%(url)s</string> +</dict> +</plist> +'''.lstrip() + +DOT_DESKTOP_LINK_TEMPLATE = ''' +[Desktop Entry] +Encoding=UTF-8 +Name=%(filename)s +Type=Link +URL=%(url)s +Icon=text-html +'''.lstrip() + + +def iri_to_uri(iri): + """ + Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only). + + The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact. + """ + + iri_parts = compat_urllib_parse_urlparse(iri) + + if '[' in iri_parts.netloc: + raise ValueError('IPv6 URIs are not, yet, supported.') + # Querying `.netloc`, when there's only one bracket, also raises a ValueError. + + # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is. + + net_location = '' + if iri_parts.username: + net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~") + if iri_parts.password is not None: + net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~") + net_location += '@' + + net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames. + # The 'idna' encoding produces ASCII text. + if iri_parts.port is not None and iri_parts.port != 80: + net_location += ':' + str(iri_parts.port) + + return compat_urllib_parse_urlunparse( + (iri_parts.scheme, + net_location, + + compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"), + + # Unsure about the `safe` argument, since this is a legacy way of handling parameters. + compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"), + + # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component. + compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"), + + compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"))) + + # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes. + + +def to_high_limit_path(path): + if sys.platform in ['win32', 'cygwin']: + # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited. + return r'\\?\ '.rstrip() + os.path.abspath(path) + + return path + + +def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None): + val = obj.get(field, default) + if func and val not in ignore: + val = func(val) + return template % val if val not in ignore else default |