diff options
| author | Adam Thalhammer <s3544305@student.rmit.edu.au> | 2016-05-02 13:21:39 +1000 | 
|---|---|---|
| committer | Adam Thalhammer <s3544305@student.rmit.edu.au> | 2016-05-02 13:21:39 +1000 | 
| commit | 79a2e94e79e65cdf4898bc2dedb6a1bb4ca9af3c (patch) | |
| tree | 97cd368a2089b073d2f0c1638bc66bd75fe11443 | |
| parent | 686cc8963441c37105c0447f31c5ea21405be05a (diff) | |
| download | hypervideo-pre-79a2e94e79e65cdf4898bc2dedb6a1bb4ca9af3c.tar.lz hypervideo-pre-79a2e94e79e65cdf4898bc2dedb6a1bb4ca9af3c.tar.xz hypervideo-pre-79a2e94e79e65cdf4898bc2dedb6a1bb4ca9af3c.zip | |
Instead of replacing accented characters with an underscore when sanitizing file names in restricted mode, replace them with their non-accented equivalents. Fixes #9347.
| -rw-r--r-- | test/test_utils.py | 9 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 9 | 
2 files changed, 14 insertions, 4 deletions
| diff --git a/test/test_utils.py b/test/test_utils.py index e16a6761b..0072ba241 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -139,8 +139,8 @@ class TestUtil(unittest.TestCase):          self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))          self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) -        tests = 'a\xe4b\u4e2d\u56fd\u7684c' -        self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c') +        tests = 'aäb\u4e2d\u56fd\u7684c' +        self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')          self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')  # No empty filename          forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' @@ -155,6 +155,11 @@ class TestUtil(unittest.TestCase):          self.assertTrue(sanitize_filename('-', restricted=True) != '')          self.assertTrue(sanitize_filename(':', restricted=True) != '') +        self.assertEqual(sanitize_filename( +            'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True), +            'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy') +        pass +      def test_sanitize_ids(self):          self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')          self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 7bcc85e2b..f74f62268 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -14,8 +14,8 @@ import email.utils  import errno  import functools  import gzip -import itertools  import io +import itertools  import json  import locale  import math @@ -24,8 +24,8 @@ import os  import pipes  import platform  import re -import ssl  import socket +import ssl  import struct  import subprocess  import sys @@ -365,6 +365,11 @@ def sanitize_filename(s, restricted=False, is_id=False):      Set is_id if this is not an arbitrary 
string, but an ID that should be kept if possible      """      def replace_insane(char): +        accents = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', +                           itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'], +                                           'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy'))) +        if restricted and char in accents: +            return accents[char]          if char == '?' or ord(char) < 32 or ord(char) == 127:              return ''          elif char == '"': | 
