diff options
-rw-r--r-- | Dockerfile-debian-python3-sqlite | 3 | ||||
-rw-r--r-- | Dockerfile-fedora-python3-sqlite | 1 | ||||
-rw-r--r-- | docs/source/siteadmin/media-types.rst | 5 | ||||
-rw-r--r-- | docs/source/siteadmin/relnotes.rst | 1 | ||||
-rw-r--r-- | extlib/freesound/audioprocessing.py | 616 | ||||
l--------- | mediagoblin/media_types/audio/audioprocessing.py | 1 | ||||
-rw-r--r-- | mediagoblin/media_types/audio/audiotospectrogram.py | 297 | ||||
-rw-r--r-- | mediagoblin/media_types/audio/spectrogram.py | 362 | ||||
-rw-r--r-- | mediagoblin/media_types/audio/transcoders.py | 64 | ||||
-rw-r--r-- | mediagoblin/tests/test_audio.py | 1 | ||||
-rw-r--r-- | setup.py | 2 |
11 files changed, 311 insertions, 1042 deletions
diff --git a/Dockerfile-debian-python3-sqlite b/Dockerfile-debian-python3-sqlite index 42b2a964..ba30ecc5 100644 --- a/Dockerfile-debian-python3-sqlite +++ b/Dockerfile-debian-python3-sqlite @@ -81,7 +81,8 @@ gstreamer1.0-plugins-bad \ gstreamer1.0-plugins-base \ gstreamer1.0-plugins-good \ gstreamer1.0-plugins-ugly \ -python3-gst-1.0 +python3-gst-1.0 \ +python3-numpy # Install video dependencies. RUN apt-get install -y \ diff --git a/Dockerfile-fedora-python3-sqlite b/Dockerfile-fedora-python3-sqlite index cdaa8385..f9037934 100644 --- a/Dockerfile-fedora-python3-sqlite +++ b/Dockerfile-fedora-python3-sqlite @@ -43,6 +43,7 @@ which # gstreamer1.0-plugins-good \ # gstreamer1.0-plugins-ugly \ # python3-gst-1.0 \ +# python3-numpy # RUN apt-get install -y \ # gir1.2-gst-plugins-base-1.0 \ diff --git a/docs/source/siteadmin/media-types.rst b/docs/source/siteadmin/media-types.rst index c57c4430..9b2e4d9d 100644 --- a/docs/source/siteadmin/media-types.rst +++ b/docs/source/siteadmin/media-types.rst @@ -92,10 +92,11 @@ as whatever GStreamer plugins you want, good/bad/ugly): # Debian and co. sudo apt install python3-gst-1.0 gstreamer1.0-plugins-{base,bad,good,ugly} \ - gstreamer1.0-libav + gstreamer1.0-libav python3-numpy # Fedora and co. - sudo dnf install gstreamer1-plugins-{base,bad-free,good,ugly-free} + sudo dnf install gstreamer1-plugins-{base,bad-free,good,ugly-free} \ + python3-numpy Add ``[[mediagoblin.media_types.audio]]`` under the ``[plugins]`` section in your ``mediagoblin.ini`` and update MediaGoblin:: diff --git a/docs/source/siteadmin/relnotes.rst b/docs/source/siteadmin/relnotes.rst index 2cb1a5cf..86e31095 100644 --- a/docs/source/siteadmin/relnotes.rst +++ b/docs/source/siteadmin/relnotes.rst @@ -30,6 +30,7 @@ carefully, or at least skim over it. **Improvements:** - Drop Python 2 installation support (Ben Sturmfels) + - Reinstate Python 3 audio spectrograms [#5610] (Fernando Gutierrez) **Bug fixes:** diff --git a/extlib/freesound/audioprocessing.py b/extlib/freesound/audioprocessing.py deleted file mode 100644 index b002ff8a..00000000 --- a/extlib/freesound/audioprocessing.py +++ /dev/null @@ -1,616 +0,0 @@ -#!/usr/bin/env python -# processing.py -- various audio processing functions -# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG) -# UNIVERSITAT POMPEU FABRA -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# -# Authors: -# Bram de Jong <bram.dejong at domain.com where domain in gmail> -# 2012, Joar Wandborg <first name at last name dot se> - -from PIL import Image, ImageDraw, ImageColor #@UnresolvedImport -from functools import partial -import math -import numpy -import os -import re -import signal - - -def get_sound_type(input_filename): - sound_type = os.path.splitext(input_filename.lower())[1].strip(".") - - if sound_type == "fla": - sound_type = "flac" - elif sound_type == "aif": - sound_type = "aiff" - - return sound_type - - -try: - import scikits.audiolab as audiolab -except ImportError: - print "WARNING: audiolab is not installed so wav2png will not work" -import subprocess - -class AudioProcessingException(Exception): - pass - -class TestAudioFile(object): - """A class that mimics audiolab.sndfile but generates noise instead of reading - a wave file. Additionally it can be told to have a "broken" header and thus crashing - in the middle of the file. Also useful for testing ultra-short files of 20 samples.""" - def __init__(self, num_frames, has_broken_header=False): - self.seekpoint = 0 - self.nframes = num_frames - self.samplerate = 44100 - self.channels = 1 - self.has_broken_header = has_broken_header - - def seek(self, seekpoint): - self.seekpoint = seekpoint - - def read_frames(self, frames_to_read): - if self.has_broken_header and self.seekpoint + frames_to_read > self.num_frames / 2: - raise RuntimeError() - - num_frames_left = self.num_frames - self.seekpoint - will_read = num_frames_left if num_frames_left < frames_to_read else frames_to_read - self.seekpoint += will_read - return numpy.random.random(will_read)*2 - 1 - - -def get_max_level(filename): - max_value = 0 - buffer_size = 4096 - audio_file = audiolab.Sndfile(filename, 'r') - n_samples_left = audio_file.nframes - - while n_samples_left: - to_read = min(buffer_size, n_samples_left) - - try: - samples = audio_file.read_frames(to_read) - except RuntimeError: - # this can happen with a broken header - break - - # convert to mono by selecting left channel only - if audio_file.channels > 1: - samples = samples[:,0] - - max_value = max(max_value, numpy.abs(samples).max()) - - n_samples_left -= to_read - - audio_file.close() - - return max_value - -class AudioProcessor(object): - """ - The audio processor processes chunks of audio an calculates the spectrac centroid and the peak - samples in that chunk of audio. - """ - def __init__(self, input_filename, fft_size, window_function=numpy.hanning): - max_level = get_max_level(input_filename) - - self.audio_file = audiolab.Sndfile(input_filename, 'r') - self.fft_size = fft_size - self.window = window_function(self.fft_size) - self.spectrum_range = None - self.lower = 100 - self.higher = 22050 - self.lower_log = math.log10(self.lower) - self.higher_log = math.log10(self.higher) - self.clip = lambda val, low, high: min(high, max(low, val)) - - # figure out what the maximum value is for an FFT doing the FFT of a DC signal - fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window) - max_fft = (numpy.abs(fft)).max() - # set the scale to normalized audio and normalized FFT - self.scale = 1.0/max_level/max_fft if max_level > 0 else 1 - - def read(self, start, size, resize_if_less=False): - """ read size samples starting at start, if resize_if_less is True and less than size - samples are read, resize the array to size and fill with zeros """ - - # number of zeros to add to start and end of the buffer - add_to_start = 0 - add_to_end = 0 - - if start < 0: - # the first FFT window starts centered around zero - if size + start <= 0: - return numpy.zeros(size) if resize_if_less else numpy.array([]) - else: - self.audio_file.seek(0) - - add_to_start = -start # remember: start is negative! - to_read = size + start - - if to_read > self.audio_file.nframes: - add_to_end = to_read - self.audio_file.nframes - to_read = self.audio_file.nframes - else: - self.audio_file.seek(start) - - to_read = size - if start + to_read >= self.audio_file.nframes: - to_read = self.audio_file.nframes - start - add_to_end = size - to_read - - try: - samples = self.audio_file.read_frames(to_read) - except RuntimeError: - # this can happen for wave files with broken headers... - return numpy.zeros(size) if resize_if_less else numpy.zeros(2) - - # convert to mono by selecting left channel only - if self.audio_file.channels > 1: - samples = samples[:,0] - - if resize_if_less and (add_to_start > 0 or add_to_end > 0): - if add_to_start > 0: - samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=1) - - if add_to_end > 0: - samples = numpy.resize(samples, size) - samples[size - add_to_end:] = 0 - - return samples - - - def spectral_centroid(self, seek_point, spec_range=110.0): - """ starting at seek_point read fft_size samples, and calculate the spectral centroid """ - - samples = self.read(seek_point - self.fft_size/2, self.fft_size, True) - - samples *= self.window - fft = numpy.fft.rfft(samples) - spectrum = self.scale * numpy.abs(fft) # normalized abs(FFT) between 0 and 1 - length = numpy.float64(spectrum.shape[0]) - - # scale the db spectrum from [- spec_range db ... 0 db] > [0..1] - db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range - - energy = spectrum.sum() - spectral_centroid = 0 - - if energy > 1e-60: - # calculate the spectral centroid - - if self.spectrum_range == None: - self.spectrum_range = numpy.arange(length) - - spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5 - - # clip > log10 > scale between 0 and 1 - spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log) - - return (spectral_centroid, db_spectrum) - - - def peaks(self, start_seek, end_seek): - """ read all samples between start_seek and end_seek, then find the minimum and maximum peak - in that range. Returns that pair in the order they were found. So if min was found first, - it returns (min, max) else the other way around. """ - - # larger blocksizes are faster but take more mem... - # Aha, Watson, a clue, a tradeof! - block_size = 4096 - - max_index = -1 - max_value = -1 - min_index = -1 - min_value = 1 - - if start_seek < 0: - start_seek = 0 - - if end_seek > self.audio_file.nframes: - end_seek = self.audio_file.nframes - - if end_seek <= start_seek: - samples = self.read(start_seek, 1) - return (samples[0], samples[0]) - - if block_size > end_seek - start_seek: - block_size = end_seek - start_seek - - for i in range(start_seek, end_seek, block_size): - samples = self.read(i, block_size) - - local_max_index = numpy.argmax(samples) - local_max_value = samples[local_max_index] - - if local_max_value > max_value: - max_value = local_max_value - max_index = local_max_index - - local_min_index = numpy.argmin(samples) - local_min_value = samples[local_min_index] - - if local_min_value < min_value: - min_value = local_min_value - min_index = local_min_index - - return (min_value, max_value) if min_index < max_index else (max_value, min_value) - - -def interpolate_colors(colors, flat=False, num_colors=256): - """ given a list of colors, create a larger list of colors interpolating - the first one. If flatten is True a list of numers will be returned. If - False, a list of (r,g,b) tuples. num_colors is the number of colors wanted - in the final list """ - - palette = [] - - for i in range(num_colors): - index = (i * (len(colors) - 1))/(num_colors - 1.0) - index_int = int(index) - alpha = index - float(index_int) - - if alpha > 0: - r = (1.0 - alpha) * colors[index_int][0] + alpha * colors[index_int + 1][0] - g = (1.0 - alpha) * colors[index_int][1] + alpha * colors[index_int + 1][1] - b = (1.0 - alpha) * colors[index_int][2] + alpha * colors[index_int + 1][2] - else: - r = (1.0 - alpha) * colors[index_int][0] - g = (1.0 - alpha) * colors[index_int][1] - b = (1.0 - alpha) * colors[index_int][2] - - if flat: - palette.extend((int(r), int(g), int(b))) - else: - palette.append((int(r), int(g), int(b))) - - return palette - - -def desaturate(rgb, amount): - """ - desaturate colors by amount - amount == 0, no change - amount == 1, grey - """ - luminosity = sum(rgb) / 3.0 - desat = lambda color: color - amount * (color - luminosity) - - return tuple(map(int, map(desat, rgb))) - - -class WaveformImage(object): - """ - Given peaks and spectral centroids from the AudioProcessor, this class will construct - a wavefile image which can be saved as PNG. - """ - def __init__(self, image_width, image_height, palette=1): - if image_height % 2 == 0: - raise AudioProcessingException("Height should be uneven: images look much better at uneven height") - - if palette == 1: - background_color = (0,0,0) - colors = [ - (50,0,200), - (0,220,80), - (255,224,0), - (255,70,0), - ] - elif palette == 2: - background_color = (0,0,0) - colors = [self.color_from_value(value/29.0) for value in range(0,30)] - elif palette == 3: - background_color = (213, 217, 221) - colors = map( partial(desaturate, amount=0.7), [ - (50,0,200), - (0,220,80), - (255,224,0), - ]) - elif palette == 4: - background_color = (213, 217, 221) - colors = map( partial(desaturate, amount=0.8), [self.color_from_value(value/29.0) for value in range(0,30)]) - - self.image = Image.new("RGB", (image_width, image_height), background_color) - - self.image_width = image_width - self.image_height = image_height - - self.draw = ImageDraw.Draw(self.image) - self.previous_x, self.previous_y = None, None - - self.color_lookup = interpolate_colors(colors) - self.pix = self.image.load() - - def color_from_value(self, value): - """ given a value between 0 and 1, return an (r,g,b) tuple """ - - return ImageColor.getrgb("hsl(%d,%d%%,%d%%)" % (int( (1.0 - value) * 360 ), 80, 50)) - - def draw_peaks(self, x, peaks, spectral_centroid): - """ draw 2 peaks at x using the spectral_centroid for color """ - - y1 = self.image_height * 0.5 - peaks[0] * (self.image_height - 4) * 0.5 - y2 = self.image_height * 0.5 - peaks[1] * (self.image_height - 4) * 0.5 - - line_color = self.color_lookup[int(spectral_centroid*255.0)] - - if self.previous_y != None: - self.draw.line([self.previous_x, self.previous_y, x, y1, x, y2], line_color) - else: - self.draw.line([x, y1, x, y2], line_color) - - self.previous_x, self.previous_y = x, y2 - - self.draw_anti_aliased_pixels(x, y1, y2, line_color) - - def draw_anti_aliased_pixels(self, x, y1, y2, color): - """ vertical anti-aliasing at y1 and y2 """ - - y_max = max(y1, y2) - y_max_int = int(y_max) - alpha = y_max - y_max_int - - if alpha > 0.0 and alpha < 1.0 and y_max_int + 1 < self.image_height: - current_pix = self.pix[x, y_max_int + 1] - - r = int((1-alpha)*current_pix[0] + alpha*color[0]) - g = int((1-alpha)*current_pix[1] + alpha*color[1]) - b = int((1-alpha)*current_pix[2] + alpha*color[2]) - - self.pix[x, y_max_int + 1] = (r,g,b) - - y_min = min(y1, y2) - y_min_int = int(y_min) - alpha = 1.0 - (y_min - y_min_int) - - if alpha > 0.0 and alpha < 1.0 and y_min_int - 1 >= 0: - current_pix = self.pix[x, y_min_int - 1] - - r = int((1-alpha)*current_pix[0] + alpha*color[0]) - g = int((1-alpha)*current_pix[1] + alpha*color[1]) - b = int((1-alpha)*current_pix[2] + alpha*color[2]) - - self.pix[x, y_min_int - 1] = (r,g,b) - - def save(self, filename): - # draw a zero "zero" line - a = 25 - for x in range(self.image_width): - self.pix[x, self.image_height/2] = tuple(map(lambda p: p+a, self.pix[x, self.image_height/2])) - - self.image.save(filename) - - -class SpectrogramImage(object): - """ - Given spectra from the AudioProcessor, this class will construct a wavefile image which - can be saved as PNG. - """ - def __init__(self, image_width, image_height, fft_size): - self.image_width = image_width - self.image_height = image_height - self.fft_size = fft_size - - self.image = Image.new("RGBA", (image_height, image_width)) - - colors = [ - (0, 0, 0, 0), - (58/4, 68/4, 65/4, 255), - (80/2, 100/2, 153/2, 255), - (90, 180, 100, 255), - (224, 224, 44, 255), - (255, 60, 30, 255), - (255, 255, 255, 255) - ] - self.palette = interpolate_colors(colors) - - # generate the lookup which translates y-coordinate to fft-bin - self.y_to_bin = [] - f_min = 100.0 - f_max = 22050.0 - y_min = math.log10(f_min) - y_max = math.log10(f_max) - for y in range(self.image_height): - freq = math.pow(10.0, y_min + y / (image_height - 1.0) *(y_max - y_min)) - bin = freq / 22050.0 * (self.fft_size/2 + 1) - - if bin < self.fft_size/2: - alpha = bin - int(bin) - - self.y_to_bin.append((int(bin), alpha * 255)) - - # this is a bit strange, but using image.load()[x,y] = ... is - # a lot slower than using image.putadata and then rotating the image - # so we store all the pixels in an array and then create the image when saving - self.pixels = [] - - def draw_spectrum(self, x, spectrum): - # for all frequencies, draw the pixels - for (index, alpha) in self.y_to_bin: - self.pixels.append( self.palette[int((255.0-alpha) * spectrum[index] + alpha * spectrum[index + 1])] ) - - # if the FFT is too small to fill up the image, fill with black to the top - for y in range(len(self.y_to_bin), self.image_height): #@UnusedVariable - self.pixels.append(self.palette[0]) - - def save(self, filename, quality=80): - assert filename.lower().endswith(".jpg") - self.image.putdata(self.pixels) - self.image.transpose(Image.ROTATE_90).save(filename, quality=quality) - - -def create_wave_images(input_filename, output_filename_w, output_filename_s, image_width, image_height, fft_size, progress_callback=None): - """ - Utility function for creating both wavefile and spectrum images from an audio input file. - """ - processor = AudioProcessor(input_filename, fft_size, numpy.hanning) - samples_per_pixel = processor.audio_file.nframes / float(image_width) - - waveform = WaveformImage(image_width, image_height) - spectrogram = SpectrogramImage(image_width, image_height, fft_size) - - for x in range(image_width): - - if progress_callback and x % (image_width/10) == 0: - progress_callback((x*100)/image_width) - - seek_point = int(x * samples_per_pixel) - next_seek_point = int((x + 1) * samples_per_pixel) - - (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point) - peaks = processor.peaks(seek_point, next_seek_point) - - waveform.draw_peaks(x, peaks, spectral_centroid) - spectrogram.draw_spectrum(x, db_spectrum) - - if progress_callback: - progress_callback(100) - - waveform.save(output_filename_w) - spectrogram.save(output_filename_s) - - -class NoSpaceLeftException(Exception): - pass - -def convert_to_pcm(input_filename, output_filename): - """ - converts any audio file type to pcm audio - """ - - if not os.path.exists(input_filename): - raise AudioProcessingException("file %s does not exist" % input_filename) - - sound_type = get_sound_type(input_filename) - - if sound_type == "mp3": - cmd = ["lame", "--decode", input_filename, output_filename] - elif sound_type == "ogg": - cmd = ["oggdec", input_filename, "-o", output_filename] - elif sound_type == "flac": - cmd = ["flac", "-f", "-d", "-s", "-o", output_filename, input_filename] - else: - return False - - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (stdout, stderr) = process.communicate() - - if process.returncode != 0 or not os.path.exists(output_filename): - if "No space left on device" in stderr + " " + stdout: - raise NoSpaceLeftException - raise AudioProcessingException("failed converting to pcm data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout) - - return True - - -def stereofy_and_find_info(stereofy_executble_path, input_filename, output_filename): - """ - converts a pcm wave file to two channel, 16 bit integer - """ - - if not os.path.exists(input_filename): - raise AudioProcessingException("file %s does not exist" % input_filename) - - cmd = [stereofy_executble_path, "--input", input_filename, "--output", output_filename] - - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (stdout, stderr) = process.communicate() - - if process.returncode != 0 or not os.path.exists(output_filename): - if "No space left on device" in stderr + " " + stdout: - raise NoSpaceLeftException - raise AudioProcessingException("failed calling stereofy data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout) - - stdout = (stdout + " " + stderr).replace("\n", " ") - - duration = 0 - m = re.match(r".*#duration (?P<duration>[\d\.]+).*", stdout) - if m != None: - duration = float(m.group("duration")) - - channels = 0 - m = re.match(r".*#channels (?P<channels>\d+).*", stdout) - if m != None: - channels = float(m.group("channels")) - - samplerate = 0 - m = re.match(r".*#samplerate (?P<samplerate>\d+).*", stdout) - if m != None: - samplerate = float(m.group("samplerate")) - - bitdepth = None - m = re.match(r".*#bitdepth (?P<bitdepth>\d+).*", stdout) - if m != None: - bitdepth = float(m.group("bitdepth")) - - bitrate = (os.path.getsize(input_filename) * 8.0) / 1024.0 / duration if duration > 0 else 0 - - return dict(duration=duration, channels=channels, samplerate=samplerate, bitrate=bitrate, bitdepth=bitdepth) - - -def convert_to_mp3(input_filename, output_filename, quality=70): - """ - converts the incoming wave file to a mp3 file - """ - - if not os.path.exists(input_filename): - raise AudioProcessingException("file %s does not exist" % input_filename) - - command = ["lame", "--silent", "--abr", str(quality), input_filename, output_filename] - - process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (stdout, stderr) = process.communicate() - - if process.returncode != 0 or not os.path.exists(output_filename): - raise AudioProcessingException(stdout) - -def convert_to_ogg(input_filename, output_filename, quality=1): - """ - converts the incoming wave file to n ogg file - """ - - if not os.path.exists(input_filename): - raise AudioProcessingException("file %s does not exist" % input_filename) - - command = ["oggenc", "-q", str(quality), input_filename, "-o", output_filename] - - process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (stdout, stderr) = process.communicate() - - if process.returncode != 0 or not os.path.exists(output_filename): - raise AudioProcessingException(stdout) - -def convert_using_ffmpeg(input_filename, output_filename): - """ - converts the incoming wave file to stereo pcm using fffmpeg - """ - TIMEOUT = 3 * 60 - def alarm_handler(signum, frame): - raise AudioProcessingException("timeout while waiting for ffmpeg") - - if not os.path.exists(input_filename): - raise AudioProcessingException("file %s does not exist" % input_filename) - - command = ["ffmpeg", "-y", "-i", input_filename, "-ac","1","-acodec", "pcm_s16le", "-ar", "44100", output_filename] - - process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - signal.signal(signal.SIGALRM,alarm_handler) - signal.alarm(TIMEOUT) - (stdout, stderr) = process.communicate() - signal.alarm(0) - if process.returncode != 0 or not os.path.exists(output_filename): - raise AudioProcessingException(stdout) diff --git a/mediagoblin/media_types/audio/audioprocessing.py b/mediagoblin/media_types/audio/audioprocessing.py deleted file mode 120000 index c5e3c52c..00000000 --- a/mediagoblin/media_types/audio/audioprocessing.py +++ /dev/null @@ -1 +0,0 @@ -../../../extlib/freesound/audioprocessing.py
\ No newline at end of file diff --git a/mediagoblin/media_types/audio/audiotospectrogram.py b/mediagoblin/media_types/audio/audiotospectrogram.py new file mode 100644 index 00000000..2d2f9423 --- /dev/null +++ b/mediagoblin/media_types/audio/audiotospectrogram.py @@ -0,0 +1,297 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from PIL import Image +import soundfile +import numpy + +SPECTROGRAM_MAX_FREQUENCY = 8000 # Old spectrogram.py sets upper limit to 22050 but + # usually there isn't much detail in higher frequencies +SPECTROGRAM_MIN_FREQUENCY = 20 +SPECTROGRAM_DB_RANGE = 110 +# Color palette copied from old spectrogram.py +SPECTROGRAM_COLORS = [(58 / 4, 68 / 4, 65 / 4), + (80 / 2, 100 / 2, 153 / 2), + (90, 180, 100), + (224, 224, 44), + (255, 60, 30), + (255, 255, 255)] +# The purpose of this table is to give more horizontal +# real estate to shorter sounds files. +# Format: (pixels, (range_min, range_max)) +# For sounds with a duration >= _range_min_ and < _range_max_ +# give _pixel_ horizontal pixels for each second of audio. +SPECTROGRAM_WIDTH_PERSECOND = [(240, ( 0, 20)), + (120, ( 20, 30)), + ( 60, ( 30, 60)), + ( 30, ( 60, 120)), + ( 15, (120, 240)), + ( 6, (240, 100000))] # Upper limit is arbitrary. Sounds with longer + # duration will still get assigned to the last bucket +SPECTROGRAM_HEIGHT = 500 + +class AudioBlocksFFT: + + def __init__(self, fileName, blockSize, overlap, minFreq, maxFreq, numBins = None, windowFunction = numpy.hanning): + self.audioData = soundfile.SoundFile(fileName, 'r') + self.numChannels = self.audioData.channels + self.sampleRate = self.audioData.samplerate + self.minFreq = minFreq + self.maxFreq = maxFreq + self.blockSize = blockSize + self.numBins = numBins + self.overlap = overlap + self.windowValues = windowFunction(blockSize) + self.peakFFTValue = 0 + try: + # PySoundFile V0.10.0 adds SoundFile.frames property and deprecates __len__() + self.totalSamples = self.audioData.frames + except AttributeError: + self.totalSamples = len(self.audioData) + + def peakFFTAmplitude(self): + """ + Peak amplitude of FFT for all blocks + """ + return self.peakFFTValue + + def totalSeconds(self): + """ + Total length in seconds + """ + return self.totalSamples / self.sampleRate + + def _filterFreqRange(self, fftAmplitude): + """ + Given a FFT amplitudes array keep only bins between minFreq, maxFreq + """ + nyquistFreq = self.sampleRate // 2 + numBins = len(fftAmplitude) + sliceWidth = nyquistFreq / numBins + startIdx = int(self.minFreq / sliceWidth) + endIdx = int(self.maxFreq / sliceWidth) + if numBins <= endIdx: + fftAmplitude = numpy.pad(fftAmplitude, (0, 1 + endIdx - numBins), 'constant', constant_values=(0)) + else: + fftAmplitude = fftAmplitude[:endIdx + 1] + return fftAmplitude[startIdx:] + + def _resizeAmplitudeArray(self, amplitudeValues, newSize): + """ + Resize amplitude values array + """ + if len(amplitudeValues) == newSize: + return amplitudeValues + if newSize > len(amplitudeValues): + # Resize up + result = numpy.zeros(newSize) + for idx in range(0, newSize): + srcIdx = (idx * len(amplitudeValues)) // newSize + result[idx] = amplitudeValues[srcIdx] + return result + # Resize down keeping peaks + result = numpy.zeros(newSize) + idx = 0 + for slice in numpy.array_split(amplitudeValues, newSize): + result[idx] = slice.max() + idx = idx + 1 + return result + + def __iter__(self): + """ + Read a block of audio data and compute FFT amplitudes + """ + self.audioData.seek(0) + for fileBlock in self.audioData.blocks(blocksize = self.blockSize, overlap = self.overlap): + # Mix down all channels to mono + audioBlock = fileBlock[:,0] + for channel in range(1, self.numChannels): + audioBlock = numpy.add(audioBlock, fileBlock[:,channel]) + # On the last block it may be necessary to pad with zeros + if len(audioBlock) < self.blockSize: + audioBlock = numpy.pad(audioBlock, (0, self.blockSize - len(audioBlock)), 'constant', constant_values=(0)) + # Compute FFT amplitude of this block + fftAmplitude = self._filterFreqRange(numpy.abs(numpy.fft.rfft(audioBlock * self.windowValues))) + self.peakFFTValue = max(self.peakFFTValue, fftAmplitude.max()) + # Resize if requested + if not self.numBins is None: + fftAmplitude = self._resizeAmplitudeArray(fftAmplitude, self.numBins) + yield (fftAmplitude, self.audioData.tell() / self.sampleRate) + +class SpectrogramColorMap: + + def __init__(self, columnData): + self.columnData = columnData + self.width = len(columnData) + self.height = len(columnData[0]) + self._buildColorPalette() + + def _colorBetween(self, beginColor, endColor, step): + """ + Interpolate between two colors + """ + rS, gS, bS = beginColor + rE, gE, bE = endColor + r = int(numpy.sqrt((1.0 - step) * (rS * rS) + step * (rE * rE))) + g = int(numpy.sqrt((1.0 - step) * (gS * gS) + step * (gE * gE))) + b = int(numpy.sqrt((1.0 - step) * (bS * bS) + step * (bE * bE))) + r = r if r < 256 else 255 + g = g if g < 256 else 255 + b = b if b < 256 else 255 + return (r, g, b) + + def _buildColorPalette(self): + """ + Build color palette + """ + colorPoints = SPECTROGRAM_COLORS + self.colors = [] + for i in range(1, len(colorPoints)): + for p in range(0, 200): + self.colors.append(self._colorBetween(colorPoints[i - 1], colorPoints[i], p / 200)) + + def getColorData(self, progressCallback = None): + """ + Map spectrogram data to pixel colors + """ + pixels = [self.colors[0]] * (self.width * self.height) + for x in range(0, self.width): + for y in range(0, self.height): + idx = x + self.width * y + amplitudeVal = self.columnData[x][self.height - y - 1] + colorIdx = int(len(self.colors) * amplitudeVal) + colorIdx = colorIdx if colorIdx > 0 else 0 + colorIdx = colorIdx if colorIdx < len(self.colors) else len(self.colors) - 1 + pixels[idx] = self.colors[colorIdx] + if progressCallback: + progressCallback(100 * x / self.width) + return pixels + +def drawSpectrogram(audioFileName, imageFileName, fftSize = 1024, fftOverlap = 0, progressCallback = None): + """ + Draw a spectrogram of the audio file + """ + + # Fraction of total work for each step + STEP_PERCENTAGE_FFT = 40 + STEP_PERCENTAGE_NORMALIZE = 5 + STEP_PERCENTAGE_ACCUMULATE = 10 + STEP_PERCENTAGE_DRAW = 40 + # Give last 5% to saving the file + + PERCENTAGE_REPORT_STEP = 2 + + nextReportedPercentage = PERCENTAGE_REPORT_STEP + def wrapProgressCallback(percentage): + nonlocal nextReportedPercentage + percentage = int(percentage) + if percentage >= nextReportedPercentage: + if progressCallback: + progressCallback(percentage) + nextReportedPercentage = (1 + percentage // PERCENTAGE_REPORT_STEP) * PERCENTAGE_REPORT_STEP + + def mapColorsProgressCallback(percentage): + wrapProgressCallback(STEP_PERCENTAGE_FFT + STEP_PERCENTAGE_NORMALIZE + STEP_PERCENTAGE_ACCUMULATE + + (STEP_PERCENTAGE_DRAW * (percentage / 100))) + + imageWidthLookup = SPECTROGRAM_WIDTH_PERSECOND + imageHeight = SPECTROGRAM_HEIGHT + + # Load audio file and compute FFT amplitudes + fftBlocksSource = AudioBlocksFFT(audioFileName, + fftSize, overlap = fftOverlap, + minFreq = SPECTROGRAM_MIN_FREQUENCY, maxFreq = SPECTROGRAM_MAX_FREQUENCY, + numBins = imageHeight) + soundLength = fftBlocksSource.totalSeconds() + fftAmplitudeBlocks = [] + for fftAmplitude, positionSeconds in fftBlocksSource: + fftAmplitudeBlocks.append(fftAmplitude) + wrapProgressCallback(STEP_PERCENTAGE_FFT * (positionSeconds / soundLength)) + + totalProgress = STEP_PERCENTAGE_FFT + + # Normalize FFT amplitude and convert to log scale + specRange = SPECTROGRAM_DB_RANGE + for i in range(0, len(fftAmplitudeBlocks)): + normalized = numpy.divide(fftAmplitudeBlocks[i], fftBlocksSource.peakFFTAmplitude()) + fftAmplitudeBlocks[i] = ((20*(numpy.log10(normalized + 1e-60))).clip(-specRange, 0.0) + specRange)/specRange + wrapProgressCallback(totalProgress + STEP_PERCENTAGE_NORMALIZE * (i / len(fftAmplitudeBlocks))) + + totalProgress = totalProgress + STEP_PERCENTAGE_NORMALIZE + + # Compute spectrogram width in pixels + imageWidthPerSecond, lengthRage = imageWidthLookup[-1] + for widthPerSecond, lengthLimit in imageWidthLookup: + limitLow, limitHigh = lengthLimit + if soundLength > limitLow and soundLength <= limitHigh: + imageWidthPerSecond = widthPerSecond + break + imageWidth = int(imageWidthPerSecond * soundLength) + + # Compute spectrogram values + columnValues = numpy.zeros(imageHeight) + spectrogram = [] + x = 0 + for idx in range(0, len(fftAmplitudeBlocks)): + newX = (idx * imageWidth) // len(fftAmplitudeBlocks) + if newX != x: + # Save column + spectrogram.append(numpy.copy(columnValues)) + x = newX + columnValues.fill(0) + columnValues = numpy.maximum(columnValues, fftAmplitudeBlocks[idx]) + wrapProgressCallback(totalProgress + STEP_PERCENTAGE_ACCUMULATE * (idx / len(fftAmplitudeBlocks))) + spectrogram.append(numpy.copy(columnValues)) + + totalProgress = totalProgress + STEP_PERCENTAGE_ACCUMULATE + + # Draw spectrogram + imageWidth = len(spectrogram) + colorData = SpectrogramColorMap(spectrogram).getColorData(progressCallback = mapColorsProgressCallback) + + totalProgress = totalProgress + STEP_PERCENTAGE_DRAW + + # Save final image + image = Image.new('RGB', (imageWidth, imageHeight)) + image.putdata(colorData) + image.save(imageFileName) + + if progressCallback: + progressCallback(100) + + +if __name__ == "__main__": + + import sys + + def printProgress(p): + sys.stdout.write("\rProgress : {}%".format(p)) + sys.stdout.flush() + + if not (len(sys.argv) == 2 or len(sys.argv) == 3): + print("Usage:\n{0} input_file [output_file]".format(sys.argv[0])) + exit() + + audioFile = sys.argv[1] + + if 3 == len(sys.argv): + outputFile = sys.argv[2] + else: + outputFile = 'spectrogram.png' + + sys.stdout.write("Input : {0}\nOutput : {1}\n".format(audioFile, outputFile)) + drawSpectrogram(audioFile, outputFile, progressCallback = printProgress) + sys.stdout.write("\nDone!\n") diff --git a/mediagoblin/media_types/audio/spectrogram.py b/mediagoblin/media_types/audio/spectrogram.py deleted file mode 100644 index 433bb300..00000000 --- a/mediagoblin/media_types/audio/spectrogram.py +++ /dev/null @@ -1,362 +0,0 @@ -# processing.py -- various audio processing functions -# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG) -# UNIVERSITAT POMPEU FABRA -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# -# Authors: -# Bram de Jong <bram.dejong at domain.com where domain in gmail> -# 2012, Joar Wandborg <first name at last name dot se> - -from __future__ import print_function - -try: - from PIL import Image -except ImportError: - import Image -import math -import numpy - -try: - import scikits.audiolab as audiolab -except ImportError: - print("WARNING: audiolab is not installed so wav2png will not work") - - -class AudioProcessingException(Exception): - pass - - -class SpectrogramImage(object): - def __init__(self, image_size, fft_size): - self.image_width, self.image_height = image_size - self.fft_size = fft_size - - colors = [ - (0, 0, 0, 0), - (58 / 4, 68 / 4, 65 / 4, 255), - (80 / 2, 100 / 2, 153 / 2, 255), - (90, 180, 100, 255), - (224, 224, 44, 255), - (255, 60, 30, 255), - (255, 255, 255, 255) - ] - - self.palette = interpolate_colors(colors) - - # Generate lookup table for y-coordinate from fft-bin - self.y_to_bin = [] - - fft_min = 100.0 - fft_max = 22050.0 # kHz? - - y_min = math.log10(fft_min) - y_max = math.log10(fft_max) - - for y in range(self.image_height): - freq = math.pow( - 10.0, - y_min + y / (self.image_height - 1.0) - * (y_max - y_min)) - - fft_bin = freq / fft_max * (self.fft_size / 2 + 1) - - if fft_bin < self.fft_size / 2: - alpha = fft_bin - int(fft_bin) - - self.y_to_bin.append((int(fft_bin), alpha * 255)) - - # this is a bit strange, but using image.load()[x,y] = ... is - # a lot slower than using image.putadata and then rotating the image - # so we store all the pixels in an array and then create the image when saving - self.pixels = [] - - def draw_spectrum(self, x, spectrum): - # for all frequencies, draw the pixels - for index, alpha in self.y_to_bin: - self.pixels.append( - self.palette[int((255.0 - alpha) * spectrum[index] - + alpha * spectrum[index + 1])]) - - # if the FFT is too small to fill up the image, fill with black to the top - for y in range(len(self.y_to_bin), self.image_height): - self.pixels.append(self.palette[0]) - - def save(self, filename, quality=90): - self.image = Image.new( - 'RGBA', - (self.image_height, self.image_width)) - - self.image.putdata(self.pixels) - self.image.transpose(Image.ROTATE_90).save( - filename, - quality=quality) - - -class AudioProcessor(object): - """ - The audio processor processes chunks of audio an calculates the spectrac centroid and the peak - samples in that chunk of audio. - """ - def __init__(self, input_filename, fft_size, window_function=numpy.hanning): - max_level = get_max_level(input_filename) - - self.audio_file = audiolab.Sndfile(input_filename, 'r') - self.fft_size = fft_size - self.window = window_function(self.fft_size) - self.spectrum_range = None - self.lower = 100 - self.higher = 22050 - self.lower_log = math.log10(self.lower) - self.higher_log = math.log10(self.higher) - self.clip = lambda val, low, high: min(high, max(low, val)) - - # figure out what the maximum value is for an FFT doing the FFT of a DC signal - fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window) - max_fft = (numpy.abs(fft)).max() - - # set the scale to normalized audio and normalized FFT - self.scale = 1.0 / max_level / max_fft if max_level > 0 else 1 - - def read(self, start, size, resize_if_less=False): - """ read size samples starting at start, if resize_if_less is True and less than size - samples are read, resize the array to size and fill with zeros """ - - # number of zeros to add to start and end of the buffer - add_to_start = 0 - add_to_end = 0 - - if start < 0: - # the first FFT window starts centered around zero - if size + start <= 0: - return numpy.zeros(size) if resize_if_less else numpy.array([]) - else: - self.audio_file.seek(0) - - add_to_start = - start # remember: start is negative! - to_read = size + start - - if to_read > self.audio_file.nframes: - add_to_end = to_read - self.audio_file.nframes - to_read = self.audio_file.nframes - else: - self.audio_file.seek(start) - - to_read = size - if start + to_read >= self.audio_file.nframes: - to_read = self.audio_file.nframes - start - add_to_end = size - to_read - - try: - samples = self.audio_file.read_frames(to_read) - except RuntimeError: - # this can happen for wave files with broken headers... - return numpy.zeros(size) if resize_if_less else numpy.zeros(2) - - # convert to mono by selecting left channel only - if self.audio_file.channels > 1: - samples = samples[:,0] - - if resize_if_less and (add_to_start > 0 or add_to_end > 0): - if add_to_start > 0: - samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=1) - - if add_to_end > 0: - samples = numpy.resize(samples, size) - samples[size - add_to_end:] = 0 - - return samples - - def spectral_centroid(self, seek_point, spec_range=110.0): - """ starting at seek_point read fft_size samples, and calculate the spectral centroid """ - - samples = self.read(seek_point - self.fft_size/2, self.fft_size, True) - - samples *= self.window - fft = numpy.fft.rfft(samples) - spectrum = self.scale * numpy.abs(fft) # normalized abs(FFT) between 0 and 1 - - length = numpy.float64(spectrum.shape[0]) - - # scale the db spectrum from [- spec_range db ... 0 db] > [0..1] - db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range - - energy = spectrum.sum() - spectral_centroid = 0 - - if energy > 1e-60: - # calculate the spectral centroid - - if self.spectrum_range == None: - self.spectrum_range = numpy.arange(length) - - spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5 - - # clip > log10 > scale between 0 and 1 - spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log) - - return (spectral_centroid, db_spectrum) - - - def peaks(self, start_seek, end_seek): - """ read all samples between start_seek and end_seek, then find the minimum and maximum peak - in that range. Returns that pair in the order they were found. So if min was found first, - it returns (min, max) else the other way around. """ - - # larger blocksizes are faster but take more mem... - # Aha, Watson, a clue, a tradeof! - block_size = 4096 - - max_index = -1 - max_value = -1 - min_index = -1 - min_value = 1 - - if start_seek < 0: - start_seek = 0 - - if end_seek > self.audio_file.nframes: - end_seek = self.audio_file.nframes - - if end_seek <= start_seek: - samples = self.read(start_seek, 1) - return (samples[0], samples[0]) - - if block_size > end_seek - start_seek: - block_size = end_seek - start_seek - - for i in range(start_seek, end_seek, block_size): - samples = self.read(i, block_size) - - local_max_index = numpy.argmax(samples) - local_max_value = samples[local_max_index] - - if local_max_value > max_value: - max_value = local_max_value - max_index = local_max_index - - local_min_index = numpy.argmin(samples) - local_min_value = samples[local_min_index] - - if local_min_value < min_value: - min_value = local_min_value - min_index = local_min_index - - return (min_value, max_value) if min_index < max_index else (max_value, min_value) - - -def create_spectrogram_image(source_filename, output_filename, - image_size, fft_size, progress_callback=None): - - processor = AudioProcessor(source_filename, fft_size, numpy.hamming) - samples_per_pixel = processor.audio_file.nframes / float(image_size[0]) - - spectrogram = SpectrogramImage(image_size, fft_size) - - for x in range(image_size[0]): - if progress_callback and x % (image_size[0] / 10) == 0: - progress_callback((x * 100) / image_size[0]) - - seek_point = int(x * samples_per_pixel) - next_seek_point = int((x + 1) * samples_per_pixel) - - (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point) - - spectrogram.draw_spectrum(x, db_spectrum) - - if progress_callback: - progress_callback(100) - - spectrogram.save(output_filename) - - -def interpolate_colors(colors, flat=False, num_colors=256): - - palette = [] - - for i in range(num_colors): - # TODO: What does this do? - index = ( - (i * - (len(colors) - 1) # 7 - ) # 0..7..14..21..28... - / - (num_colors - 1.0) # 255.0 - ) - - # TODO: What is the meaning of 'alpha' in this context? - alpha = index - round(index) - - channels = list('rgb') - values = dict() - - for k, v in zip(range(len(channels)), channels): - if alpha > 0: - values[v] = ( - (1.0 - alpha) - * - colors[int(index)][k] - + - alpha * colors[int(index) + 1][k] - ) - else: - values[v] = ( - (1.0 - alpha) - * - colors[int(index)][k] - ) - - if flat: - palette.extend( - tuple(int(values[i]) for i in channels)) - else: - palette.append( - tuple(int(values[i]) for i in channels)) - - return palette - - -def get_max_level(filename): - max_value = 0 - buffer_size = 4096 - audio_file = audiolab.Sndfile(filename, 'r') - n_samples_left = audio_file.nframes - - while n_samples_left: - to_read = min(buffer_size, n_samples_left) - - try: - samples = audio_file.read_frames(to_read) - except RuntimeError: - # this can happen with a broken header - break - - # convert to mono by selecting left channel only - if audio_file.channels > 1: - samples = samples[:,0] - - max_value = max(max_value, numpy.abs(samples).max()) - - n_samples_left -= to_read - - audio_file.close() - - return max_value - -if __name__ == '__main__': - import sys - sys.argv[4] = int(sys.argv[4]) - sys.argv[3] = tuple([int(i) for i in sys.argv[3].split('x')]) - - create_spectrogram_image(*sys.argv[1:]) diff --git a/mediagoblin/media_types/audio/transcoders.py b/mediagoblin/media_types/audio/transcoders.py index a67f4429..11ecf163 100644 --- a/mediagoblin/media_types/audio/transcoders.py +++ b/mediagoblin/media_types/audio/transcoders.py @@ -43,45 +43,15 @@ gi.require_version('Gst', '1.0') from gi.repository import GObject, Gst Gst.init(None) - -# TODO: Now unused - remove. -class Python2AudioThumbnailer(object): +class Python3AudioThumbnailer(object): def __init__(self): _log.info('Initializing {0}'.format(self.__class__.__name__)) def spectrogram(self, src, dst, **kw): - import numpy - # This third-party bundled module is Python 2-only. - from mediagoblin.media_types.audio import audioprocessing - - width = kw['width'] - height = int(kw.get('height', float(width) * 0.3)) - fft_size = kw.get('fft_size', 2048) + from mediagoblin.media_types.audio import audiotospectrogram + fft_size = kw.get('fft_size', 1024) callback = kw.get('progress_callback') - processor = audioprocessing.AudioProcessor( - src, - fft_size, - numpy.hanning) - - samples_per_pixel = processor.audio_file.nframes / float(width) - - spectrogram = audioprocessing.SpectrogramImage(width, height, fft_size) - - for x in range(width): - if callback and x % (width / 10) == 0: - callback((x * 100) / width) - - seek_point = int(x * samples_per_pixel) - - (spectral_centroid, db_spectrum) = processor.spectral_centroid( - seek_point) - - spectrogram.draw_spectrum(x, db_spectrum) - - if callback: - callback(100) - - spectrogram.save(dst) + audiotospectrogram.drawSpectrogram(src, dst, fftSize = fft_size, progressCallback = callback) def thumbnail_spectrogram(self, src, dst, thumb_size): ''' @@ -111,31 +81,7 @@ class Python2AudioThumbnailer(object): th.save(dst) - -class DummyAudioThumbnailer(Python2AudioThumbnailer): - """A thumbnailer that just outputs a stock image. - - The Python package used for audio spectrograms, "scikits.audiolab", does not - support Python 3 and is a constant source of problems for people installing - MediaGoblin. Until the feature is rewritten, this thumbnailer class simply - provides a generic image. - - TODO: Consider Python 3 compatible interfaces to libsndfile, such as - https://pypi.python.org/pypi/PySoundFile/0.9.0.post1 as discussed here - https://issues.mediagoblin.org/ticket/5467#comment:6 - - """ - def spectrogram(self, src, dst, **kw): - # Using PIL here in case someone wants to swap out the image for a PNG. - # This will convert to JPEG, where simply copying the file won't. - img = Image.open('mediagoblin/static/images/media_thumbs/video.jpg') - img.save(dst) - - -# Due to recurring problems with spectrograms under Python 2, and the fact we're -# soon dropping Python 2 support, we're disabling spectrogram thumbnails. See #5594. -AudioThumbnailer = DummyAudioThumbnailer - +AudioThumbnailer = Python3AudioThumbnailer class AudioTranscoder(object): def __init__(self): diff --git a/mediagoblin/tests/test_audio.py b/mediagoblin/tests/test_audio.py index 9826ceb1..62d582ff 100644 --- a/mediagoblin/tests/test_audio.py +++ b/mediagoblin/tests/test_audio.py @@ -25,7 +25,6 @@ import imghdr #os.environ['GST_DEBUG'] = '4,python:4' pytest.importorskip("gi.repository.Gst") -pytest.importorskip("scikits.audiolab") import gi gi.require_version('Gst', '1.0') from gi.repository import Gst @@ -72,6 +72,8 @@ install_requires = [ 'PyLD<2.0.0', # Breaks a Python 3 test if >= 2.0.0. 'ExifRead>=2.0.0', 'email-validator', # Seems that WTForms must have dropped this. + 'soundfile<=0.10.999' # Tested with 0.10.3.post1 + # This is optional: # 'translitcodec', # For now we're expecting that users will install this from |