Reinstate audio spectrograms on Python 3 [#5610].

The `audiotospectrogram` module is a complete rewrite of the existing spectrogram code with support for Python 3. This allows us to drop the bundled `freesound` library and the Python 2-only `audioprocessing` and `spectrogram` modules.

Signed-off-by: Ben Sturmfels <ben@sturm.com.au>
author: Fernando Gutierrez <fergtm@nil.mx> 2021-03-03 22:16:37 +1100
committer: Ben Sturmfels <ben@sturm.com.au> 2021-03-03 22:21:11 +1100
commit: c2e93da0ce746eb7d6ffe79eec4f84c5d844328d (patch)
tree: aa60e1aa03862563d4e26a14a518f9d3799a5461 /mediagoblin/media_types
parent: 83429a8658986ccd1c54e7085d5df31b01fe16fc (diff)
download: mediagoblin-c2e93da0ce746eb7d6ffe79eec4f84c5d844328d.tar.lz
mediagoblin-c2e93da0ce746eb7d6ffe79eec4f84c5d844328d.tar.xz
mediagoblin-c2e93da0ce746eb7d6ffe79eec4f84c5d844328d.zip
4 files changed, 302 insertions, 422 deletions
diff --git a/mediagoblin/media_types/audio/audioprocessing.py b/mediagoblin/media_types/audio/audioprocessing.py
deleted file mode 120000
index c5e3c52c..00000000
--- a/mediagoblin/media_types/audio/audioprocessing.py
+++ /dev/null
@@ -1 +0,0 @@
-../../../extlib/freesound/audioprocessing.py
-\ No newline at end of file
diff --git a/mediagoblin/media_types/audio/audiotospectrogram.py b/mediagoblin/media_types/audio/audiotospectrogram.py
new file mode 100644
index 00000000..2d2f9423
--- /dev/null
+++ b/mediagoblin/media_types/audio/audiotospectrogram.py
@@ -0,0 +1,297 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from PIL import Image
+import soundfile
+import numpy
+
+# Frequency band shown in the spectrogram. drawSpectrogram passes these to
+# AudioBlocksFFT as maxFreq / minFreq, where they are compared against half
+# of the file's sample rate.
+SPECTROGRAM_MAX_FREQUENCY = 8000 # Old spectrogram.py sets upper limit to 22050 but
+                                 # usually there isn't much detail in higher frequencies
+SPECTROGRAM_MIN_FREQUENCY = 20
+# Dynamic range in dB: amplitudes this far (or further) below the peak are
+# clipped and mapped to the first palette color.
+SPECTROGRAM_DB_RANGE      = 110
+# Color palette copied from old spectrogram.py
+# Ordered from quietest to loudest; SpectrogramColorMap interpolates between
+# consecutive entries. The divisions produce floats, which are truncated to
+# integers during interpolation.
+SPECTROGRAM_COLORS = [(58 / 4, 68 / 4, 65 / 4),
+                      (80 / 2, 100 / 2, 153 / 2),
+                      (90, 180, 100),
+                      (224, 224, 44),
+                      (255, 60, 30),
+                      (255, 255, 255)]
+# The purpose of this table is to give more horizontal
+# real estate to shorter sound files.
+# Format: (pixels, (range_min, range_max))
+# For sounds with a duration >= _range_min_ and < _range_max_
+# give _pixel_ horizontal pixels for each second of audio.
+# Durations are in seconds.
+SPECTROGRAM_WIDTH_PERSECOND = [(240, (  0,     20)),
+                               (120, ( 20,     30)),
+                               ( 60, ( 30,     60)),
+                               ( 30, ( 60,    120)),
+                               ( 15, (120,    240)),
+                               (  6, (240, 100000))] # Upper limit is arbitrary. Sounds with longer
+                                                     # duration will still get assigned to the last bucket
+# Image height in pixels; also the number of frequency bins kept per column
+# (passed to AudioBlocksFFT as numBins).
+SPECTROGRAM_HEIGHT = 500
+
+class AudioBlocksFFT:
+    """
+    Read an audio file block by block and compute FFT amplitudes
+
+    Iterating over an instance yields one (amplitudes, positionSeconds) tuple
+    per block, where amplitudes holds the FFT magnitudes between minFreq and
+    maxFreq (resized to numBins entries when numBins is given) and
+    positionSeconds is the read position in the file after the block.
+
+    The instance keeps the sound file open; call close() or use the instance
+    as a context manager to release it.
+    """
+
+    def __init__(self, fileName, blockSize, overlap, minFreq, maxFreq, numBins = None, windowFunction = numpy.hanning):
+        """
+        fileName       -- audio file, passed unchanged to soundfile.SoundFile
+        blockSize      -- number of frames per FFT block
+        overlap        -- number of frames shared by consecutive blocks
+        minFreq        -- lowest frequency to keep
+        maxFreq        -- highest frequency to keep
+        numBins        -- if not None, resize every amplitude array to this length
+        windowFunction -- called with blockSize, returns the window applied to each block
+        """
+        self.audioData = soundfile.SoundFile(fileName, 'r')
+        self.numChannels = self.audioData.channels
+        self.sampleRate = self.audioData.samplerate
+        self.minFreq = minFreq
+        self.maxFreq = maxFreq
+        self.blockSize = blockSize
+        self.numBins = numBins
+        self.overlap = overlap
+        self.windowValues = windowFunction(blockSize)
+        self.peakFFTValue = 0
+        try:
+            # PySoundFile V0.10.0 adds SoundFile.frames property and deprecates __len__()
+            self.totalSamples = self.audioData.frames
+        except AttributeError:
+            self.totalSamples = len(self.audioData)
+
+    def close(self):
+        """
+        Close the underlying sound file
+        """
+        self.audioData.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, excType, excValue, traceback):
+        self.close()
+
+    def peakFFTAmplitude(self):
+        """
+        Peak amplitude of FFT for all blocks
+
+        Only covers the blocks that have been iterated over so far.
+        """
+        return self.peakFFTValue
+
+    def totalSeconds(self):
+        """
+        Total length in seconds
+        """
+        return self.totalSamples / self.sampleRate
+
+    def _filterFreqRange(self, fftAmplitude):
+        """
+        Given a FFT amplitudes array keep only bins between minFreq, maxFreq
+
+        When maxFreq lies beyond the last available bin the array is padded
+        with zeros, so the result length does not depend on the sample rate
+        reaching maxFreq.
+        """
+        nyquistFreq = self.sampleRate // 2
+        numBins = len(fftAmplitude)
+        sliceWidth = nyquistFreq / numBins
+        startIdx = int(self.minFreq / sliceWidth)
+        endIdx = int(self.maxFreq / sliceWidth)
+        if numBins <= endIdx:
+            fftAmplitude = numpy.pad(fftAmplitude, (0, 1 + endIdx - numBins), 'constant', constant_values=(0))
+        else:
+            fftAmplitude = fftAmplitude[:endIdx + 1]
+        return fftAmplitude[startIdx:]
+
+    def _resizeAmplitudeArray(self, amplitudeValues, newSize):
+        """
+        Resize amplitude values array
+
+        Growing repeats source values; shrinking keeps the maximum of each
+        group of source values so that peaks are not lost.
+        """
+        amplitudeValues = numpy.asarray(amplitudeValues)
+        oldSize = len(amplitudeValues)
+        if oldSize == newSize:
+            return amplitudeValues
+        if newSize > oldSize:
+            # Resize up: target index idx reads source index idx * oldSize // newSize
+            srcIdx = (numpy.arange(newSize) * oldSize) // newSize
+            return amplitudeValues[srcIdx].astype(float)
+        # Resize down keeping peaks
+        return numpy.array([chunk.max() for chunk in numpy.array_split(amplitudeValues, newSize)],
+                           dtype = float)
+
+    def __iter__(self):
+        """
+        Read a block of audio data and compute FFT amplitudes
+
+        Yields (fftAmplitude, positionSeconds) for every block, starting from
+        the beginning of the file, and updates the peak amplitude as it goes.
+        """
+        self.audioData.seek(0)
+        for fileBlock in self.audioData.blocks(blocksize = self.blockSize, overlap = self.overlap):
+            # Mix down all channels to mono by summing them. Blocks of a mono
+            # file come back one-dimensional and are already the mono signal.
+            if fileBlock.ndim == 1:
+                audioBlock = fileBlock
+            else:
+                audioBlock = fileBlock.sum(axis = 1)
+            # On the last block it may be necessary to pad with zeros
+            if len(audioBlock) < self.blockSize:
+                audioBlock = numpy.pad(audioBlock, (0, self.blockSize - len(audioBlock)), 'constant', constant_values=(0))
+            # Compute FFT amplitude of this block
+            fftAmplitude = self._filterFreqRange(numpy.abs(numpy.fft.rfft(audioBlock * self.windowValues)))
+            self.peakFFTValue = max(self.peakFFTValue, fftAmplitude.max())
+            # Resize if requested
+            if self.numBins is not None:
+                fftAmplitude = self._resizeAmplitudeArray(fftAmplitude, self.numBins)
+            yield (fftAmplitude, self.audioData.tell() / self.sampleRate)
+
+class SpectrogramColorMap:
+    """
+    Turn spectrogram columns into a flat list of RGB pixels
+
+    columnData is a sequence of columns, each a sequence of amplitude values;
+    values from 0 to 1 span the palette and anything outside is clamped to
+    the first or last palette color.
+    """
+
+    def __init__(self, columnData):
+        self.columnData = columnData
+        self.width = len(columnData)
+        self.height = len(columnData[0])
+        self._buildColorPalette()
+
+    def _colorBetween(self, beginColor, endColor, step):
+        """
+        Interpolate between two colors
+
+        step is the blend position: 0 gives beginColor, 1 gives endColor.
+        Channels are blended on their squares and capped at 255.
+        """
+        rS, gS, bS = beginColor
+        rE, gE, bE = endColor
+
+        def blendChannel(startValue, endValue):
+            mixed = (1.0 - step) * (startValue * startValue) + step * (endValue * endValue)
+            return min(int(numpy.sqrt(mixed)), 255)
+
+        return (blendChannel(rS, rE), blendChannel(gS, gE), blendChannel(bS, bE))
+
+    def _buildColorPalette(self):
+        """
+        Build color palette
+
+        Every pair of neighbouring SPECTROGRAM_COLORS entries contributes 200
+        interpolated colors to self.colors.
+        """
+        stepsPerSegment = 200
+        self.colors = []
+        for segmentStart, segmentEnd in zip(SPECTROGRAM_COLORS, SPECTROGRAM_COLORS[1:]):
+            self.colors.extend([self._colorBetween(segmentStart, segmentEnd, step / 200)
+                                for step in range(stepsPerSegment)])
+
+    def getColorData(self, progressCallback = None):
+        """
+        Map spectrogram data to pixel colors
+
+        Returns width * height colors stored row after row. After each column
+        progressCallback, if given, is called with the percentage of columns
+        started so far.
+        """
+        paletteSize = len(self.colors)
+        lastColorIdx = paletteSize - 1
+        pixels = [self.colors[0]] * (self.width * self.height)
+        for x in range(self.width):
+            column = self.columnData[x]
+            for y in range(self.height):
+                # Rows are filled from the end of the column, so column
+                # index 0 lands in the last row of the image
+                scaled = int(paletteSize * column[self.height - y - 1])
+                pixels[x + self.width * y] = self.colors[min(max(scaled, 0), lastColorIdx)]
+            if progressCallback:
+                progressCallback(100 * x / self.width)
+        return pixels
+
+def _widthPerSecondFor(soundLength, widthLookup):
+    """
+    Look up how many horizontal pixels each second of audio gets
+
+    widthLookup is a list of (pixels, (range_min, range_max)) entries. An
+    entry applies to durations with range_min <= soundLength < range_max;
+    durations matching no entry get the pixel value of the last entry.
+    """
+    for widthPerSecond, (limitLow, limitHigh) in widthLookup:
+        if limitLow <= soundLength < limitHigh:
+            return widthPerSecond
+    return widthLookup[-1][0]
+
+def drawSpectrogram(audioFileName, imageFileName, fftSize = 1024, fftOverlap = 0, progressCallback = None):
+    """
+    Draw a spectrogram of the audio file
+
+    audioFileName    -- audio file to read (handed to AudioBlocksFFT)
+    imageFileName    -- where the image is saved (handed to Image.save)
+    fftSize          -- number of frames per FFT block
+    fftOverlap       -- number of frames shared by consecutive FFT blocks
+    progressCallback -- if given, called with an integer percentage as work
+                        advances (at most once per PERCENTAGE_REPORT_STEP)
+                        and with 100 once the image has been saved
+    """
+
+    # Fraction of total work for each step
+    STEP_PERCENTAGE_FFT        = 40
+    STEP_PERCENTAGE_NORMALIZE  = 5
+    STEP_PERCENTAGE_ACCUMULATE = 10
+    STEP_PERCENTAGE_DRAW       = 40
+    # Give last 5% to saving the file
+
+    PERCENTAGE_REPORT_STEP = 2
+
+    nextReportedPercentage = PERCENTAGE_REPORT_STEP
+    def wrapProgressCallback(percentage):
+        # Forward a percentage only once it reaches the next reporting
+        # threshold, so the caller is not flooded with updates
+        nonlocal nextReportedPercentage
+        percentage = int(percentage)
+        if percentage >= nextReportedPercentage:
+            if progressCallback:
+                progressCallback(percentage)
+            nextReportedPercentage = (1 + percentage // PERCENTAGE_REPORT_STEP) * PERCENTAGE_REPORT_STEP
+
+    def mapColorsProgressCallback(percentage):
+        # Translate the 0-100 progress of the drawing step into overall progress
+        wrapProgressCallback(STEP_PERCENTAGE_FFT + STEP_PERCENTAGE_NORMALIZE + STEP_PERCENTAGE_ACCUMULATE
+                             + (STEP_PERCENTAGE_DRAW * (percentage / 100)))
+
+    imageHeight = SPECTROGRAM_HEIGHT
+
+    # Load audio file and compute FFT amplitudes
+    fftBlocksSource = AudioBlocksFFT(audioFileName,
+                                     fftSize, overlap = fftOverlap,
+                                     minFreq = SPECTROGRAM_MIN_FREQUENCY, maxFreq = SPECTROGRAM_MAX_FREQUENCY,
+                                     numBins = imageHeight)
+    try:
+        soundLength = fftBlocksSource.totalSeconds()
+        fftAmplitudeBlocks = []
+        for fftAmplitude, positionSeconds in fftBlocksSource:
+            fftAmplitudeBlocks.append(fftAmplitude)
+            wrapProgressCallback(STEP_PERCENTAGE_FFT * (positionSeconds / soundLength))
+    finally:
+        # All audio has been read (or reading failed): release the file
+        fftBlocksSource.audioData.close()
+
+    totalProgress = STEP_PERCENTAGE_FFT
+    numBlocks = len(fftAmplitudeBlocks)
+
+    # Normalize FFT amplitude and convert to log scale
+    specRange = SPECTROGRAM_DB_RANGE
+    peakAmplitude = fftBlocksSource.peakFFTAmplitude()
+    if not peakAmplitude > 0:
+        # Completely silent audio: dividing by a zero peak would fill the
+        # data with NaN, which cannot be mapped to a color. Every amplitude
+        # is zero here, so any positive divisor gives the same result.
+        peakAmplitude = 1.0
+    for i in range(numBlocks):
+        normalized = numpy.divide(fftAmplitudeBlocks[i], peakAmplitude)
+        # Scale the dB values from [-specRange, 0] to [0, 1]; the tiny offset avoids log10(0)
+        fftAmplitudeBlocks[i] = ((20*(numpy.log10(normalized + 1e-60))).clip(-specRange, 0.0) + specRange)/specRange
+        wrapProgressCallback(totalProgress + STEP_PERCENTAGE_NORMALIZE * (i / numBlocks))
+
+    totalProgress = totalProgress + STEP_PERCENTAGE_NORMALIZE
+
+    # Compute spectrogram width in pixels
+    imageWidthPerSecond = _widthPerSecondFor(soundLength, SPECTROGRAM_WIDTH_PERSECOND)
+    imageWidth = int(imageWidthPerSecond * soundLength)
+
+    # Compute spectrogram values: blocks that fall on the same image column
+    # are merged by keeping the maximum of every frequency bin
+    columnValues = numpy.zeros(imageHeight)
+    spectrogram = []
+    x = 0
+    for idx in range(numBlocks):
+        newX = (idx * imageWidth) // numBlocks
+        if newX != x:
+            # Save column
+            spectrogram.append(numpy.copy(columnValues))
+            x = newX
+            columnValues.fill(0)
+        columnValues = numpy.maximum(columnValues, fftAmplitudeBlocks[idx])
+        wrapProgressCallback(totalProgress + STEP_PERCENTAGE_ACCUMULATE * (idx / numBlocks))
+    spectrogram.append(numpy.copy(columnValues))
+
+    # Draw spectrogram; the real width is the number of columns produced
+    imageWidth = len(spectrogram)
+    colorData = SpectrogramColorMap(spectrogram).getColorData(progressCallback = mapColorsProgressCallback)
+
+    # Save final image
+    image = Image.new('RGB', (imageWidth, imageHeight))
+    image.putdata(colorData)
+    image.save(imageFileName)
+
+    if progressCallback:
+        progressCallback(100)
+
+
+if __name__ == "__main__":
+    # Command line usage: draw the spectrogram of input_file into
+    # output_file (default 'spectrogram.png') while printing progress.
+
+    import sys
+
+    def printProgress(p):
+        # Rewrite the same console line on every update
+        sys.stdout.write("\rProgress : {}%".format(p))
+        sys.stdout.flush()
+
+    if len(sys.argv) not in (2, 3):
+        print("Usage:\n{0} input_file [output_file]".format(sys.argv[0]))
+        # sys.exit is always available (the bare exit() helper is only
+        # injected by the site module); a non-zero status signals the error
+        sys.exit(1)
+
+    audioFile = sys.argv[1]
+
+    if len(sys.argv) == 3:
+        outputFile = sys.argv[2]
+    else:
+        outputFile = 'spectrogram.png'
+
+    sys.stdout.write("Input    : {0}\nOutput   : {1}\n".format(audioFile, outputFile))
+    drawSpectrogram(audioFile, outputFile, progressCallback = printProgress)
+    sys.stdout.write("\nDone!\n")
diff --git a/mediagoblin/media_types/audio/spectrogram.py b/mediagoblin/media_types/audio/spectrogram.py
deleted file mode 100644
index 433bb300..00000000
--- a/mediagoblin/media_types/audio/spectrogram.py
+++ /dev/null
@@ -1,362 +0,0 @@
-# processing.py -- various audio processing functions
-# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
-#                    UNIVERSITAT POMPEU FABRA
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-# Authors:
-#   Bram de Jong <bram.dejong at domain.com where domain in gmail>
-#   2012, Joar Wandborg <first name at last name dot se>
-
-from __future__ import print_function
-
-try:
-    from PIL import Image
-except ImportError:
-    import Image
-import math
-import numpy
-
-try:
-    import scikits.audiolab as audiolab
-except ImportError:
-    print("WARNING: audiolab is not installed so wav2png will not work")
-
-
-class AudioProcessingException(Exception):
-    pass
-
-
-class SpectrogramImage(object):
-    def __init__(self, image_size, fft_size):
-        self.image_width, self.image_height = image_size
-        self.fft_size = fft_size
-
-        colors = [
-            (0, 0, 0, 0),
-            (58 / 4, 68 / 4, 65 / 4, 255),
-            (80 / 2, 100 / 2, 153 / 2, 255),
-            (90, 180, 100, 255),
-            (224, 224, 44, 255),
-            (255, 60, 30, 255),
-            (255, 255, 255, 255)
-         ]
-
-        self.palette = interpolate_colors(colors)
-
-        # Generate lookup table for y-coordinate from fft-bin
-        self.y_to_bin = []
-
-        fft_min = 100.0
-        fft_max = 22050.0  # kHz?
-
-        y_min = math.log10(fft_min)
-        y_max = math.log10(fft_max)
-
-        for y in range(self.image_height):
-            freq = math.pow(
-                    10.0,
-                    y_min + y / (self.image_height - 1.0)
-                    * (y_max - y_min))
-
-            fft_bin = freq / fft_max * (self.fft_size / 2 + 1)
-
-            if fft_bin < self.fft_size / 2:
-                alpha = fft_bin - int(fft_bin)
-
-                self.y_to_bin.append((int(fft_bin), alpha * 255))
-
-        # this is a bit strange, but using image.load()[x,y] = ... is
-        # a lot slower than using image.putadata and then rotating the image
-        # so we store all the pixels in an array and then create the image when saving
-        self.pixels = []
-
-    def draw_spectrum(self, x, spectrum):
-        # for all frequencies, draw the pixels
-        for index, alpha in self.y_to_bin:
-            self.pixels.append(
-                    self.palette[int((255.0 - alpha) * spectrum[index]
-                        + alpha * spectrum[index + 1])])
-
-        # if the FFT is too small to fill up the image, fill with black to the top
-        for y in range(len(self.y_to_bin), self.image_height):
-            self.pixels.append(self.palette[0])
-
-    def save(self, filename, quality=90):
-        self.image = Image.new(
-                'RGBA',
-                (self.image_height, self.image_width))
-
-        self.image.putdata(self.pixels)
-        self.image.transpose(Image.ROTATE_90).save(
-                filename,
-                quality=quality)
-
-
-class AudioProcessor(object):
-    """
-    The audio processor processes chunks of audio an calculates the spectrac centroid and the peak
-    samples in that chunk of audio.
-    """
-    def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
-        max_level = get_max_level(input_filename)
-
-        self.audio_file = audiolab.Sndfile(input_filename, 'r')
-        self.fft_size = fft_size
-        self.window = window_function(self.fft_size)
-        self.spectrum_range = None
-        self.lower = 100
-        self.higher = 22050
-        self.lower_log = math.log10(self.lower)
-        self.higher_log = math.log10(self.higher)
-        self.clip = lambda val, low, high: min(high, max(low, val))
-
-        # figure out what the maximum value is for an FFT doing the FFT of a DC signal
-        fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
-        max_fft = (numpy.abs(fft)).max()
-
-        # set the scale to normalized audio and normalized FFT
-        self.scale = 1.0 / max_level / max_fft if max_level > 0 else 1
-
-    def read(self, start, size, resize_if_less=False):
-        """ read size samples starting at start, if resize_if_less is True and less than size
-        samples are read, resize the array to size and fill with zeros """
-
-        # number of zeros to add to start and end of the buffer
-        add_to_start = 0
-        add_to_end = 0
-
-        if start < 0:
-            # the first FFT window starts centered around zero
-            if size + start <= 0:
-                return numpy.zeros(size) if resize_if_less else numpy.array([])
-            else:
-                self.audio_file.seek(0)
-
-                add_to_start = - start  # remember: start is negative!
-                to_read = size + start
-
-                if to_read > self.audio_file.nframes:
-                    add_to_end = to_read - self.audio_file.nframes
-                    to_read = self.audio_file.nframes
-        else:
-            self.audio_file.seek(start)
-
-            to_read = size
-            if start + to_read >= self.audio_file.nframes:
-                to_read = self.audio_file.nframes - start
-                add_to_end = size - to_read
-
-        try:
-            samples = self.audio_file.read_frames(to_read)
-        except RuntimeError:
-            # this can happen for wave files with broken headers...
-            return numpy.zeros(size) if resize_if_less else numpy.zeros(2)
-
-        # convert to mono by selecting left channel only
-        if self.audio_file.channels > 1:
-            samples = samples[:,0]
-
-        if resize_if_less and (add_to_start > 0 or add_to_end > 0):
-            if add_to_start > 0:
-                samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=1)
-
-            if add_to_end > 0:
-                samples = numpy.resize(samples, size)
-                samples[size - add_to_end:] = 0
-
-        return samples
-
-    def spectral_centroid(self, seek_point, spec_range=110.0):
-        """ starting at seek_point read fft_size samples, and calculate the spectral centroid """
-
-        samples = self.read(seek_point - self.fft_size/2, self.fft_size, True)
-
-        samples *= self.window
-        fft = numpy.fft.rfft(samples)
-        spectrum = self.scale * numpy.abs(fft)  # normalized abs(FFT) between 0 and 1
-
-        length = numpy.float64(spectrum.shape[0])
-
-        # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
-        db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range
-
-        energy = spectrum.sum()
-        spectral_centroid = 0
-
-        if energy > 1e-60:
-            # calculate the spectral centroid
-
-            if self.spectrum_range == None:
-                self.spectrum_range = numpy.arange(length)
-
-            spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5
-
-            # clip > log10 > scale between 0 and 1
-            spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)
-
-        return (spectral_centroid, db_spectrum)
-
-
-    def peaks(self, start_seek, end_seek):
-        """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
-        in that range. Returns that pair in the order they were found. So if min was found first,
-        it returns (min, max) else the other way around. """
-
-        # larger blocksizes are faster but take more mem...
-        # Aha, Watson, a clue, a tradeof!
-        block_size = 4096
-
-        max_index = -1
-        max_value = -1
-        min_index = -1
-        min_value = 1
-
-        if start_seek < 0:
-            start_seek = 0
-
-        if end_seek > self.audio_file.nframes:
-            end_seek = self.audio_file.nframes
-
-        if end_seek <= start_seek:
-            samples = self.read(start_seek, 1)
-            return (samples[0], samples[0])
-
-        if block_size > end_seek - start_seek:
-            block_size = end_seek - start_seek
-
-        for i in range(start_seek, end_seek, block_size):
-            samples = self.read(i, block_size)
-
-            local_max_index = numpy.argmax(samples)
-            local_max_value = samples[local_max_index]
-
-            if local_max_value > max_value:
-                max_value = local_max_value
-                max_index = local_max_index
-
-            local_min_index = numpy.argmin(samples)
-            local_min_value = samples[local_min_index]
-
-            if local_min_value < min_value:
-                min_value = local_min_value
-                min_index = local_min_index
-
-        return (min_value, max_value) if min_index < max_index else (max_value, min_value)
-
-
-def create_spectrogram_image(source_filename, output_filename,
-        image_size, fft_size, progress_callback=None):
-
-    processor = AudioProcessor(source_filename, fft_size, numpy.hamming)
-    samples_per_pixel = processor.audio_file.nframes / float(image_size[0])
-
-    spectrogram = SpectrogramImage(image_size, fft_size)
-
-    for x in range(image_size[0]):
-        if progress_callback and x % (image_size[0] / 10) == 0:
-            progress_callback((x * 100) / image_size[0])
-
-        seek_point = int(x * samples_per_pixel)
-        next_seek_point = int((x + 1) * samples_per_pixel)
-
-        (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
-
-        spectrogram.draw_spectrum(x, db_spectrum)
-
-    if progress_callback:
-        progress_callback(100)
-
-    spectrogram.save(output_filename)
-
-
-def interpolate_colors(colors, flat=False, num_colors=256):
-
-    palette = []
-
-    for i in range(num_colors):
-        # TODO: What does this do?
-        index = (
-                (i *
-                    (len(colors) - 1)  # 7
-                )  # 0..7..14..21..28...
-            /
-                (num_colors - 1.0)  # 255.0
-            )
-
-        # TODO: What is the meaning of 'alpha' in this context?
-        alpha = index - round(index)
-
-        channels = list('rgb')
-        values = dict()
-
-        for k, v in zip(range(len(channels)), channels):
-            if alpha > 0:
-                values[v] = (
-                        (1.0 - alpha)
-                    *
-                        colors[int(index)][k]
-                    +
-                        alpha * colors[int(index) + 1][k]
-                    )
-            else:
-                values[v] = (
-                        (1.0 - alpha)
-                    *
-                        colors[int(index)][k]
-                    )
-
-        if flat:
-            palette.extend(
-                tuple(int(values[i]) for i in channels))
-        else:
-            palette.append(
-                tuple(int(values[i]) for i in channels))
-
-    return palette
-
-
-def get_max_level(filename):
-    max_value = 0
-    buffer_size = 4096
-    audio_file = audiolab.Sndfile(filename, 'r')
-    n_samples_left = audio_file.nframes
-
-    while n_samples_left:
-        to_read = min(buffer_size, n_samples_left)
-
-        try:
-            samples = audio_file.read_frames(to_read)
-        except RuntimeError:
-            # this can happen with a broken header
-            break
-
-        # convert to mono by selecting left channel only
-        if audio_file.channels > 1:
-            samples = samples[:,0]
-
-        max_value = max(max_value, numpy.abs(samples).max())
-
-        n_samples_left -= to_read
-
-    audio_file.close()
-
-    return max_value
-
-if __name__ == '__main__':
-    import sys
-    sys.argv[4] = int(sys.argv[4])
-    sys.argv[3] = tuple([int(i) for i in sys.argv[3].split('x')])
-
-    create_spectrogram_image(*sys.argv[1:])
diff --git a/mediagoblin/media_types/audio/transcoders.py b/mediagoblin/media_types/audio/transcoders.py
index a67f4429..11ecf163 100644
--- a/mediagoblin/media_types/audio/transcoders.py
+++ b/mediagoblin/media_types/audio/transcoders.py
@@ -43,45 +43,15 @@ gi.require_version('Gst', '1.0')
 from gi.repository import GObject, Gst
 Gst.init(None)
 
-
-# TODO: Now unused - remove.
-class Python2AudioThumbnailer(object):
+class Python3AudioThumbnailer(object):
     def __init__(self):
         _log.info('Initializing {0}'.format(self.__class__.__name__))
 
     def spectrogram(self, src, dst, **kw):
-        import numpy
-        # This third-party bundled module is Python 2-only.
-        from mediagoblin.media_types.audio import audioprocessing
-
-        width = kw['width']
-        height = int(kw.get('height', float(width) * 0.3))
-        fft_size = kw.get('fft_size', 2048)
+        from mediagoblin.media_types.audio import audiotospectrogram
+        fft_size = kw.get('fft_size', 1024)
         callback = kw.get('progress_callback')
-        processor = audioprocessing.AudioProcessor(
-            src,
-            fft_size,
-            numpy.hanning)
-
-        samples_per_pixel = processor.audio_file.nframes / float(width)
-
-        spectrogram = audioprocessing.SpectrogramImage(width, height, fft_size)
-
-        for x in range(width):
-            if callback and x % (width / 10) == 0:
-                callback((x * 100) / width)
-
-            seek_point = int(x * samples_per_pixel)
-
-            (spectral_centroid, db_spectrum) = processor.spectral_centroid(
-                seek_point)
-
-            spectrogram.draw_spectrum(x, db_spectrum)
-
-        if callback:
-            callback(100)
-
-        spectrogram.save(dst)
+        audiotospectrogram.drawSpectrogram(src, dst, fftSize = fft_size, progressCallback = callback)
 
     def thumbnail_spectrogram(self, src, dst, thumb_size):
         '''
@@ -111,31 +81,7 @@ class Python2AudioThumbnailer(object):
 
         th.save(dst)
 
-
-class DummyAudioThumbnailer(Python2AudioThumbnailer):
-    """A thumbnailer that just outputs a stock image.
-
-    The Python package used for audio spectrograms, "scikits.audiolab", does not
-    support Python 3 and is a constant source of problems for people installing
-    MediaGoblin. Until the feature is rewritten, this thumbnailer class simply
-    provides a generic image.
-
-    TODO: Consider Python 3 compatible interfaces to libsndfile, such as
-    https://pypi.python.org/pypi/PySoundFile/0.9.0.post1 as discussed here
-    https://issues.mediagoblin.org/ticket/5467#comment:6
-
-    """
-    def spectrogram(self, src, dst, **kw):
-        # Using PIL here in case someone wants to swap out the image for a PNG.
-        # This will convert to JPEG, where simply copying the file won't.
-        img = Image.open('mediagoblin/static/images/media_thumbs/video.jpg')
-        img.save(dst)
-
-
-# Due to recurring problems with spectrograms under Python 2, and the fact we're
-# soon dropping Python 2 support, we're disabling spectrogram thumbnails. See #5594.
-AudioThumbnailer = DummyAudioThumbnailer
-
+AudioThumbnailer = Python3AudioThumbnailer
 
 class AudioTranscoder(object):
     def __init__(self):
author	Fernando Gutierrez <fergtm@nil.mx>	2021-03-03 22:16:37 +1100
committer	Ben Sturmfels <ben@sturm.com.au>	2021-03-03 22:21:11 +1100
commit	c2e93da0ce746eb7d6ffe79eec4f84c5d844328d (patch)
tree	aa60e1aa03862563d4e26a14a518f9d3799a5461 /mediagoblin/media_types
parent	83429a8658986ccd1c54e7085d5df31b01fe16fc (diff)
download	mediagoblin-c2e93da0ce746eb7d6ffe79eec4f84c5d844328d.tar.lz mediagoblin-c2e93da0ce746eb7d6ffe79eec4f84c5d844328d.tar.xz mediagoblin-c2e93da0ce746eb7d6ffe79eec4f84c5d844328d.zip