aboutsummaryrefslogtreecommitdiffstats
path: root/mediagoblin/media_types
diff options
context:
space:
mode:
author: Fernando Gutierrez <fergtm@nil.mx> 2021-03-03 22:16:37 +1100
committer: Ben Sturmfels <ben@sturm.com.au> 2021-03-03 22:21:11 +1100
commit: c2e93da0ce746eb7d6ffe79eec4f84c5d844328d (patch)
tree: aa60e1aa03862563d4e26a14a518f9d3799a5461 /mediagoblin/media_types
parent: 83429a8658986ccd1c54e7085d5df31b01fe16fc (diff)
downloadmediagoblin-c2e93da0ce746eb7d6ffe79eec4f84c5d844328d.tar.lz
mediagoblin-c2e93da0ce746eb7d6ffe79eec4f84c5d844328d.tar.xz
mediagoblin-c2e93da0ce746eb7d6ffe79eec4f84c5d844328d.zip
Reinstate audio spectrograms on Python 3 [#5610].
The `audiotospectrogram` module is a complete rewrite of the existing spectrogram code with support for Python 3. This allows us to drop the bundled `freesound` library and Python 2-only `audioprocessing` and `spectrogram` modules. Signed-off-by: Ben Sturmfels <ben@sturm.com.au>
Diffstat (limited to 'mediagoblin/media_types')
l--------- mediagoblin/media_types/audio/audioprocessing.py 1
-rw-r--r-- mediagoblin/media_types/audio/audiotospectrogram.py 297
-rw-r--r-- mediagoblin/media_types/audio/spectrogram.py 362
-rw-r--r-- mediagoblin/media_types/audio/transcoders.py 64
4 files changed, 302 insertions, 422 deletions
diff --git a/mediagoblin/media_types/audio/audioprocessing.py b/mediagoblin/media_types/audio/audioprocessing.py
deleted file mode 120000
index c5e3c52c..00000000
--- a/mediagoblin/media_types/audio/audioprocessing.py
+++ /dev/null
@@ -1 +0,0 @@
-../../../extlib/freesound/audioprocessing.py \ No newline at end of file
diff --git a/mediagoblin/media_types/audio/audiotospectrogram.py b/mediagoblin/media_types/audio/audiotospectrogram.py
new file mode 100644
index 00000000..2d2f9423
--- /dev/null
+++ b/mediagoblin/media_types/audio/audiotospectrogram.py
@@ -0,0 +1,297 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from PIL import Image
+import soundfile
+import numpy
+
# Frequency band (in Hz) shown on the vertical axis. The old spectrogram.py
# used an upper limit of 22050, but there usually isn't much detail in the
# higher frequencies.
SPECTROGRAM_MIN_FREQUENCY = 20
SPECTROGRAM_MAX_FREQUENCY = 8000

# Dynamic range in dB below the peak that is mapped onto the color palette.
SPECTROGRAM_DB_RANGE = 110

# Height of the generated image in pixels.
SPECTROGRAM_HEIGHT = 500

# Color palette copied from old spectrogram.py, ordered from quietest to
# loudest. The first two entries are dimmed versions of their base colors.
SPECTROGRAM_COLORS = [
    (14.5, 17.0, 16.25),    # (58, 68, 65) at a quarter of its brightness
    (40.0, 50.0, 76.5),     # (80, 100, 153) at half of its brightness
    (90, 180, 100),
    (224, 224, 44),
    (255, 60, 30),
    (255, 255, 255),
]

# The purpose of this table is to give more horizontal real estate to
# shorter sound files.
# Format: (pixels, (range_min, range_max))
# For sounds with a duration >= _range_min_ and < _range_max_
# give _pixels_ horizontal pixels for each second of audio.
# The upper limit of the last entry is arbitrary: sounds with a longer
# duration will still get assigned to the last bucket.
SPECTROGRAM_WIDTH_PERSECOND = [
    (240, (0, 20)),
    (120, (20, 30)),
    (60, (30, 60)),
    (30, (60, 120)),
    (15, (120, 240)),
    (6, (240, 100000)),
]
+
class AudioBlocksFFT:
    """
    Iterate over an audio file block by block, yielding FFT amplitudes.

    Iterating yields ``(amplitudes, positionSeconds)`` tuples, one per block
    of ``blockSize`` frames. ``amplitudes`` holds the magnitude of the
    windowed real FFT of the mono mix-down, restricted to the
    ``minFreq``..``maxFreq`` band and, when ``numBins`` is given, resized to
    exactly ``numBins`` values. ``positionSeconds`` is the read position of
    the file after the block, in seconds.

    The largest amplitude seen so far is tracked while iterating and is
    available through peakFFTAmplitude().

    The underlying sound file stays open until close() is called; the object
    can also be used as a context manager.
    """

    def __init__(self, fileName, blockSize, overlap, minFreq, maxFreq, numBins=None, windowFunction=numpy.hanning):
        """
        Open the audio file and prepare the analysis window

        fileName       -- file to open with soundfile.SoundFile
        blockSize      -- number of frames per FFT block
        overlap        -- number of frames shared by consecutive blocks
        minFreq        -- lowest frequency to keep
        maxFreq        -- highest frequency to keep
        numBins        -- if not None, every yielded array is resized to
                          this many values
        windowFunction -- callable returning blockSize window coefficients
        """
        self.audioData = soundfile.SoundFile(fileName, 'r')
        self.numChannels = self.audioData.channels
        self.sampleRate = self.audioData.samplerate
        self.minFreq = minFreq
        self.maxFreq = maxFreq
        self.blockSize = blockSize
        self.numBins = numBins
        self.overlap = overlap
        self.windowValues = windowFunction(blockSize)
        self.peakFFTValue = 0
        try:
            # PySoundFile V0.10.0 adds SoundFile.frames property and deprecates __len__()
            self.totalSamples = self.audioData.frames
        except AttributeError:
            self.totalSamples = len(self.audioData)

    def close(self):
        """
        Close the underlying sound file
        """
        self.audioData.close()

    def __enter__(self):
        return self

    def __exit__(self, excType, excValue, traceback):
        self.close()
        return False

    def peakFFTAmplitude(self):
        """
        Peak amplitude of FFT for all blocks read so far
        """
        return self.peakFFTValue

    def totalSeconds(self):
        """
        Total length in seconds
        """
        return self.totalSamples / self.sampleRate

    def _filterFreqRange(self, fftAmplitude):
        """
        Given a FFT amplitudes array keep only bins between minFreq, maxFreq

        The spectrum is treated as len(fftAmplitude) equally wide slices
        covering 0 Hz up to half the sample rate. If maxFreq lies beyond
        the last bin the array is zero padded, so the result always
        spans the full requested band.
        """
        nyquistFreq = self.sampleRate // 2
        numBins = len(fftAmplitude)
        sliceWidth = nyquistFreq / numBins
        startIdx = int(self.minFreq / sliceWidth)
        endIdx = int(self.maxFreq / sliceWidth)
        if numBins <= endIdx:
            # Requested band extends past the available spectrum: pad with silence
            fftAmplitude = numpy.pad(fftAmplitude, (0, 1 + endIdx - numBins), 'constant', constant_values=0)
        else:
            fftAmplitude = fftAmplitude[:endIdx + 1]
        return fftAmplitude[startIdx:]

    def _resizeAmplitudeArray(self, amplitudeValues, newSize):
        """
        Resize amplitude values array to newSize entries

        Growing repeats source values (nearest lower index); shrinking
        splits the source into newSize chunks and keeps the peak of each.
        """
        currentSize = len(amplitudeValues)
        if currentSize == newSize:
            return amplitudeValues
        if newSize > currentSize:
            # Resize up: destination index idx reads source index
            # (idx * currentSize) // newSize
            srcIndices = (numpy.arange(newSize) * currentSize) // newSize
            return numpy.asarray(amplitudeValues, dtype=float)[srcIndices]
        # Resize down keeping peaks
        chunks = numpy.array_split(amplitudeValues, newSize)
        return numpy.array([chunk.max() for chunk in chunks], dtype=float)

    def __iter__(self):
        """
        Read blocks of audio data and compute FFT amplitudes
        """
        self.audioData.seek(0)
        # always_2d=True: by default soundfile returns one-dimensional
        # blocks for mono files, which cannot be indexed per channel.
        blocks = self.audioData.blocks(blocksize=self.blockSize,
                                       overlap=self.overlap,
                                       always_2d=True)
        for fileBlock in blocks:
            # Mix down all channels to mono (plain sum; the caller
            # normalizes against the peak amplitude later)
            audioBlock = fileBlock.sum(axis=1)
            # On the last block it may be necessary to pad with zeros
            missing = self.blockSize - len(audioBlock)
            if missing > 0:
                audioBlock = numpy.pad(audioBlock, (0, missing), 'constant', constant_values=0)
            # Compute FFT amplitude of this block
            spectrum = numpy.abs(numpy.fft.rfft(audioBlock * self.windowValues))
            fftAmplitude = self._filterFreqRange(spectrum)
            self.peakFFTValue = max(self.peakFFTValue, fftAmplitude.max())
            # Resize if requested
            if self.numBins is not None:
                fftAmplitude = self._resizeAmplitudeArray(fftAmplitude, self.numBins)
            yield (fftAmplitude, self.audioData.tell() / self.sampleRate)
+
class SpectrogramColorMap:
    """
    Map spectrogram amplitudes onto RGB pixel colors.

    columnData is a sequence of columns (left to right); every column is a
    sequence of amplitude values where 0 maps to the first palette color and
    values of 1 or more map to the last one. Index 0 of a column ends up at
    the bottom of the image.
    """

    def __init__(self, columnData):
        self.columnData = columnData
        self.width = len(columnData)
        # An empty spectrogram has no first column to measure
        self.height = len(columnData[0]) if self.width else 0
        self._buildColorPalette()

    def _colorBetween(self, beginColor, endColor, step):
        """
        Interpolate between two colors

        step is the blend position: 0 gives beginColor, 1 gives endColor.
        Channels are blended on their squared values and capped at 255.
        """
        def blend(begin, end):
            mixed = int(numpy.sqrt((1.0 - step) * (begin * begin) + step * (end * end)))
            return mixed if mixed < 256 else 255

        return tuple(blend(begin, end) for begin, end in zip(beginColor, endColor))

    def _buildColorPalette(self):
        """
        Build color palette

        Every pair of neighbouring colors in SPECTROGRAM_COLORS contributes
        200 interpolated entries to self.colors.
        """
        colorPoints = SPECTROGRAM_COLORS
        self.colors = [
            self._colorBetween(beginColor, endColor, step / 200)
            for beginColor, endColor in zip(colorPoints, colorPoints[1:])
            for step in range(200)
        ]

    def getColorData(self, progressCallback=None):
        """
        Map spectrogram data to pixel colors

        Returns a flat, row-major list of width * height color tuples with
        the top image row first. NaN amplitudes are drawn with the first
        palette color. If given, progressCallback is called once per column
        with the percentage of columns processed so far.
        """
        paletteSize = len(self.colors)
        pixels = [self.colors[0]] * (self.width * self.height)
        for x in range(self.width):
            # Reverse the column so that its first value lands on the bottom row
            column = numpy.asarray(self.columnData[x], dtype=float)[:self.height][::-1]
            # int(NaN) is undefined; treat missing amplitudes as silence
            column = numpy.where(numpy.isnan(column), 0.0, column)
            # Clamp before truncating so that infinities are safe as well
            colorIndices = numpy.clip(column * paletteSize, 0, paletteSize - 1).astype(int)
            # Pixels of column x sit at x, x + width, x + 2 * width, ...
            pixels[x::self.width] = [self.colors[colorIdx] for colorIdx in colorIndices]
            if progressCallback:
                progressCallback(100 * x / self.width)
        return pixels
+
+def drawSpectrogram(audioFileName, imageFileName, fftSize = 1024, fftOverlap = 0, progressCallback = None):
+ """
+ Draw a spectrogram of the audio file
+ """
+
+ # Fraction of total work for each step
+ STEP_PERCENTAGE_FFT = 40
+ STEP_PERCENTAGE_NORMALIZE = 5
+ STEP_PERCENTAGE_ACCUMULATE = 10
+ STEP_PERCENTAGE_DRAW = 40
+ # Give last 5% to saving the file
+
+ PERCENTAGE_REPORT_STEP = 2
+
+ nextReportedPercentage = PERCENTAGE_REPORT_STEP
+ def wrapProgressCallback(percentage):
+ nonlocal nextReportedPercentage
+ percentage = int(percentage)
+ if percentage >= nextReportedPercentage:
+ if progressCallback:
+ progressCallback(percentage)
+ nextReportedPercentage = (1 + percentage // PERCENTAGE_REPORT_STEP) * PERCENTAGE_REPORT_STEP
+
+ def mapColorsProgressCallback(percentage):
+ wrapProgressCallback(STEP_PERCENTAGE_FFT + STEP_PERCENTAGE_NORMALIZE + STEP_PERCENTAGE_ACCUMULATE
+ + (STEP_PERCENTAGE_DRAW * (percentage / 100)))
+
+ imageWidthLookup = SPECTROGRAM_WIDTH_PERSECOND
+ imageHeight = SPECTROGRAM_HEIGHT
+
+ # Load audio file and compute FFT amplitudes
+ fftBlocksSource = AudioBlocksFFT(audioFileName,
+ fftSize, overlap = fftOverlap,
+ minFreq = SPECTROGRAM_MIN_FREQUENCY, maxFreq = SPECTROGRAM_MAX_FREQUENCY,
+ numBins = imageHeight)
+ soundLength = fftBlocksSource.totalSeconds()
+ fftAmplitudeBlocks = []
+ for fftAmplitude, positionSeconds in fftBlocksSource:
+ fftAmplitudeBlocks.append(fftAmplitude)
+ wrapProgressCallback(STEP_PERCENTAGE_FFT * (positionSeconds / soundLength))
+
+ totalProgress = STEP_PERCENTAGE_FFT
+
+ # Normalize FFT amplitude and convert to log scale
+ specRange = SPECTROGRAM_DB_RANGE
+ for i in range(0, len(fftAmplitudeBlocks)):
+ normalized = numpy.divide(fftAmplitudeBlocks[i], fftBlocksSource.peakFFTAmplitude())
+ fftAmplitudeBlocks[i] = ((20*(numpy.log10(normalized + 1e-60))).clip(-specRange, 0.0) + specRange)/specRange
+ wrapProgressCallback(totalProgress + STEP_PERCENTAGE_NORMALIZE * (i / len(fftAmplitudeBlocks)))
+
+ totalProgress = totalProgress + STEP_PERCENTAGE_NORMALIZE
+
+ # Compute spectrogram width in pixels
+ imageWidthPerSecond, lengthRage = imageWidthLookup[-1]
+ for widthPerSecond, lengthLimit in imageWidthLookup:
+ limitLow, limitHigh = lengthLimit
+ if soundLength > limitLow and soundLength <= limitHigh:
+ imageWidthPerSecond = widthPerSecond
+ break
+ imageWidth = int(imageWidthPerSecond * soundLength)
+
+ # Compute spectrogram values
+ columnValues = numpy.zeros(imageHeight)
+ spectrogram = []
+ x = 0
+ for idx in range(0, len(fftAmplitudeBlocks)):
+ newX = (idx * imageWidth) // len(fftAmplitudeBlocks)
+ if newX != x:
+ # Save column
+ spectrogram.append(numpy.copy(columnValues))
+ x = newX
+ columnValues.fill(0)
+ columnValues = numpy.maximum(columnValues, fftAmplitudeBlocks[idx])
+ wrapProgressCallback(totalProgress + STEP_PERCENTAGE_ACCUMULATE * (idx / len(fftAmplitudeBlocks)))
+ spectrogram.append(numpy.copy(columnValues))
+
+ totalProgress = totalProgress + STEP_PERCENTAGE_ACCUMULATE
+
+ # Draw spectrogram
+ imageWidth = len(spectrogram)
+ colorData = SpectrogramColorMap(spectrogram).getColorData(progressCallback = mapColorsProgressCallback)
+
+ totalProgress = totalProgress + STEP_PERCENTAGE_DRAW
+
+ # Save final image
+ image = Image.new('RGB', (imageWidth, imageHeight))
+ image.putdata(colorData)
+ image.save(imageFileName)
+
+ if progressCallback:
+ progressCallback(100)
+
+
if __name__ == "__main__":
    # Command line entry point: render the spectrogram of one audio file.
    #   audiotospectrogram.py input_file [output_file]
    # output_file defaults to 'spectrogram.png'.

    import sys

    def printProgress(p):
        # Rewrite the same terminal line on every update
        sys.stdout.write("\rProgress : {}%".format(p))
        sys.stdout.flush()

    if len(sys.argv) not in (2, 3):
        print("Usage:\n{0} input_file [output_file]".format(sys.argv[0]))
        # sys.exit() rather than the interactive-only exit() helper, with a
        # non-zero status so callers can detect the usage error
        sys.exit(1)

    audioFile = sys.argv[1]
    outputFile = sys.argv[2] if len(sys.argv) == 3 else 'spectrogram.png'

    sys.stdout.write("Input : {0}\nOutput : {1}\n".format(audioFile, outputFile))
    drawSpectrogram(audioFile, outputFile, progressCallback=printProgress)
    sys.stdout.write("\nDone!\n")
diff --git a/mediagoblin/media_types/audio/spectrogram.py b/mediagoblin/media_types/audio/spectrogram.py
deleted file mode 100644
index 433bb300..00000000
--- a/mediagoblin/media_types/audio/spectrogram.py
+++ /dev/null
@@ -1,362 +0,0 @@
-# processing.py -- various audio processing functions
-# Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
-# UNIVERSITAT POMPEU FABRA
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# Authors:
-# Bram de Jong <bram.dejong at domain.com where domain in gmail>
-# 2012, Joar Wandborg <first name at last name dot se>
-
-from __future__ import print_function
-
-try:
- from PIL import Image
-except ImportError:
- import Image
-import math
-import numpy
-
-try:
- import scikits.audiolab as audiolab
-except ImportError:
- print("WARNING: audiolab is not installed so wav2png will not work")
-
-
-class AudioProcessingException(Exception):
- pass
-
-
-class SpectrogramImage(object):
- def __init__(self, image_size, fft_size):
- self.image_width, self.image_height = image_size
- self.fft_size = fft_size
-
- colors = [
- (0, 0, 0, 0),
- (58 / 4, 68 / 4, 65 / 4, 255),
- (80 / 2, 100 / 2, 153 / 2, 255),
- (90, 180, 100, 255),
- (224, 224, 44, 255),
- (255, 60, 30, 255),
- (255, 255, 255, 255)
- ]
-
- self.palette = interpolate_colors(colors)
-
- # Generate lookup table for y-coordinate from fft-bin
- self.y_to_bin = []
-
- fft_min = 100.0
- fft_max = 22050.0 # kHz?
-
- y_min = math.log10(fft_min)
- y_max = math.log10(fft_max)
-
- for y in range(self.image_height):
- freq = math.pow(
- 10.0,
- y_min + y / (self.image_height - 1.0)
- * (y_max - y_min))
-
- fft_bin = freq / fft_max * (self.fft_size / 2 + 1)
-
- if fft_bin < self.fft_size / 2:
- alpha = fft_bin - int(fft_bin)
-
- self.y_to_bin.append((int(fft_bin), alpha * 255))
-
- # this is a bit strange, but using image.load()[x,y] = ... is
- # a lot slower than using image.putadata and then rotating the image
- # so we store all the pixels in an array and then create the image when saving
- self.pixels = []
-
- def draw_spectrum(self, x, spectrum):
- # for all frequencies, draw the pixels
- for index, alpha in self.y_to_bin:
- self.pixels.append(
- self.palette[int((255.0 - alpha) * spectrum[index]
- + alpha * spectrum[index + 1])])
-
- # if the FFT is too small to fill up the image, fill with black to the top
- for y in range(len(self.y_to_bin), self.image_height):
- self.pixels.append(self.palette[0])
-
- def save(self, filename, quality=90):
- self.image = Image.new(
- 'RGBA',
- (self.image_height, self.image_width))
-
- self.image.putdata(self.pixels)
- self.image.transpose(Image.ROTATE_90).save(
- filename,
- quality=quality)
-
-
-class AudioProcessor(object):
- """
- The audio processor processes chunks of audio an calculates the spectrac centroid and the peak
- samples in that chunk of audio.
- """
- def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
- max_level = get_max_level(input_filename)
-
- self.audio_file = audiolab.Sndfile(input_filename, 'r')
- self.fft_size = fft_size
- self.window = window_function(self.fft_size)
- self.spectrum_range = None
- self.lower = 100
- self.higher = 22050
- self.lower_log = math.log10(self.lower)
- self.higher_log = math.log10(self.higher)
- self.clip = lambda val, low, high: min(high, max(low, val))
-
- # figure out what the maximum value is for an FFT doing the FFT of a DC signal
- fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
- max_fft = (numpy.abs(fft)).max()
-
- # set the scale to normalized audio and normalized FFT
- self.scale = 1.0 / max_level / max_fft if max_level > 0 else 1
-
- def read(self, start, size, resize_if_less=False):
- """ read size samples starting at start, if resize_if_less is True and less than size
- samples are read, resize the array to size and fill with zeros """
-
- # number of zeros to add to start and end of the buffer
- add_to_start = 0
- add_to_end = 0
-
- if start < 0:
- # the first FFT window starts centered around zero
- if size + start <= 0:
- return numpy.zeros(size) if resize_if_less else numpy.array([])
- else:
- self.audio_file.seek(0)
-
- add_to_start = - start # remember: start is negative!
- to_read = size + start
-
- if to_read > self.audio_file.nframes:
- add_to_end = to_read - self.audio_file.nframes
- to_read = self.audio_file.nframes
- else:
- self.audio_file.seek(start)
-
- to_read = size
- if start + to_read >= self.audio_file.nframes:
- to_read = self.audio_file.nframes - start
- add_to_end = size - to_read
-
- try:
- samples = self.audio_file.read_frames(to_read)
- except RuntimeError:
- # this can happen for wave files with broken headers...
- return numpy.zeros(size) if resize_if_less else numpy.zeros(2)
-
- # convert to mono by selecting left channel only
- if self.audio_file.channels > 1:
- samples = samples[:,0]
-
- if resize_if_less and (add_to_start > 0 or add_to_end > 0):
- if add_to_start > 0:
- samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=1)
-
- if add_to_end > 0:
- samples = numpy.resize(samples, size)
- samples[size - add_to_end:] = 0
-
- return samples
-
- def spectral_centroid(self, seek_point, spec_range=110.0):
- """ starting at seek_point read fft_size samples, and calculate the spectral centroid """
-
- samples = self.read(seek_point - self.fft_size/2, self.fft_size, True)
-
- samples *= self.window
- fft = numpy.fft.rfft(samples)
- spectrum = self.scale * numpy.abs(fft) # normalized abs(FFT) between 0 and 1
-
- length = numpy.float64(spectrum.shape[0])
-
- # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
- db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range
-
- energy = spectrum.sum()
- spectral_centroid = 0
-
- if energy > 1e-60:
- # calculate the spectral centroid
-
- if self.spectrum_range == None:
- self.spectrum_range = numpy.arange(length)
-
- spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5
-
- # clip > log10 > scale between 0 and 1
- spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)
-
- return (spectral_centroid, db_spectrum)
-
-
- def peaks(self, start_seek, end_seek):
- """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
- in that range. Returns that pair in the order they were found. So if min was found first,
- it returns (min, max) else the other way around. """
-
- # larger blocksizes are faster but take more mem...
- # Aha, Watson, a clue, a tradeof!
- block_size = 4096
-
- max_index = -1
- max_value = -1
- min_index = -1
- min_value = 1
-
- if start_seek < 0:
- start_seek = 0
-
- if end_seek > self.audio_file.nframes:
- end_seek = self.audio_file.nframes
-
- if end_seek <= start_seek:
- samples = self.read(start_seek, 1)
- return (samples[0], samples[0])
-
- if block_size > end_seek - start_seek:
- block_size = end_seek - start_seek
-
- for i in range(start_seek, end_seek, block_size):
- samples = self.read(i, block_size)
-
- local_max_index = numpy.argmax(samples)
- local_max_value = samples[local_max_index]
-
- if local_max_value > max_value:
- max_value = local_max_value
- max_index = local_max_index
-
- local_min_index = numpy.argmin(samples)
- local_min_value = samples[local_min_index]
-
- if local_min_value < min_value:
- min_value = local_min_value
- min_index = local_min_index
-
- return (min_value, max_value) if min_index < max_index else (max_value, min_value)
-
-
-def create_spectrogram_image(source_filename, output_filename,
- image_size, fft_size, progress_callback=None):
-
- processor = AudioProcessor(source_filename, fft_size, numpy.hamming)
- samples_per_pixel = processor.audio_file.nframes / float(image_size[0])
-
- spectrogram = SpectrogramImage(image_size, fft_size)
-
- for x in range(image_size[0]):
- if progress_callback and x % (image_size[0] / 10) == 0:
- progress_callback((x * 100) / image_size[0])
-
- seek_point = int(x * samples_per_pixel)
- next_seek_point = int((x + 1) * samples_per_pixel)
-
- (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
-
- spectrogram.draw_spectrum(x, db_spectrum)
-
- if progress_callback:
- progress_callback(100)
-
- spectrogram.save(output_filename)
-
-
-def interpolate_colors(colors, flat=False, num_colors=256):
-
- palette = []
-
- for i in range(num_colors):
- # TODO: What does this do?
- index = (
- (i *
- (len(colors) - 1) # 7
- ) # 0..7..14..21..28...
- /
- (num_colors - 1.0) # 255.0
- )
-
- # TODO: What is the meaning of 'alpha' in this context?
- alpha = index - round(index)
-
- channels = list('rgb')
- values = dict()
-
- for k, v in zip(range(len(channels)), channels):
- if alpha > 0:
- values[v] = (
- (1.0 - alpha)
- *
- colors[int(index)][k]
- +
- alpha * colors[int(index) + 1][k]
- )
- else:
- values[v] = (
- (1.0 - alpha)
- *
- colors[int(index)][k]
- )
-
- if flat:
- palette.extend(
- tuple(int(values[i]) for i in channels))
- else:
- palette.append(
- tuple(int(values[i]) for i in channels))
-
- return palette
-
-
-def get_max_level(filename):
- max_value = 0
- buffer_size = 4096
- audio_file = audiolab.Sndfile(filename, 'r')
- n_samples_left = audio_file.nframes
-
- while n_samples_left:
- to_read = min(buffer_size, n_samples_left)
-
- try:
- samples = audio_file.read_frames(to_read)
- except RuntimeError:
- # this can happen with a broken header
- break
-
- # convert to mono by selecting left channel only
- if audio_file.channels > 1:
- samples = samples[:,0]
-
- max_value = max(max_value, numpy.abs(samples).max())
-
- n_samples_left -= to_read
-
- audio_file.close()
-
- return max_value
-
-if __name__ == '__main__':
- import sys
- sys.argv[4] = int(sys.argv[4])
- sys.argv[3] = tuple([int(i) for i in sys.argv[3].split('x')])
-
- create_spectrogram_image(*sys.argv[1:])
diff --git a/mediagoblin/media_types/audio/transcoders.py b/mediagoblin/media_types/audio/transcoders.py
index a67f4429..11ecf163 100644
--- a/mediagoblin/media_types/audio/transcoders.py
+++ b/mediagoblin/media_types/audio/transcoders.py
@@ -43,45 +43,15 @@ gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst
Gst.init(None)
-
-# TODO: Now unused - remove.
-class Python2AudioThumbnailer(object):
+class Python3AudioThumbnailer(object):
def __init__(self):
_log.info('Initializing {0}'.format(self.__class__.__name__))
def spectrogram(self, src, dst, **kw):
- import numpy
- # This third-party bundled module is Python 2-only.
- from mediagoblin.media_types.audio import audioprocessing
-
- width = kw['width']
- height = int(kw.get('height', float(width) * 0.3))
- fft_size = kw.get('fft_size', 2048)
+ from mediagoblin.media_types.audio import audiotospectrogram
+ fft_size = kw.get('fft_size', 1024)
callback = kw.get('progress_callback')
- processor = audioprocessing.AudioProcessor(
- src,
- fft_size,
- numpy.hanning)
-
- samples_per_pixel = processor.audio_file.nframes / float(width)
-
- spectrogram = audioprocessing.SpectrogramImage(width, height, fft_size)
-
- for x in range(width):
- if callback and x % (width / 10) == 0:
- callback((x * 100) / width)
-
- seek_point = int(x * samples_per_pixel)
-
- (spectral_centroid, db_spectrum) = processor.spectral_centroid(
- seek_point)
-
- spectrogram.draw_spectrum(x, db_spectrum)
-
- if callback:
- callback(100)
-
- spectrogram.save(dst)
+ audiotospectrogram.drawSpectrogram(src, dst, fftSize = fft_size, progressCallback = callback)
def thumbnail_spectrogram(self, src, dst, thumb_size):
'''
@@ -111,31 +81,7 @@ class Python2AudioThumbnailer(object):
th.save(dst)
-
-class DummyAudioThumbnailer(Python2AudioThumbnailer):
- """A thumbnailer that just outputs a stock image.
-
- The Python package used for audio spectrograms, "scikits.audiolab", does not
- support Python 3 and is a constant source of problems for people installing
- MediaGoblin. Until the feature is rewritten, this thumbnailer class simply
- provides a generic image.
-
- TODO: Consider Python 3 compatible interfaces to libsndfile, such as
- https://pypi.python.org/pypi/PySoundFile/0.9.0.post1 as discussed here
- https://issues.mediagoblin.org/ticket/5467#comment:6
-
- """
- def spectrogram(self, src, dst, **kw):
- # Using PIL here in case someone wants to swap out the image for a PNG.
- # This will convert to JPEG, where simply copying the file won't.
- img = Image.open('mediagoblin/static/images/media_thumbs/video.jpg')
- img.save(dst)
-
-
-# Due to recurring problems with spectrograms under Python 2, and the fact we're
-# soon dropping Python 2 support, we're disabling spectrogram thumbnails. See #5594.
-AudioThumbnailer = DummyAudioThumbnailer
-
+AudioThumbnailer = Python3AudioThumbnailer
class AudioTranscoder(object):
def __init__(self):