# Audacity 1.1.0 TimeStretch Patch
#
# By Doug Hoyte
#
# Some code (smsPitchScale.cpp) is
# COPYRIGHT 1999 Stephan M. Sprenger
#
#
# This patch adds an effect that can "Stretch" the time of a selected
# sample. It makes the sample longer or shorter according to a given ratio.
#
# Also included is the experimental "Pitch Scaling" that will keep a
# sample at the same pitch, even after being stretched.
#
#
# Limitations:
# - The GUI sucks. Ideally, I'd like it to be one window that has a slide
#   bar and a checkbox. Also, it would show you the length of the selected
#   sample (in seconds) and you could choose the desired length in seconds
#   or via the ratio. Unfortunately, my wxWindows skills suck...
# - After being stretched, the current selection should be the new audio,
#   not where the old audio was. This isn't a problem with other effects
#   because they don't change length. I wasn't sure how to change the
#   selection boundaries, so I just left it.
# - The Pitch Scale code is SLOOOW.
# - Sometimes the Pitch Scale code leaves a tiny bit of garbage at the
#   beginning of a clip.
# - The Pitch Scale code degrades the quality of the sample somewhat.
#
#
# Apply like so:
#
# cd audacity-1.1.0/
# patch -p1 < /tmp/audacity-src-1.1.0-timestretch.patch
#
# Replace /tmp/ with wherever you put the patch, obviously.
# Then run configure and make, and enjoy.
# # # diff -urNb audacity-src-1.1.0/src/Makefile.in audacity-src-1.1.0-timestretch/src/Makefile.in --- audacity-src-1.1.0/src/Makefile.in Wed Jun 5 00:45:54 2002 +++ audacity-src-1.1.0-timestretch/src/Makefile.in Sat Aug 10 01:16:52 2002 @@ -77,6 +77,8 @@ $(OBJDIR)/effects/NoiseRemoval.o \ $(OBJDIR)/effects/Phaser.o \ $(OBJDIR)/effects/Reverse.o \ + $(OBJDIR)/effects/smsPitchScale.o \ + $(OBJDIR)/effects/TimeStretch.o \ $(OBJDIR)/effects/Wahwah.o \ $(OBJDIR)/export/Export.o \ $(OBJDIR)/export/ExportMP3.o \ diff -urNb audacity-src-1.1.0/src/effects/LoadEffects.cpp audacity-src-1.1.0-timestretch/src/effects/LoadEffects.cpp --- audacity-src-1.1.0/src/effects/LoadEffects.cpp Wed Jun 5 00:45:54 2002 +++ audacity-src-1.1.0-timestretch/src/effects/LoadEffects.cpp Sat Aug 10 01:17:56 2002 @@ -22,6 +22,7 @@ #include "NoiseRemoval.h" #include "Phaser.h" #include "Reverse.h" +#include "TimeStretch.h" #include "Wahwah.h" #ifdef USE_WAVELET @@ -53,6 +54,7 @@ Effect::RegisterEffect(new EffectNoiseRemoval(), false); Effect::RegisterEffect(new EffectPhaser(), false); Effect::RegisterEffect(new EffectReverse(), false); + Effect::RegisterEffect(new EffectTimeStretch(), false); Effect::RegisterEffect(new EffectWahwah(), false); #ifdef USE_WAVELET diff -urNb audacity-src-1.1.0/src/effects/TimeStretch.cpp audacity-src-1.1.0-timestretch/src/effects/TimeStretch.cpp --- audacity-src-1.1.0/src/effects/TimeStretch.cpp Wed Dec 31 16:00:00 1969 +++ audacity-src-1.1.0-timestretch/src/effects/TimeStretch.cpp Sat Aug 10 01:17:12 2002 @@ -0,0 +1,137 @@ +/********************************************************************** + + Audacity: A Digital Audio Editor + + TimeStretch.cpp + + Doug Hoyte + + This class is able to stretch a selection, making it take up more or + less time. 
+
+**********************************************************************/
+
+
+#include   // NOTE(review): this directive and the next carry no header name in the patch text; restore them before applying
+#include
+
+#include "TimeStretch.h"
+#include "smsPitchScale.h"
+#include "../WaveTrack.h"
+
+//
+// EffectTimeStretch
+// A new effect starts with ratio 1.0 and pitch scaling switched off.
+
+EffectTimeStretch::EffectTimeStretch()
+{
+   ratio = 1.0;
+   pitchscale = false;
+}
+// PromptUser: asks for the stretch ratio, then for a y/n pitch-scaling choice.
+// Returns false as soon as any prompt comes back empty, true once both are set.
+bool EffectTimeStretch::PromptUser()
+{
+   wxString temp;
+   wxString title = _("TimeStretch by Doug Hoyte");
+   wxString caption = _("Ratio (new length/orig length): ");
+   wxString default_value = wxString::Format("%f", ratio);
+   // default_value was built from the ratio left by the previous run; the members are reset next.
+   ratio = 1.0;
+   pitchscale = false;
+   // Keep asking until exactly one number was parsed and it is strictly positive.
+   temp = wxGetTextFromUser(caption, title,
+                            default_value, mParent, -1, -1, TRUE);
+   if (temp == "")
+      return false;
+   while (sscanf((const char *) temp, "%f", &ratio) != 1 || !(ratio > 0)) {
+      caption = _("Please enter a positive number for the ratio: ");
+      temp = wxGetTextFromUser(caption, title,
+                               default_value, mParent, -1, -1, TRUE);
+      if (temp == "")
+         return false;
+   }
+   // Second question: only the exact replies "y" and "n" are accepted.
+   caption = _("Do you want pitch scaling (EXPERIMENTAL)? (y/n): ");
+   default_value = wxString::Format("n");
+   temp = wxGetTextFromUser(caption, title,
+                            default_value, mParent, -1, -1, TRUE);
+   if (temp == "")
+      return false;
+   while (temp != "n" && temp != "y") {
+      caption = _("Please enter y or n for the pitch scaling: ");
+      temp = wxGetTextFromUser(caption, title,
+                               default_value, mParent, -1, -1, TRUE);
+      if (temp == "")
+         return false;
+   }
+
+   if (temp == "y") pitchscale = true;
+   else pitchscale = false;
+
+
+   return true;
+}
+
+// Process: calls ProcessOne for every track yielded by iterating mWaveTracks.
+// Stops and returns false at the first track whose ProcessOne call fails.
+bool EffectTimeStretch::Process()
+{
+   TrackListIterator iter(mWaveTracks);
+   VTrack *t = iter.First();
+   int count = 0;
+   while(t) {
+      sampleCount start, len;
+      GetSamples((WaveTrack *)t, &start, &len);
+      bool success = ProcessOne(count, (WaveTrack *)t, start, len);
+
+      if (!success)
+         return false;
+
+      t = iter.Next();
+      count++;
+   }
+
+   return true;
+}
+
+
+
+bool EffectTimeStretch::ProcessOne(int count, WaveTrack *t,
+                                   sampleCount base, sampleCount len)
+{
+   int newlen=(int) (len*ratio) + 1;
+   double 
basesecs=0; + float *inbuf = new float[len]; + float *outbuf = new float[newlen]; + int i,nextloc,currloc=0; + + + if (ratio == 1) goto bail; + + basesecs = base / (t->GetRate()); + + t->Get(inbuf, base, len); + + for (i=0; i 1) t->InsertSilence(basesecs, (newlen-len)/(t->GetRate()) ); + if (ratio < 1) t->Clear(basesecs, basesecs+((len-newlen) / (t->GetRate())) ); + + + if (pitchscale == true) smsPitchScale(ratio, newlen, 2048, 4, t->GetRate(), outbuf, outbuf); + + t->Set(outbuf, base, newlen); + + bail: + + delete[] inbuf; + delete[] outbuf; + + return true; +} diff -urNb audacity-src-1.1.0/src/effects/TimeStretch.h audacity-src-1.1.0-timestretch/src/effects/TimeStretch.h --- audacity-src-1.1.0/src/effects/TimeStretch.h Wed Dec 31 16:00:00 1969 +++ audacity-src-1.1.0-timestretch/src/effects/TimeStretch.h Sat Aug 10 01:17:12 2002 @@ -0,0 +1,62 @@ +/********************************************************************** + + Audacity: A Digital Audio Editor + + TimeStretch.h + + Doug Hoyte + + This class is able to stretch a selection, making it take up more or + less time. 
+
+**********************************************************************/
+
+#ifndef __AUDACITY_EFFECT_TIMESTRETCH__
+#define __AUDACITY_EFFECT_TIMESTRETCH__
+
+class wxString;
+// NOTE(review): the eight #include directives below carry no header names in this patch text.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "Effect.h"
+#include "smsPitchScale.h"
+
+
+
+#define __UNINITIALIZED__ (-1)
+
+class WaveTrack;
+// Effect that stretches a selection so it takes up more or less time, optionally with pitch scaling.
+class EffectTimeStretch:public Effect {
+
+ public:
+   EffectTimeStretch();
+   // Returns the string "TimeStretch" passed through _().
+   virtual wxString GetEffectName() {
+      return wxString(_("TimeStretch"));
+   }
+   // Returns the string "Stretching Time" passed through _().
+   virtual wxString GetEffectAction() {
+      return wxString(_("Stretching Time"));
+   }
+   // Fills in ratio and pitchscale from user prompts; false when a prompt comes back empty.
+   virtual bool PromptUser();
+   // Runs ProcessOne over the effect's wave tracks; false if any call fails.
+   virtual bool Process();
+
+ private:
+   bool ProcessOne(int count, WaveTrack * t, sampleCount start, sampleCount len);
+
+   // ratio: new length / original length; pitchscale: whether smsPitchScale() is applied afterwards.
+   float ratio;
+   bool pitchscale;
+};
+
+#endif
diff -urNb audacity-src-1.1.0/src/effects/smsPitchScale.cpp audacity-src-1.1.0-timestretch/src/effects/smsPitchScale.cpp
--- audacity-src-1.1.0/src/effects/smsPitchScale.cpp	Wed Dec 31 16:00:00 1969
+++ audacity-src-1.1.0-timestretch/src/effects/smsPitchScale.cpp	Sat Aug 10 01:17:18 2002
@@ -0,0 +1,280 @@
+/****************************************************************************
+*
+* NAME: smsPitchScale.cpp
+* VERSION: 1.01
+* HOME URL: http://www.dspdimension.com
+* KNOWN BUGS: none
+*
+* SYNOPSIS: Routine for doing pitch scaling while maintaining
+* duration using the Short Time Fourier Transform.
+*
+* DESCRIPTION: The routine takes a pitchScale factor value which is between 0.5
+* (one octave down) and 2. (one octave up). A value of exactly 1 does not change
+* the pitch. numSampsToProcess tells the routine how many samples in indata[0...
+* numSampsToProcess-1] should be pitch scaled and moved to outdata[0 ...
+* numSampsToProcess-1]. The two buffers can be identical (ie. it can process the
+* data in-place). fftFrameSize defines the FFT frame size used for the
+* processing. Typical values are 1024, 2048 and 4096. 
It may be any value <=
+* MAX_FRAME_LENGTH but it MUST be a power of 2. osamp is the STFT
+* oversampling factor which also determines the overlap between adjacent STFT
+* frames. It should at least be 4 for moderate scaling ratios. A value of 32 is
+* recommended for best quality. sampleRate takes the sample rate for the signal
+* in unit Hz, ie. 44100 for 44.1 kHz audio. The data passed to the routine in
+* indata[] should be in the range [-1.0, 1.0), which is also the output range
+* for the data.
+*
+* COPYRIGHT 1999 Stephan M. Sprenger
+*
+* The Wide Open License (WOL)
+*
+* Permission to use, copy, modify, distribute and sell this software and its
+* documentation for any purpose is hereby granted without fee, provided that
+* the above copyright notice and this license appear in all source copies.
+* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF
+* ANY KIND. See http://www.dspguru.com/wol.htm for more information.
+*
+*****************************************************************************/
+
+#include <math.h>    /* cos, sin, atan2, sqrt */
+#include <string.h>  /* memset, memmove */
+
+#include "smsPitchScale.h"
+
+
+void smsPitchScale(float pitchScale, long numSampsToProcess, long fftFrameSize, long osamp, float sampleRate, float *indata, float *outdata)
+/*
+    Routine smsPitchScale(). See top of file for explanation
+    Purpose: doing pitch scaling while maintaining duration using the Short
+    Time Fourier Transform.
+    Author: (c)1999 Stephan M. Sprenger
+*/
+{
+    /* All work buffers are static: FIFO contents, phases and gRover carry over from one call to the next. */
+    static float gInFIFO[MAX_FRAME_LENGTH];
+    static float gOutFIFO[MAX_FRAME_LENGTH];
+    static float gFFTworksp[2*MAX_FRAME_LENGTH];
+    static float gLastPhase[MAX_FRAME_LENGTH/2+1];   /* +1: bins 0..fftFrameSize/2 inclusive are used */
+    static float gSumPhase[MAX_FRAME_LENGTH/2+1];    /* +1: bins 0..fftFrameSize/2 inclusive are used */
+    static float gOutputAccum[2*MAX_FRAME_LENGTH];
+    static float gAnaFreq[MAX_FRAME_LENGTH];
+    static float gAnaMagn[MAX_FRAME_LENGTH];
+    static float gSynFreq[MAX_FRAME_LENGTH];
+    static float gSynMagn[MAX_FRAME_LENGTH];
+    static long gRover = false, gInit = false;
+    double magn, phase, tmp, window, real, imag;
+    double freqPerBin, expct, fadeZoneLen;
+    long i,k, qpd, index, inFifoLatency, stepSize, fftFrameSize2;
+
+    /* set up some handy variables */
+    fadeZoneLen = fftFrameSize/2;
+    fftFrameSize2 = fftFrameSize/2;
+    stepSize = fftFrameSize/osamp;
+    freqPerBin = sampleRate/(double)fftFrameSize;
+    expct = 2.*M_PI*(double)stepSize/(double)fftFrameSize;
+    inFifoLatency = fftFrameSize-stepSize;
+    if (gRover == false) gRover = inFifoLatency;
+
+    /* initialize our static arrays */
+    if (gInit == false) {
+        memset(gInFIFO, 0, MAX_FRAME_LENGTH*sizeof(float));
+        memset(gOutFIFO, 0, MAX_FRAME_LENGTH*sizeof(float));
+        memset(gFFTworksp, 0, 2*MAX_FRAME_LENGTH*sizeof(float));
+        memset(gLastPhase, 0, sizeof(gLastPhase));
+        memset(gSumPhase, 0, sizeof(gSumPhase));
+        memset(gOutputAccum, 0, 2*MAX_FRAME_LENGTH*sizeof(float));
+        memset(gAnaFreq, 0, MAX_FRAME_LENGTH*sizeof(float));
+        memset(gAnaMagn, 0, MAX_FRAME_LENGTH*sizeof(float));
+        gInit = true;
+    }
+
+    /* main processing loop */
+    for (i = 0; i < numSampsToProcess; i++){
+
+        /* As long as we have not yet collected enough data just read in */
+        gInFIFO[gRover] = indata[i];
+        outdata[i] = gOutFIFO[gRover-inFifoLatency];
+        gRover++;
+
+        /* now we have enough data for processing */
+        if (gRover >= fftFrameSize) {
+            gRover = inFifoLatency;
+
+            /* do windowing and re,im interleave */
+            for (k = 0; k < fftFrameSize;k++) {
+                window = -.5*cos(2.*M_PI*(double)k/(double)fftFrameSize)+.5;
+                gFFTworksp[2*k] = gInFIFO[k] * window;
+                gFFTworksp[2*k+1] = 0.;
+            }
+
+
+            /* ***************** ANALYSIS ******************* */
+            /* do transform */
+            smsFft(gFFTworksp, fftFrameSize, -1);
+
+            /* this is the analysis step */
+            for (k = 0; k <= fftFrameSize2; k++) {
+
+                /* de-interleave FFT buffer */
+                real = gFFTworksp[2*k];
+                imag = gFFTworksp[2*k+1];
+
+                /* compute magnitude and phase */
+                magn = 2.*sqrt(real*real + imag*imag);
+                phase = atan2(imag,real);
+
+                /* compute phase difference */
+                tmp = phase - gLastPhase[k];
+                gLastPhase[k] = phase;
+
+                /* subtract expected phase difference */
+                tmp -= (double)k*expct;
+
+                /* map delta phase into +/- Pi interval */
+                qpd = (long int) (tmp/M_PI);
+                if (qpd >= 0) qpd += qpd&1;
+                else qpd -= qpd&1;
+                tmp -= M_PI*(double)qpd;
+
+                /* get deviation from bin frequency from the +/- Pi interval */
+                tmp = osamp*tmp/(2.*M_PI);
+
+                /* compute the k-th partials' true frequency */
+                tmp = (double)k*freqPerBin + tmp*freqPerBin;
+
+                /* store magnitude and true frequency in analysis arrays */
+                gAnaMagn[k] = magn;
+                gAnaFreq[k] = tmp;
+
+            }
+
+
+
+            /* ***************** PROCESSING ******************* */
+            /* this does the actual pitch scaling */
+            memset(gSynMagn, 0, fftFrameSize*sizeof(float));
+            memset(gSynFreq, 0, fftFrameSize*sizeof(float));
+            for (k = 0; k <= fftFrameSize2; k++) {
+                index = (long int) (k/pitchScale);
+                if (index <= fftFrameSize2) {
+                    /* new bin overrides existing if magnitude is higher */
+                    if (gAnaMagn[index] > gSynMagn[k]) {
+                        gSynMagn[k] = gAnaMagn[index];
+                        gSynFreq[k] = gAnaFreq[index] * pitchScale;
+                    }
+                    /* fill empty bins with nearest neighbour */
+                    if ((gSynFreq[k] == 0.) && (k > 0)) {
+                        gSynFreq[k] = gSynFreq[k-1];
+                        gSynMagn[k] = gSynMagn[k-1];
+                    }
+                }
+            }
+
+
+            /* ***************** SYNTHESIS ******************* */
+            /* this is the synthesis step */
+            for (k = 0; k <= fftFrameSize2; k++) {
+
+                /* get magnitude and true frequency from synthesis arrays */
+                magn = gSynMagn[k];
+                tmp = gSynFreq[k];
+
+                /* subtract bin mid frequency */
+                tmp -= (double)k*freqPerBin;
+
+                /* get bin deviation from freq deviation */
+                tmp /= freqPerBin;
+
+                /* take osamp into account */
+                tmp = 2.*M_PI*tmp/osamp;
+
+                /* add the overlap phase advance back in */
+                tmp += (double)k*expct;
+
+                /* accumulate delta phase to get bin phase */
+                gSumPhase[k] += tmp;
+                phase = gSumPhase[k];
+
+                /* get real and imag part and re-interleave */
+                gFFTworksp[2*k] = magn*cos(phase);
+                gFFTworksp[2*k+1] = magn*sin(phase);
+            }
+
+            /* zero negative frequencies */
+            for (k = fftFrameSize+2; k < 2*fftFrameSize; k++) gFFTworksp[k] = 0.;
+
+            /* do inverse transform */
+            smsFft(gFFTworksp, fftFrameSize, 1);
+
+            /* do windowing and add to output accumulator */
+            for(k=0; k < fftFrameSize; k++) {
+                window = -.5*cos(2.*M_PI*(double)k/(double)fftFrameSize)+.5;
+                gOutputAccum[k] += 2.*window*gFFTworksp[2*k]/(fftFrameSize2*osamp);
+            }
+            for (k = 0; k < stepSize; k++) gOutFIFO[k] = gOutputAccum[k];
+
+            /* shift accumulator */
+            memmove(gOutputAccum, gOutputAccum+stepSize, fftFrameSize*sizeof(float));
+
+            /* move input FIFO */
+            for (k = 0; k < inFifoLatency; k++) gInFIFO[k] = gInFIFO[k+stepSize];
+        }
+    }
+}
+
+
+
+void smsFft(float *fftBuffer, long fftFrameSize, long sign)
+/*
+    FFT routine, (C)1996 S.M.Sprenger. Sign = -1 is FFT, 1 is iFFT (inverse)
+    Fills fftBuffer[0...2*fftFrameSize-1] with the Fourier transform of the
+    time domain data in fftBuffer[0...2*fftFrameSize-1]. The FFT array takes
+    and returns the cosine and sine parts in an interleaved manner, ie.
+    fftBuffer[0] = cosPart[0], fftBuffer[1] = sinPart[0], asf. fftFrameSize
+    must be a power of 2. It expects a complex input signal (see footnote 2),
+    ie. when working with 'common' audio signals our input signal has to be
+    passed as {in[0],0.,in[1],0.,in[2],0.,...} asf. In that case, the transform
+    of the frequencies of interest is in fftBuffer[0...fftFrameSize].
+*/
+{
+    float wr, wi, arg, *p1, *p2, temp;
+    float tr, ti, ur, ui, *p1r, *p1i, *p2r, *p2i;
+    long i, bitm, j, le, le2;
+
+    for (i = 2; i < 2*fftFrameSize-2; i += 2) {
+        for (bitm = 2, j = 0; bitm < 2*fftFrameSize; bitm <<= 1) {
+            if (i & bitm) j++;
+            j <<= 1;
+        }
+        if (i < j) {
+            p1 = fftBuffer+i; p2 = fftBuffer+j;
+            temp = *p1; *(p1++) = *p2;
+            *(p2++) = temp; temp = *p1;
+            *p1 = *p2; *p2 = temp;
+        }
+    }
+    /* one pass per butterfly span le = 4, 8, ..., 2*fftFrameSize; integer bound, so no log() rounding can add a pass */
+    for (le = 4; le <= 2*fftFrameSize; le <<= 1) {
+        le2 = le>>1;
+        ur = 1.0;
+        ui = 0.0;
+        arg = M_PI / (le2>>1);
+        wr = cos(arg);
+        wi = sign*sin(arg);
+        for (j = 0; j < le2; j += 2) {
+            p1r = fftBuffer+j; p1i = p1r+1;
+            p2r = p1r+le2; p2i = p2r+1;
+            for (i = j; i < 2*fftFrameSize; i += le) {
+                tr = *p2r * ur - *p2i * ui;
+                ti = *p2r * ui + *p2i * ur;
+                *p2r = *p1r - tr; *p2i = *p1i - ti;
+                *p1r += tr; *p1i += ti;
+                p1r += le; p1i += le;
+                p2r += le; p2i += le;
+            }
+            tr = ur*wr - ui*wi;
+            ui = ur*wi + ui*wr;
+            ur = tr;
+        }
+    }
+}
+
diff -urNb audacity-src-1.1.0/src/effects/smsPitchScale.h audacity-src-1.1.0-timestretch/src/effects/smsPitchScale.h
--- audacity-src-1.1.0/src/effects/smsPitchScale.h	Wed Dec 31 16:00:00 1969
+++ audacity-src-1.1.0-timestretch/src/effects/smsPitchScale.h	Sat Aug 10 01:17:18 2002
@@ -0,0 +1,33 @@
+/**********************************************************************
+
+  Audacity: A Digital Audio Editor
+
+  smsPitchScale.h
+
+  Doug Hoyte
+
+  These functions are used by the TimeStretch class in order to provide
+  pitch scaling. Parts of these functions may have been modified by me.
+
+  smsPitchScale.cpp and smsPitchScale.h are...
+
+* COPYRIGHT 1999 Stephan M. 
Sprenger
+*
+* The Wide Open License (WOL)
+*
+* Permission to use, copy, modify, distribute and sell this software and its
+* documentation for any purpose is hereby granted without fee, provided that
+* the above copyright notice and this license appear in all source copies.
+* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF
+* ANY KIND. See http://www.dspguru.com/wol.htm for more information.
+*
+*****************************************************************************/
+#ifndef M_PI   /* keep the platform's definition when <math.h> already supplies one */
+#define M_PI 3.14159265358979323846
+#endif
+#define MAX_FRAME_LENGTH 8192   /* size basis of the static work buffers in smsPitchScale() */
+/* In-place FFT over interleaved re/im floats; sign -1 = forward, 1 = inverse. */
+void smsFft(float *fftBuffer, long fftFrameSize, long sign);
+/* Pitch-scales numSampsToProcess samples from indata into outdata (the two may be the same buffer). */
+void smsPitchScale(float pitchScale, long numSampsToProcess, long fftFrameSize,
+long osamp, float sampleRate, float *indata, float *outdata);