I have been experiencing a very bizarre slowdown of at least 60x that occurs in my sound DSP application only under certain conditions. The code in question implements a 4-way all-pass time-domain crossover using sets of 4th-order Linkwitz-Riley filters.
The slowdown only occurs when the input sound to the crossover is all zeros (or close to zeros). In particular, the slowdown occurs when the input audio has previously been non-zero and then becomes silent. It is enough to halt my entire sound rendering pipeline when there are 10 or 15 instances running, missing buffer deadlines for the device.
I have tried this code with both GCC and VS2012, OS X and windows 7, on multiple different computers, and it always occurs. Could this be some strange artifact of the x86 floating-point implementation? I can't fathom why the same code would be fast when sound is playing and ridiculously slow when sound is not playing. I've looked at the assembly and it seems fine to me.
This also the 2nd time I have implemented a crossover of this sort (first version didn't use SIMD), and both versions displayed the same behavior.
#define GSOUND_FREQUENCY_COUNT 4
class GSOUND_ALIGN(16) SIMDCrossover
{
public:
//********************************************************************************
//********************************************************************************
//********************************************************************************
//****** Public Type Declarations
/// The wide SIMD type used to process all frequency bands together.
typedef SIMDArray<Float32,GSOUND_FREQUENCY_COUNT> SIMDType;
/// A class that stores a few samples of history information for a crossover filter set.
class GSOUND_ALIGN(16) FilterHistory
{
public:
FilterHistory()
{
for ( Index i = 0; i < 4; i++ )
{
for ( Index j = 0; j < GSOUND_FREQUENCY_COUNT; j++ )
input[i][j] = output[i][j] = 0;
}
}
/// The input and output history for the cascaded 2nd-order filters.
SIMDType input[4];
SIMDType output[4];
};
/// A class that stores a few samples of history information for a crossover.
class GSOUND_ALIGN(16) CrossoverHistory
{
public:
/// History information for each of the filter sets in this crossover history.
FilterHistory filters[GSOUND_FREQUENCY_COUNT - 1];
};
//********************************************************************************
//********************************************************************************
//********************************************************************************
//****** Constructor
/// Create a new SIMD crossover.
GSOUND_INLINE SIMDCrossover()
{
}
//********************************************************************************
//********************************************************************************
//********************************************************************************
//****** Crossover Filter Processing Method
/// Apply this crossover filter to the specified input buffer, writing the band-separated SIMD output.
GSOUND_NO_INLINE void filter( CrossoverHistory& history, const Float32* input, Float32* simdOutput, Size numSamples )
{
const Float32* const inputEnd = input + numSamples;
const Size advance = SIMDType::getWidth();
// Copy the history to the stack so that there is no round-trip to memory.
CrossoverHistory localHistory = history;
while ( input != inputEnd )
{
// Expand the input to the SIMD width.
SIMDType simdInput(*input);
// Apply each filter set (loop will be unrolled).
for ( Index i = 0; i < numFilterSets; i++ )
filters[i].apply( simdInput, localHistory.filters[i] );
// Write the output.
simdInput.store( simdOutput );
input++;
simdOutput += advance;
}
// Store the history.
history = localHistory;
}
//********************************************************************************
//********************************************************************************
//********************************************************************************
//****** Crossover Band Accessor Method
/// Reset the crossover for the specified frequency bands.
GSOUND_INLINE void setBands( const FrequencyBands& bands, SampleRate sampleRate )
{
GSOUND_DEBUG_ASSERT_MESSAGE( bands.getBandCount() % 4 == 0,
"SIMD crossover requires number of bands that is multiple of SIMD width." );
GSOUND_DEBUG_ASSERT( bands.getCrossoverCount() == numFilterSets );
for ( Index i = 0; i < numFilterSets; i++ )
{
FilterSet& filterSet = filters[i];
const Float crossover = bands.getCrossover(i);
const Float frequencyRatio = math::clamp( Float(crossover / sampleRate), Float(0), Float(0.499) );
const Float w0HighPass = math::tan( math::pi<Float>()*frequencyRatio );
const Float w0LowPass = Float(1) / w0HighPass;
// Determine the filter for each band for this filter set.
for ( Index j = 0; j < numFrequencyBands; j++ )
{
if ( i >= j )
{
// Low-pass 4th-order Linkwitz-Riley filter, implemented as 2 cascaded 2nd-order Butterworth filters.
getButterworth2LowPass( w0LowPass, filterSet.a[0][j], filterSet.a[1][j], filterSet.a[2][j],
filterSet.b[0][j], filterSet.b[1][j] );
getButterworth2LowPass( w0LowPass, filterSet.a[3][j], filterSet.a[4][j], filterSet.a[5][j],
filterSet.b[2][j], filterSet.b[3][j] );
}
else
{
// High-pass 4th-order Linkwitz-Riley filter, implemented as 2 cascaded 2nd-order Butterworth filters.
getButterworth2HighPass( w0HighPass, filterSet.a[0][j], filterSet.a[1][j], filterSet.a[2][j],
filterSet.b[0][j], filterSet.b[1][j] );
getButterworth2HighPass( w0HighPass, filterSet.a[3][j], filterSet.a[4][j], filterSet.a[5][j],
filterSet.b[2][j], filterSet.b[3][j] );
}
}
}
}
private:
//********************************************************************************
//********************************************************************************
//********************************************************************************
//****** Private Type Declarations
/// A class that implements a SIMD-wide set of crossover filters.
class FilterSet
{
public:
/// Apply the filter set to the specified value using the given history.
GSOUND_FORCE_INLINE void apply( SIMDType& inputOutput, FilterHistory& history )
{
// Apply first 2nd-order filter.
SIMDType in = a[0]*inputOutput;
SIMDType in2 = (in - b[0]*history.output[0]) + (a[1]*history.input[0] - b[1]*history.output[1]) + a[2]*history.input[1];
// Update the history information.
history.input[1] = history.input[0]; history.input[0] = in;
history.output[1] = history.output[0]; history.output[0] = in2;
// Apply second 2nd-order filter to the result of the first.
in = a[3]*in2;
inputOutput = (in - b[2]*history.output[2]) + (a[4]*history.input[2] - b[3]*history.output[3]) + a[5]*history.input[3];
// Update the history information.
history.input[3] = history.input[2]; history.input[2] = in;
history.output[3] = history.output[2]; history.output[2] = inputOutput;
}
/// The coefficients for two cascaded 2nd-order filters.
SIMDType a[6];
SIMDType b[4];
};
//********************************************************************************
//********************************************************************************
//********************************************************************************
//****** Filter Computation Method
/// Get the coefficients of a 1st-order butterworth filter with the given w0.
GSOUND_INLINE static void getButterworth1LowPass( Float w0, Float& a0, Float& a1, Float& b0 )
{
Float A = 1 + w0;
a0 = 1 / A;
a1 = 1;
b0 = (1 - w0)*a0;
}
/// Get the coefficients of a 1st-order butterworth high-pass filter with the given w0.
GSOUND_INLINE static void getButterworth1HighPass( Float w0, Float& a0, Float& a1, Float& a2, Float& b0, Float& b1 )
{
getButterworth1LowPass( w0, a0, a1, b0 );
a1 = -a1;
b0 = -b0;
}
/// Get the coefficients of a 2nd-order butterworth low-pass filter with the given w0.
GSOUND_INLINE static void getButterworth2LowPass( Float w0, Float& a0, Float& a1, Float& a2, Float& b0, Float& b1 )
{
Float B1 = -2.0f * math::cos( math::pi<Float>()*Float(3)/Float(4) );
Float w0squared = w0*w0;
Float A = 1 + B1*w0 + w0squared;
a0 = 1 / A;
a1 = 2;
a2 = 1;
b0 = 2*(1 - w0squared)*a0;
b1 = (1 - B1*w0 + w0squared)*a0;
}
/// Get the coefficients of a 2nd-order butterworth high-pass filter with the given w0.
GSOUND_INLINE static void getButterworth2HighPass( Float w0, Float& a0, Float& a1, Float& a2, Float& b0, Float& b1 )
{
getButterworth2LowPass( w0, a0, a1, a2, b0, b1 );
a1 = -a1;
b0 = -b0;
}
//********************************************************************************
//********************************************************************************
//********************************************************************************
//****** Private Static Data Members
/// The number of frequency bands that this SIMD crossover can handle.
static const Size numFrequencyBands = GSOUND_FREQUENCY_COUNT;
/// The number of filter sets that this SIMD crossover uses.
static const Size numFilterSets = GSOUND_FREQUENCY_COUNT - 1;
//********************************************************************************
//********************************************************************************
//********************************************************************************
//****** Private Data Members
/// The coefficients for two cascaded 2nd-order filters.
FilterSet filters[numFilterSets];
/// The input and output history for the cascaded 2nd-order filters.
SIMDType inputHistory[4];
SIMDType outputHistory[4];
};