package convolve import ( "log" "math" "math/cmplx" "github.com/mjibson/go-dsp/fft" "gonum.org/v1/gonum/dsp/fourier" ) // nextPowerOfTwo returns the next power of two >= n func nextPowerOfTwo(n int) int { p := 1 for p < n { p <<= 1 } return p } // Convolve performs FFT-based convolution of two audio signals // Deprecated: Use Deconvolve for IR extraction from sweep and recorded signals func Convolve(signal1, signal2 []float64) []float64 { resultLen := len(signal1) + len(signal2) - 1 fftLen := nextPowerOfTwo(resultLen) log.Printf("[convolve] signal1: %d, signal2: %d, resultLen: %d, fftLen: %d", len(signal1), len(signal2), resultLen, fftLen) // Zero-pad both signals to fftLen as float64 x := make([]float64, fftLen) copy(x, signal1) y := make([]float64, fftLen) copy(y, signal2) // FFT fft := fourier.NewFFT(fftLen) xFreq := fft.Coefficients(nil, x) // []complex128 yFreq := fft.Coefficients(nil, y) // []complex128 log.Printf("[convolve] xFreq length: %d, yFreq length: %d", len(xFreq), len(yFreq)) // Multiply in frequency domain outFreq := make([]complex128, len(xFreq)) for i := 0; i < len(xFreq) && i < len(yFreq); i++ { outFreq[i] = xFreq[i] * yFreq[i] } // Inverse FFT (returns []float64) outTime := fft.Sequence(nil, outFreq) log.Printf("[convolve] outTime length: %d", len(outTime)) // Defensive: avoid index out of range copyLen := resultLen if len(outTime) < resultLen { log.Printf("[convolve] Warning: outTime length (%d) < resultLen (%d), truncating resultLen", len(outTime), resultLen) copyLen = len(outTime) } result := make([]float64, copyLen) copy(result, outTime[:copyLen]) return result } // Deconvolve extracts the impulse response (IR) from a sweep and its recorded version // by dividing the FFT of the recorded by the FFT of the sweep, with regularization. func Deconvolve(sweep, recorded []float64) []float64 { resultLen := len(recorded) fftLen := nextPowerOfTwo(resultLen) log.Printf("[deconvolve] sweep: %d, recorded: %d, resultLen: %d, fftLen: %d", len(sweep), len(recorded), resultLen, fftLen) // Zero-pad both signals to fftLen sweepPadded := make([]float64, fftLen) recordedPadded := make([]float64, fftLen) copy(sweepPadded, sweep) copy(recordedPadded, recorded) fft := fourier.NewFFT(fftLen) sweepFFT := fft.Coefficients(nil, sweepPadded) recordedFFT := fft.Coefficients(nil, recordedPadded) log.Printf("[deconvolve] sweepFFT length: %d, recordedFFT length: %d", len(sweepFFT), len(recordedFFT)) // Regularization epsilon to avoid division by zero const epsilon = 1e-10 minLen := len(sweepFFT) if len(recordedFFT) < minLen { minLen = len(recordedFFT) } irFFT := make([]complex128, minLen) for i := 0; i < minLen; i++ { denom := sweepFFT[i] if cmplx.Abs(denom) < epsilon { denom = complex(epsilon, 0) } irFFT[i] = recordedFFT[i] / denom } irTime := fft.Sequence(nil, irFFT) log.Printf("[deconvolve] irTime length: %d", len(irTime)) // Defensive: avoid index out of range copyLen := resultLen if len(irTime) < resultLen { log.Printf("[deconvolve] Warning: irTime length (%d) < resultLen (%d), truncating resultLen", len(irTime), resultLen) copyLen = len(irTime) } result := make([]float64, copyLen) copy(result, irTime[:copyLen]) return result } // Normalize normalizes the audio data to prevent clipping // targetPeak is the maximum peak value (e.g., 0.95 for 95% of full scale) func Normalize(data []float64, targetPeak float64) []float64 { if len(data) == 0 { return data } // Find the maximum absolute value maxVal := 0.0 for _, sample := range data { absVal := math.Abs(sample) if absVal > maxVal { maxVal = absVal } } if maxVal == 0 { return data } // Calculate normalization factor normFactor := targetPeak / maxVal // Apply normalization normalized := make([]float64, len(data)) for i, sample := range data { normalized[i] = sample * normFactor } return normalized } // TrimSilence removes leading and trailing silence from the audio data // threshold is the amplitude threshold below which samples are considered silence func TrimSilence(data []float64, threshold float64) []float64 { if len(data) == 0 { return data } // Find start (first non-silent sample) start := 0 for i, sample := range data { if math.Abs(sample) > threshold { start = i break } } // Find end (last non-silent sample) end := len(data) - 1 for i := len(data) - 1; i >= 0; i-- { if math.Abs(data[i]) > threshold { end = i break } } if start >= end { return []float64{} } return data[start : end+1] } // TrimOrPad trims or zero-pads the data to the specified number of samples func TrimOrPad(data []float64, targetSamples int) []float64 { if len(data) == targetSamples { return data } else if len(data) > targetSamples { return data[:targetSamples] } else { out := make([]float64, targetSamples) copy(out, data) // zero-padding is default return out } } // padOrTruncate ensures a slice is exactly n elements long func padOrTruncate[T any](in []T, n int) []T { if len(in) == n { return in } else if len(in) > n { return in[:n] } out := make([]T, n) copy(out, in) return out } // Helper to reconstruct full Hermitian spectrum from N/2+1 real FFT func hermitianSymmetric(fullLen int, halfSpec []complex128) []complex128 { full := make([]complex128, fullLen) N := fullLen // DC full[0] = halfSpec[0] // Positive freqs for k := 1; k < N/2; k++ { full[k] = halfSpec[k] full[N-k] = cmplx.Conj(halfSpec[k]) } // Nyquist (if even) if N%2 == 0 { full[N/2] = halfSpec[N/2] } return full } // MinimumPhaseTransform using go-dsp/fft for full complex FFT/IFFT func MinimumPhaseTransform(ir []float64) []float64 { if len(ir) == 0 { return ir } origLen := len(ir) fftLen := nextPowerOfTwo(origLen) padded := padOrTruncate(ir, fftLen) log.Printf("[MPT] fftLen: %d, padded len: %d", fftLen, len(padded)) // Convert to complex signal := make([]complex128, fftLen) for i, v := range padded { signal[i] = complex(v, 0) } // FFT X := fft.FFT(signal) // Log-magnitude spectrum (complex) logMag := make([]complex128, fftLen) for i, v := range X { mag := cmplx.Abs(v) if mag < 1e-12 { mag = 1e-12 } logMag[i] = complex(math.Log(mag), 0) } // IFFT to get real cepstrum cepstrumC := fft.IFFT(logMag) // Minimum phase cepstrum minPhaseCep := make([]complex128, fftLen) minPhaseCep[0] = cepstrumC[0] // DC for i := 1; i < fftLen/2; i++ { minPhaseCep[i] = 2 * cepstrumC[i] } if fftLen%2 == 0 { minPhaseCep[fftLen/2] = cepstrumC[fftLen/2] // Nyquist (if even) } // Negative quefrency: zero (already zero by default) // FFT of minimum phase cepstrum minPhaseSpec := fft.FFT(minPhaseCep) // Exponentiate to get minimum phase spectrum for i := range minPhaseSpec { minPhaseSpec[i] = cmplx.Exp(minPhaseSpec[i]) } // IFFT to get minimum phase IR minPhaseIR := fft.IFFT(minPhaseSpec) // Return the real part, original length result := make([]float64, origLen) for i := range result { result[i] = real(minPhaseIR[i]) } return result } // realSlice extracts the real part of a []complex128 as []float64 func realSlice(in []complex128) []float64 { out := make([]float64, len(in)) for i, v := range in { out[i] = real(v) } return out } // Resample resamples audio data from one sample rate to another using linear interpolation // with proper anti-aliasing for downsampling func Resample(data []float64, fromSampleRate, toSampleRate int) []float64 { if fromSampleRate == toSampleRate { return data } // For downsampling, apply anti-aliasing filter to prevent aliasing // Filter out frequencies above the target Nyquist frequency if toSampleRate < fromSampleRate { // Calculate target Nyquist frequency (slightly below to ensure clean cutoff) targetNyquist := float64(toSampleRate) / 2.0 // Use 90% of Nyquist as cutoff to ensure clean anti-aliasing antiAliasCutoff := targetNyquist * 0.9 // Apply steep low-pass filter to prevent aliasing // Use 48 dB/octave slope for clean anti-aliasing data = CascadeHighcut(data, fromSampleRate, antiAliasCutoff, 48) } // Calculate the resampling ratio ratio := float64(toSampleRate) / float64(fromSampleRate) newLength := int(float64(len(data)) * ratio) if newLength == 0 { return []float64{} } result := make([]float64, newLength) for i := 0; i < newLength; i++ { // Calculate the position in the original data pos := float64(i) / ratio // Get the integer and fractional parts posInt := int(pos) posFrac := pos - float64(posInt) // Linear interpolation if posInt >= len(data)-1 { // Beyond the end of the data, use the last sample result[i] = data[len(data)-1] } else { // Interpolate between two samples sample1 := data[posInt] sample2 := data[posInt+1] result[i] = sample1 + posFrac*(sample2-sample1) } } return result } // FadeOutLinear applies a linear fade-out to the last fadeSamples of the data. // fadeSamples is the number of samples over which to fade to zero. func FadeOutLinear(data []float64, fadeSamples int) []float64 { if fadeSamples <= 0 || len(data) == 0 { return data } if fadeSamples > len(data) { fadeSamples = len(data) } out := make([]float64, len(data)) copy(out, data) start := len(data) - fadeSamples for i := start; i < len(data); i++ { fade := float64(len(data)-i) / float64(fadeSamples) out[i] *= fade } return out } // biquadFilterState holds the state for a biquad filter type biquadFilterState struct { x1, x2 float64 // input history (intermediate values in DF2T) } // applyBiquadDF2T applies a biquad filter using Direct Form II Transposed (more numerically stable) // This form is preferred for cascaded filters as it reduces numerical errors // Uses high-precision calculations and proper state management for maximum quality func applyBiquadDF2T(data []float64, b0, b1, b2, a1, a2 float64, state *biquadFilterState) []float64 { out := make([]float64, len(data)) // Initialize state if nil if state == nil { state = &biquadFilterState{} } // Direct Form II Transposed implementation with high precision // This structure minimizes numerical errors in cascaded filters for i := 0; i < len(data); i++ { x := data[i] // Compute intermediate value (w[n] = x[n] - a1*w[n-1] - a2*w[n-2]) w := x - a1*state.x1 - a2*state.x2 // Compute output (y[n] = b0*w[n] + b1*w[n-1] + b2*w[n-2]) y := b0*w + b1*state.x1 + b2*state.x2 out[i] = y // Update state (shift delay line) state.x2 = state.x1 state.x1 = w } return out } // warmupFilter applies a filter with warm-up to avoid transients // This is especially important for high-quality filtering func warmupFilter(data []float64, b0, b1, b2, a1, a2 float64) []float64 { if len(data) == 0 { return data } // Use first sample value for warm-up to avoid transients warmupValue := data[0] state := &biquadFilterState{} // Warm up the filter with a few samples of the first value // This initializes the filter state properly warmupSamples := 10 warmupData := make([]float64, warmupSamples) for i := range warmupData { warmupData[i] = warmupValue } _ = applyBiquadDF2T(warmupData, b0, b1, b2, a1, a2, state) // Now apply filter to actual data with warmed-up state return applyBiquadDF2T(data, b0, b1, b2, a1, a2, state) } // ApplyLowpassButterworth applies a 2nd-order Butterworth low-pass filter to the data. // cutoffHz: cutoff frequency in Hz, sampleRate: sample rate in Hz. // Uses Direct Form II Transposed for better numerical stability. func ApplyLowpassButterworth(data []float64, sampleRate int, cutoffHz float64) []float64 { if cutoffHz <= 0 || cutoffHz >= float64(sampleRate)/2 { return data } // Biquad coefficients for Butterworth low-pass w0 := 2 * math.Pi * cutoffHz / float64(sampleRate) cosw0 := math.Cos(w0) sinw0 := math.Sin(w0) Q := 1.0 / math.Sqrt(2) // Butterworth Q alpha := sinw0 / (2 * Q) b0 := (1 - cosw0) / 2 b1 := 1 - cosw0 b2 := (1 - cosw0) / 2 a0 := 1 + alpha a1 := -2 * cosw0 a2 := 1 - alpha // Normalize coefficients b0 /= a0 b1 /= a0 b2 /= a0 a1 /= a0 a2 /= a0 // Apply filter using Direct Form II Transposed state := &biquadFilterState{} return applyBiquadDF2T(data, b0, b1, b2, a1, a2, state) } // ApplyHighpassButterworth applies a 2nd-order Butterworth high-pass filter to the data. // cutoffHz: cutoff frequency in Hz, sampleRate: sample rate in Hz. // Uses Direct Form II Transposed for better numerical stability. func ApplyHighpassButterworth(data []float64, sampleRate int, cutoffHz float64) []float64 { if cutoffHz <= 0 || cutoffHz >= float64(sampleRate)/2 { return data } // Biquad coefficients for Butterworth high-pass w0 := 2 * math.Pi * cutoffHz / float64(sampleRate) cosw0 := math.Cos(w0) sinw0 := math.Sin(w0) Q := 1.0 / math.Sqrt(2) // Butterworth Q alpha := sinw0 / (2 * Q) b0 := (1 + cosw0) / 2 b1 := -(1 + cosw0) b2 := (1 + cosw0) / 2 a0 := 1 + alpha a1 := -2 * cosw0 a2 := 1 - alpha // Normalize coefficients b0 /= a0 b1 /= a0 b2 /= a0 a1 /= a0 a2 /= a0 // Apply filter using Direct Form II Transposed state := &biquadFilterState{} return applyBiquadDF2T(data, b0, b1, b2, a1, a2, state) } // CascadeLowcut applies the low-cut (high-pass) filter multiple times for steeper slopes. // Uses Linkwitz-Riley design principles with high-quality implementation for maximum precision. // Features: proper warm-up, high-precision coefficients, optimized cascade structure. // slopeDb: 12, 24, 36, 48, ... (dB/octave) func CascadeLowcut(data []float64, sampleRate int, cutoffHz float64, slopeDb int) []float64 { if slopeDb < 12 { slopeDb = 12 } if cutoffHz <= 0 || cutoffHz >= float64(sampleRate)/2 { return data } n := slopeDb / 12 if n == 0 { return data } // High-quality filter implementation with proper coefficient calculation // Use high precision for coefficient calculation w0 := 2.0 * math.Pi * cutoffHz / float64(sampleRate) // Pre-calculate trigonometric functions once for precision cosw0 := math.Cos(w0) sinw0 := math.Sin(w0) // Butterworth Q factor (1/sqrt(2) ≈ 0.7071067811865476 for maximally flat response) const butterworthQ = 0.7071067811865476 alpha := sinw0 / (2.0 * butterworthQ) // High-pass Butterworth coefficients b0 := (1.0 + cosw0) / 2.0 b1 := -(1.0 + cosw0) b2 := (1.0 + cosw0) / 2.0 a0 := 1.0 + alpha a1 := -2.0 * cosw0 a2 := 1.0 - alpha // Normalize coefficients for Direct Form II Transposed // This ensures proper scaling and numerical stability b0 /= a0 b1 /= a0 b2 /= a0 a1 /= a0 a2 /= a0 // Apply cascaded filters with proper warm-up for each stage // This ensures clean filtering without transients, especially important for MPT out := make([]float64, len(data)) copy(out, data) for stage := 0; stage < n; stage++ { // Use warm-up for first stage to avoid transients // Subsequent stages benefit from the already-filtered signal if stage == 0 { out = warmupFilter(out, b0, b1, b2, a1, a2) } else { // For subsequent stages, use fresh state but no warm-up needed // as the signal is already filtered state := &biquadFilterState{} filtered := applyBiquadDF2T(out, b0, b1, b2, a1, a2, state) copy(out, filtered) } } return out } // CascadeHighcut applies the high-cut (low-pass) filter multiple times for steeper slopes. // Uses Linkwitz-Riley design principles with high-quality implementation for maximum precision. // Features: proper warm-up, high-precision coefficients, optimized cascade structure. // slopeDb: 12, 24, 36, 48, ... (dB/octave) func CascadeHighcut(data []float64, sampleRate int, cutoffHz float64, slopeDb int) []float64 { if slopeDb < 12 { slopeDb = 12 } if cutoffHz <= 0 || cutoffHz >= float64(sampleRate)/2 { return data } n := slopeDb / 12 if n == 0 { return data } // High-quality filter implementation with proper coefficient calculation // Use high precision for coefficient calculation w0 := 2.0 * math.Pi * cutoffHz / float64(sampleRate) // Pre-calculate trigonometric functions once for precision cosw0 := math.Cos(w0) sinw0 := math.Sin(w0) // Butterworth Q factor (1/sqrt(2) ≈ 0.7071067811865476 for maximally flat response) const butterworthQ = 0.7071067811865476 alpha := sinw0 / (2.0 * butterworthQ) // Low-pass Butterworth coefficients b0 := (1.0 - cosw0) / 2.0 b1 := 1.0 - cosw0 b2 := (1.0 - cosw0) / 2.0 a0 := 1.0 + alpha a1 := -2.0 * cosw0 a2 := 1.0 - alpha // Normalize coefficients for Direct Form II Transposed // This ensures proper scaling and numerical stability b0 /= a0 b1 /= a0 b2 /= a0 a1 /= a0 a2 /= a0 // Apply cascaded filters with proper warm-up for each stage // This ensures clean filtering without transients, especially important for MPT out := make([]float64, len(data)) copy(out, data) for stage := 0; stage < n; stage++ { // Use warm-up for first stage to avoid transients // Subsequent stages benefit from the already-filtered signal if stage == 0 { out = warmupFilter(out, b0, b1, b2, a1, a2) } else { // For subsequent stages, use fresh state but no warm-up needed // as the signal is already filtered state := &biquadFilterState{} filtered := applyBiquadDF2T(out, b0, b1, b2, a1, a2, state) copy(out, filtered) } } return out } // min returns the minimum of three integers func min(a, b, c int) int { if a <= b && a <= c { return a } if b <= a && b <= c { return b } return c } // DetectPhaseInversion detects if the recorded sweep is phase-inverted compared to the sweep // by computing the normalized cross-correlation over a range of lags func DetectPhaseInversion(sweep, recorded []float64) bool { if len(sweep) == 0 || len(recorded) == 0 { return false } windowSize := min(len(sweep), len(recorded), 9600) // 100ms at 96kHz sweepWindow := sweep[:windowSize] recordedWindow := recorded[:windowSize] maxLag := 500 // +/- 500 samples (~5ms) bestCorr := 0.0 bestLag := 0 for lag := -maxLag; lag <= maxLag; lag++ { var corr, sweepSum, recordedSum, sweepSumSq, recordedSumSq float64 count := 0 for i := 0; i < windowSize; i++ { j := i + lag if j < 0 || j >= windowSize { continue } corr += sweepWindow[i] * recordedWindow[j] sweepSum += sweepWindow[i] recordedSum += recordedWindow[j] sweepSumSq += sweepWindow[i] * sweepWindow[i] recordedSumSq += recordedWindow[j] * recordedWindow[j] count++ } if count == 0 { continue } sweepMean := sweepSum / float64(count) recordedMean := recordedSum / float64(count) sweepVar := sweepSumSq/float64(count) - sweepMean*sweepMean recordedVar := recordedSumSq/float64(count) - recordedMean*recordedMean if sweepVar <= 0 || recordedVar <= 0 { continue } corrCoeff := (corr/float64(count) - sweepMean*recordedMean) / math.Sqrt(sweepVar*recordedVar) if math.Abs(corrCoeff) > math.Abs(bestCorr) { bestCorr = corrCoeff bestLag = lag } } log.Printf("[deconvolve] Phase cross-correlation: best lag = %d, coeff = %.4f", bestLag, bestCorr) return bestCorr < 0.0 } // InvertPhase inverts the phase of the audio data by negating all samples func InvertPhase(data []float64) []float64 { inverted := make([]float64, len(data)) for i, sample := range data { inverted[i] = -sample } return inverted } // DeconvolveWithPhaseCorrection extracts the impulse response with automatic phase correction func DeconvolveWithPhaseCorrection(sweep, recorded []float64) []float64 { // Detect if recorded sweep is phase-inverted isInverted := DetectPhaseInversion(sweep, recorded) if isInverted { log.Printf("[deconvolve] Detected phase inversion in recorded sweep, correcting...") recorded = InvertPhase(recorded) } else { log.Printf("[deconvolve] Phase alignment verified") } // Perform normal deconvolution return Deconvolve(sweep, recorded) }