package convolve import ( "log" "math" "math/cmplx" "github.com/mjibson/go-dsp/fft" "gonum.org/v1/gonum/dsp/fourier" ) // nextPowerOfTwo returns the next power of two >= n func nextPowerOfTwo(n int) int { p := 1 for p < n { p <<= 1 } return p } // Convolve performs FFT-based convolution of two audio signals // Deprecated: Use Deconvolve for IR extraction from sweep and recorded signals func Convolve(signal1, signal2 []float64) []float64 { resultLen := len(signal1) + len(signal2) - 1 fftLen := nextPowerOfTwo(resultLen) log.Printf("[convolve] signal1: %d, signal2: %d, resultLen: %d, fftLen: %d", len(signal1), len(signal2), resultLen, fftLen) // Zero-pad both signals to fftLen as float64 x := make([]float64, fftLen) copy(x, signal1) y := make([]float64, fftLen) copy(y, signal2) // FFT fft := fourier.NewFFT(fftLen) xFreq := fft.Coefficients(nil, x) // []complex128 yFreq := fft.Coefficients(nil, y) // []complex128 log.Printf("[convolve] xFreq length: %d, yFreq length: %d", len(xFreq), len(yFreq)) // Multiply in frequency domain outFreq := make([]complex128, len(xFreq)) for i := 0; i < len(xFreq) && i < len(yFreq); i++ { outFreq[i] = xFreq[i] * yFreq[i] } // Inverse FFT (returns []float64) outTime := fft.Sequence(nil, outFreq) log.Printf("[convolve] outTime length: %d", len(outTime)) // Defensive: avoid index out of range copyLen := resultLen if len(outTime) < resultLen { log.Printf("[convolve] Warning: outTime length (%d) < resultLen (%d), truncating resultLen", len(outTime), resultLen) copyLen = len(outTime) } result := make([]float64, copyLen) copy(result, outTime[:copyLen]) return result } // Deconvolve extracts the impulse response (IR) from a sweep and its recorded version // by dividing the FFT of the recorded by the FFT of the sweep, with regularization. func Deconvolve(sweep, recorded []float64) []float64 { resultLen := len(recorded) fftLen := nextPowerOfTwo(resultLen) log.Printf("[deconvolve] sweep: %d, recorded: %d, resultLen: %d, fftLen: %d", len(sweep), len(recorded), resultLen, fftLen) // Zero-pad both signals to fftLen sweepPadded := make([]float64, fftLen) recordedPadded := make([]float64, fftLen) copy(sweepPadded, sweep) copy(recordedPadded, recorded) fft := fourier.NewFFT(fftLen) sweepFFT := fft.Coefficients(nil, sweepPadded) recordedFFT := fft.Coefficients(nil, recordedPadded) log.Printf("[deconvolve] sweepFFT length: %d, recordedFFT length: %d", len(sweepFFT), len(recordedFFT)) // Regularization epsilon to avoid division by zero const epsilon = 1e-10 minLen := len(sweepFFT) if len(recordedFFT) < minLen { minLen = len(recordedFFT) } irFFT := make([]complex128, minLen) for i := 0; i < minLen; i++ { denom := sweepFFT[i] if cmplx.Abs(denom) < epsilon { denom = complex(epsilon, 0) } irFFT[i] = recordedFFT[i] / denom } irTime := fft.Sequence(nil, irFFT) log.Printf("[deconvolve] irTime length: %d", len(irTime)) // Defensive: avoid index out of range copyLen := resultLen if len(irTime) < resultLen { log.Printf("[deconvolve] Warning: irTime length (%d) < resultLen (%d), truncating resultLen", len(irTime), resultLen) copyLen = len(irTime) } result := make([]float64, copyLen) copy(result, irTime[:copyLen]) return result } // Normalize normalizes the audio data to prevent clipping // targetPeak is the maximum peak value (e.g., 0.95 for 95% of full scale) func Normalize(data []float64, targetPeak float64) []float64 { if len(data) == 0 { return data } // Find the maximum absolute value maxVal := 0.0 for _, sample := range data { absVal := math.Abs(sample) if absVal > maxVal { maxVal = absVal } } if maxVal == 0 { return data } // Calculate normalization factor normFactor := targetPeak / maxVal // Apply normalization normalized := make([]float64, len(data)) for i, sample := range data { normalized[i] = sample * normFactor } return normalized } // TrimSilence removes leading and trailing silence from the audio data // threshold is the amplitude threshold below which samples are considered silence func TrimSilence(data []float64, threshold float64) []float64 { if len(data) == 0 { return data } // Find start (first non-silent sample) start := 0 for i, sample := range data { if math.Abs(sample) > threshold { start = i break } } // Find end (last non-silent sample) end := len(data) - 1 for i := len(data) - 1; i >= 0; i-- { if math.Abs(data[i]) > threshold { end = i break } } if start >= end { return []float64{} } return data[start : end+1] } // TrimOrPad trims or zero-pads the data to the specified number of samples func TrimOrPad(data []float64, targetSamples int) []float64 { if len(data) == targetSamples { return data } else if len(data) > targetSamples { return data[:targetSamples] } else { out := make([]float64, targetSamples) copy(out, data) // zero-padding is default return out } } // padOrTruncate ensures a slice is exactly n elements long func padOrTruncate[T any](in []T, n int) []T { if len(in) == n { return in } else if len(in) > n { return in[:n] } out := make([]T, n) copy(out, in) return out } // Helper to reconstruct full Hermitian spectrum from N/2+1 real FFT func hermitianSymmetric(fullLen int, halfSpec []complex128) []complex128 { full := make([]complex128, fullLen) N := fullLen // DC full[0] = halfSpec[0] // Positive freqs for k := 1; k < N/2; k++ { full[k] = halfSpec[k] full[N-k] = cmplx.Conj(halfSpec[k]) } // Nyquist (if even) if N%2 == 0 { full[N/2] = halfSpec[N/2] } return full } // MinimumPhaseTransform using go-dsp/fft for full complex FFT/IFFT func MinimumPhaseTransform(ir []float64) []float64 { if len(ir) == 0 { return ir } origLen := len(ir) fftLen := nextPowerOfTwo(origLen) padded := padOrTruncate(ir, fftLen) log.Printf("[MPT] fftLen: %d, padded len: %d", fftLen, len(padded)) // Convert to complex signal := make([]complex128, fftLen) for i, v := range padded { signal[i] = complex(v, 0) } // FFT X := fft.FFT(signal) // Log-magnitude spectrum (complex) logMag := make([]complex128, fftLen) for i, v := range X { mag := cmplx.Abs(v) if mag < 1e-12 { mag = 1e-12 } logMag[i] = complex(math.Log(mag), 0) } // IFFT to get real cepstrum cepstrumC := fft.IFFT(logMag) // Minimum phase cepstrum minPhaseCep := make([]complex128, fftLen) minPhaseCep[0] = cepstrumC[0] // DC for i := 1; i < fftLen/2; i++ { minPhaseCep[i] = 2 * cepstrumC[i] } if fftLen%2 == 0 { minPhaseCep[fftLen/2] = cepstrumC[fftLen/2] // Nyquist (if even) } // Negative quefrency: zero (already zero by default) // FFT of minimum phase cepstrum minPhaseSpec := fft.FFT(minPhaseCep) // Exponentiate to get minimum phase spectrum for i := range minPhaseSpec { minPhaseSpec[i] = cmplx.Exp(minPhaseSpec[i]) } // IFFT to get minimum phase IR minPhaseIR := fft.IFFT(minPhaseSpec) // Return the real part, original length result := make([]float64, origLen) for i := range result { result[i] = real(minPhaseIR[i]) } return result } // realSlice extracts the real part of a []complex128 as []float64 func realSlice(in []complex128) []float64 { out := make([]float64, len(in)) for i, v := range in { out[i] = real(v) } return out } // Resample resamples audio data from one sample rate to another using linear interpolation func Resample(data []float64, fromSampleRate, toSampleRate int) []float64 { if fromSampleRate == toSampleRate { return data } // Calculate the resampling ratio ratio := float64(toSampleRate) / float64(fromSampleRate) newLength := int(float64(len(data)) * ratio) if newLength == 0 { return []float64{} } result := make([]float64, newLength) for i := 0; i < newLength; i++ { // Calculate the position in the original data pos := float64(i) / ratio // Get the integer and fractional parts posInt := int(pos) posFrac := pos - float64(posInt) // Linear interpolation if posInt >= len(data)-1 { // Beyond the end of the data, use the last sample result[i] = data[len(data)-1] } else { // Interpolate between two samples sample1 := data[posInt] sample2 := data[posInt+1] result[i] = sample1 + posFrac*(sample2-sample1) } } return result }