valhallir-deconvolver/pkg/convolve/convolve.go

package convolve

import (
	"log"
	"math"
	"math/cmplx"

	"github.com/mjibson/go-dsp/fft"
	"gonum.org/v1/gonum/dsp/fourier"
)

// nextPowerOfTwo returns the next power of two >= n
func nextPowerOfTwo(n int) int {
	p := 1
	for p < n {
		p <<= 1
	}
	return p
}

// Convolve performs FFT-based convolution of two audio signals
// Deprecated: Use Deconvolve for IR extraction from sweep and recorded signals
func Convolve(signal1, signal2 []float64) []float64 {
	resultLen := len(signal1) + len(signal2) - 1
	fftLen := nextPowerOfTwo(resultLen)

	log.Printf("[convolve] signal1: %d, signal2: %d, resultLen: %d, fftLen: %d", len(signal1), len(signal2), resultLen, fftLen)

	// Zero-pad both signals to fftLen as float64
	x := make([]float64, fftLen)
	copy(x, signal1)
	y := make([]float64, fftLen)
	copy(y, signal2)

	// FFT
	fft := fourier.NewFFT(fftLen)
	xFreq := fft.Coefficients(nil, x) // []complex128
	yFreq := fft.Coefficients(nil, y) // []complex128

	log.Printf("[convolve] xFreq length: %d, yFreq length: %d", len(xFreq), len(yFreq))

	// Multiply in frequency domain
	outFreq := make([]complex128, len(xFreq))
	for i := 0; i < len(xFreq) && i < len(yFreq); i++ {
		outFreq[i] = xFreq[i] * yFreq[i]
	}

	// Inverse FFT (returns []float64)
	outTime := fft.Sequence(nil, outFreq)
	log.Printf("[convolve] outTime length: %d", len(outTime))

	// Defensive: avoid index out of range
	copyLen := resultLen
	if len(outTime) < resultLen {
		log.Printf("[convolve] Warning: outTime length (%d) < resultLen (%d), truncating resultLen", len(outTime), resultLen)
		copyLen = len(outTime)
	}

	result := make([]float64, copyLen)
	copy(result, outTime[:copyLen])

	return result
}

// Deconvolve extracts the impulse response (IR) from a sweep and its recorded version
// by dividing the FFT of the recorded by the FFT of the sweep, with regularization.
func Deconvolve(sweep, recorded []float64) []float64 {
	resultLen := len(recorded)
	fftLen := nextPowerOfTwo(resultLen)

	log.Printf("[deconvolve] sweep: %d, recorded: %d, resultLen: %d, fftLen: %d", len(sweep), len(recorded), resultLen, fftLen)

	// Zero-pad both signals to fftLen
	sweepPadded := make([]float64, fftLen)
	recordedPadded := make([]float64, fftLen)
	copy(sweepPadded, sweep)
	copy(recordedPadded, recorded)

	fft := fourier.NewFFT(fftLen)
	sweepFFT := fft.Coefficients(nil, sweepPadded)
	recordedFFT := fft.Coefficients(nil, recordedPadded)

	log.Printf("[deconvolve] sweepFFT length: %d, recordedFFT length: %d", len(sweepFFT), len(recordedFFT))

	// Regularization epsilon to avoid division by zero
	const epsilon = 1e-10
	minLen := len(sweepFFT)
	if len(recordedFFT) < minLen {
		minLen = len(recordedFFT)
	}
	irFFT := make([]complex128, minLen)
	for i := 0; i < minLen; i++ {
		denom := sweepFFT[i]
		if cmplx.Abs(denom) < epsilon {
			denom = complex(epsilon, 0)
		}
		irFFT[i] = recordedFFT[i] / denom
	}

	irTime := fft.Sequence(nil, irFFT)
	log.Printf("[deconvolve] irTime length: %d", len(irTime))

	// Defensive: avoid index out of range
	copyLen := resultLen
	if len(irTime) < resultLen {
		log.Printf("[deconvolve] Warning: irTime length (%d) < resultLen (%d), truncating resultLen", len(irTime), resultLen)
		copyLen = len(irTime)
	}

	result := make([]float64, copyLen)
	copy(result, irTime[:copyLen])

	return result
}

// Normalize normalizes the audio data to prevent clipping
// targetPeak is the maximum peak value (e.g., 0.95 for 95% of full scale)
func Normalize(data []float64, targetPeak float64) []float64 {
	if len(data) == 0 {
		return data
	}
	// Find the maximum absolute value
	maxVal := 0.0
	for _, sample := range data {
		absVal := math.Abs(sample)
		if absVal > maxVal {
			maxVal = absVal
		}
	}
	if maxVal == 0 {
		return data
	}
	// Calculate normalization factor
	normFactor := targetPeak / maxVal
	// Apply normalization
	normalized := make([]float64, len(data))
	for i, sample := range data {
		normalized[i] = sample * normFactor
	}
	return normalized
}

// TrimSilence removes leading and trailing silence from the audio data
// threshold is the amplitude threshold below which samples are considered silence
func TrimSilence(data []float64, threshold float64) []float64 {
	if len(data) == 0 {
		return data
	}
	// Find start (first non-silent sample)
	start := 0
	for i, sample := range data {
		if math.Abs(sample) > threshold {
			start = i
			break
		}
	}
	// Find end (last non-silent sample)
	end := len(data) - 1
	for i := len(data) - 1; i >= 0; i-- {
		if math.Abs(data[i]) > threshold {
			end = i
			break
		}
	}
	if start >= end {
		return []float64{}
	}
	return data[start : end+1]
}

// TrimOrPad trims or zero-pads the data to the specified number of samples
func TrimOrPad(data []float64, targetSamples int) []float64 {
	if len(data) == targetSamples {
		return data
	} else if len(data) > targetSamples {
		return data[:targetSamples]
	} else {
		out := make([]float64, targetSamples)
		copy(out, data)
		// zero-padding is default
		return out
	}
}

// padOrTruncate ensures a slice is exactly n elements long
func padOrTruncate[T any](in []T, n int) []T {
	if len(in) == n {
		return in
	} else if len(in) > n {
		return in[:n]
	}
	out := make([]T, n)
	copy(out, in)
	return out
}

// Helper to reconstruct full Hermitian spectrum from N/2+1 real FFT
func hermitianSymmetric(fullLen int, halfSpec []complex128) []complex128 {
	full := make([]complex128, fullLen)
	N := fullLen
	// DC
	full[0] = halfSpec[0]
	// Positive freqs
	for k := 1; k < N/2; k++ {
		full[k] = halfSpec[k]
		full[N-k] = cmplx.Conj(halfSpec[k])
	}
	// Nyquist (if even)
	if N%2 == 0 {
		full[N/2] = halfSpec[N/2]
	}
	return full
}

// MinimumPhaseTransform using go-dsp/fft for full complex FFT/IFFT
func MinimumPhaseTransform(ir []float64) []float64 {
	if len(ir) == 0 {
		return ir
	}

	origLen := len(ir)
	fftLen := nextPowerOfTwo(origLen)
	padded := padOrTruncate(ir, fftLen)
	log.Printf("[MPT] fftLen: %d, padded len: %d", fftLen, len(padded))

	// Convert to complex
	signal := make([]complex128, fftLen)
	for i, v := range padded {
		signal[i] = complex(v, 0)
	}

	// FFT
	X := fft.FFT(signal)

	// Log-magnitude spectrum (complex)
	logMag := make([]complex128, fftLen)
	for i, v := range X {
		mag := cmplx.Abs(v)
		if mag < 1e-12 {
			mag = 1e-12
		}
		logMag[i] = complex(math.Log(mag), 0)
	}

	// IFFT to get real cepstrum
	cepstrumC := fft.IFFT(logMag)

	// Minimum phase cepstrum
	minPhaseCep := make([]complex128, fftLen)
	minPhaseCep[0] = cepstrumC[0] // DC
	for i := 1; i < fftLen/2; i++ {
		minPhaseCep[i] = 2 * cepstrumC[i]
	}
	if fftLen%2 == 0 {
		minPhaseCep[fftLen/2] = cepstrumC[fftLen/2] // Nyquist (if even)
	}
	// Negative quefrency: zero (already zero by default)

	// FFT of minimum phase cepstrum
	minPhaseSpec := fft.FFT(minPhaseCep)

	// Exponentiate to get minimum phase spectrum
	for i := range minPhaseSpec {
		minPhaseSpec[i] = cmplx.Exp(minPhaseSpec[i])
	}

	// IFFT to get minimum phase IR
	minPhaseIR := fft.IFFT(minPhaseSpec)

	// Return the real part, original length
	result := make([]float64, origLen)
	for i := range result {
		result[i] = real(minPhaseIR[i])
	}
	return result
}

// realSlice extracts the real part of a []complex128 as []float64
func realSlice(in []complex128) []float64 {
	out := make([]float64, len(in))
	for i, v := range in {
		out[i] = real(v)
	}
	return out
}

// Resample resamples audio data from one sample rate to another using linear interpolation
func Resample(data []float64, fromSampleRate, toSampleRate int) []float64 {
	if fromSampleRate == toSampleRate {
		return data
	}

	// Calculate the resampling ratio
	ratio := float64(toSampleRate) / float64(fromSampleRate)
	newLength := int(float64(len(data)) * ratio)

	if newLength == 0 {
		return []float64{}
	}

	result := make([]float64, newLength)

	for i := 0; i < newLength; i++ {
		// Calculate the position in the original data
		pos := float64(i) / ratio

		// Get the integer and fractional parts
		posInt := int(pos)
		posFrac := pos - float64(posInt)

		// Linear interpolation
		if posInt >= len(data)-1 {
			// Beyond the end of the data, use the last sample
			result[i] = data[len(data)-1]
		} else {
			// Interpolate between two samples
			sample1 := data[posInt]
			sample2 := data[posInt+1]
			result[i] = sample1 + posFrac*(sample2-sample1)
		}
	}

	return result
}

// FadeOutLinear applies a linear fade-out to the last fadeSamples of the data.
// fadeSamples is the number of samples over which to fade to zero.
func FadeOutLinear(data []float64, fadeSamples int) []float64 {
	if fadeSamples <= 0 || len(data) == 0 {
		return data
	}
	if fadeSamples > len(data) {
		fadeSamples = len(data)
	}
	out := make([]float64, len(data))
	copy(out, data)
	start := len(data) - fadeSamples
	for i := start; i < len(data); i++ {
		fade := float64(len(data)-i) / float64(fadeSamples)
		out[i] *= fade
	}
	return out
}

// ApplyLowpassButterworth applies a 2nd-order Butterworth low-pass filter to the data.
// cutoffHz: cutoff frequency in Hz, sampleRate: sample rate in Hz.
func ApplyLowpassButterworth(data []float64, sampleRate int, cutoffHz float64) []float64 {
	if cutoffHz <= 0 || cutoffHz >= float64(sampleRate)/2 {
		return data
	}
	// Biquad coefficients
	w0 := 2 * math.Pi * cutoffHz / float64(sampleRate)
	cosw0 := math.Cos(w0)
	sinw0 := math.Sin(w0)
	Q := 1.0 / math.Sqrt(2) // Butterworth Q
	alpha := sinw0 / (2 * Q)

	b0 := (1 - cosw0) / 2
	b1 := 1 - cosw0
	b2 := (1 - cosw0) / 2
	a0 := 1 + alpha
	a1 := -2 * cosw0
	a2 := 1 - alpha

	// Normalize
	b0 /= a0
	b1 /= a0
	b2 /= a0
	a1 /= a0
	a2 /= a0

	// Apply filter (Direct Form I)
	out := make([]float64, len(data))
	var x1, x2, y1, y2 float64
	for i := 0; i < len(data); i++ {
		x0 := data[i]
		y0 := b0*x0 + b1*x1 + b2*x2 - a1*y1 - a2*y2
		out[i] = y0
		x2 = x1
		x1 = x0
		y2 = y1
		y1 = y0
	}
	return out
}

// ApplyHighpassButterworth applies a 2nd-order Butterworth high-pass filter to the data.
// cutoffHz: cutoff frequency in Hz, sampleRate: sample rate in Hz.
func ApplyHighpassButterworth(data []float64, sampleRate int, cutoffHz float64) []float64 {
	if cutoffHz <= 0 || cutoffHz >= float64(sampleRate)/2 {
		return data
	}
	// Biquad coefficients
	w0 := 2 * math.Pi * cutoffHz / float64(sampleRate)
	cosw0 := math.Cos(w0)
	sinw0 := math.Sin(w0)
	Q := 1.0 / math.Sqrt(2) // Butterworth Q
	alpha := sinw0 / (2 * Q)

	b0 := (1 + cosw0) / 2
	b1 := -(1 + cosw0)
	b2 := (1 + cosw0) / 2
	a0 := 1 + alpha
	a1 := -2 * cosw0
	a2 := 1 - alpha

	// Normalize
	b0 /= a0
	b1 /= a0
	b2 /= a0
	a1 /= a0
	a2 /= a0

	// Apply filter (Direct Form I)
	out := make([]float64, len(data))
	var x1, x2, y1, y2 float64
	for i := 0; i < len(data); i++ {
		x0 := data[i]
		y0 := b0*x0 + b1*x1 + b2*x2 - a1*y1 - a2*y2
		out[i] = y0
		x2 = x1
		x1 = x0
		y2 = y1
		y1 = y0
	}
	return out
}

// CascadeLowcut applies the low-cut (high-pass) filter multiple times for steeper slopes.
// slopeDb: 12, 24, 36, ... (dB/octave)
func CascadeLowcut(data []float64, sampleRate int, cutoffHz float64, slopeDb int) []float64 {
	if slopeDb < 12 {
		slopeDb = 12
	}
	n := slopeDb / 12
	out := data
	for i := 0; i < n; i++ {
		out = ApplyHighpassButterworth(out, sampleRate, cutoffHz)
	}
	return out
}

// CascadeHighcut applies the high-cut (low-pass) filter multiple times for steeper slopes.
// slopeDb: 12, 24, 36, ... (dB/octave)
func CascadeHighcut(data []float64, sampleRate int, cutoffHz float64, slopeDb int) []float64 {
	if slopeDb < 12 {
		slopeDb = 12
	}
	n := slopeDb / 12
	out := data
	for i := 0; i < n; i++ {
		out = ApplyLowpassButterworth(out, sampleRate, cutoffHz)
	}
	return out
}

// min returns the minimum of three integers
func min(a, b, c int) int {
	if a <= b && a <= c {
		return a
	}
	if b <= a && b <= c {
		return b
	}
	return c
}

// DetectPhaseInversion detects if the recorded sweep is phase-inverted compared to the sweep
// by computing the normalized cross-correlation over a range of lags
func DetectPhaseInversion(sweep, recorded []float64) bool {
	if len(sweep) == 0 || len(recorded) == 0 {
		return false
	}

	windowSize := min(len(sweep), len(recorded), 9600) // 100ms at 96kHz
	sweepWindow := sweep[:windowSize]
	recordedWindow := recorded[:windowSize]

	maxLag := 500 // +/- 500 samples (~5ms)
	bestCorr := 0.0
	bestLag := 0

	for lag := -maxLag; lag <= maxLag; lag++ {
		var corr, sweepSum, recordedSum, sweepSumSq, recordedSumSq float64
		count := 0
		for i := 0; i < windowSize; i++ {
			j := i + lag
			if j < 0 || j >= windowSize {
				continue
			}
			corr += sweepWindow[i] * recordedWindow[j]
			sweepSum += sweepWindow[i]
			recordedSum += recordedWindow[j]
			sweepSumSq += sweepWindow[i] * sweepWindow[i]
			recordedSumSq += recordedWindow[j] * recordedWindow[j]
			count++
		}
		if count == 0 {
			continue
		}
		sweepMean := sweepSum / float64(count)
		recordedMean := recordedSum / float64(count)
		sweepVar := sweepSumSq/float64(count) - sweepMean*sweepMean
		recordedVar := recordedSumSq/float64(count) - recordedMean*recordedMean
		if sweepVar <= 0 || recordedVar <= 0 {
			continue
		}
		corrCoeff := (corr/float64(count) - sweepMean*recordedMean) / math.Sqrt(sweepVar*recordedVar)
		if math.Abs(corrCoeff) > math.Abs(bestCorr) {
			bestCorr = corrCoeff
			bestLag = lag
		}
	}

	log.Printf("[deconvolve] Phase cross-correlation: best lag = %d, coeff = %.4f", bestLag, bestCorr)
	return bestCorr < 0.0
}

// InvertPhase inverts the phase of the audio data by negating all samples
func InvertPhase(data []float64) []float64 {
	inverted := make([]float64, len(data))
	for i, sample := range data {
		inverted[i] = -sample
	}
	return inverted
}

// DeconvolveWithPhaseCorrection extracts the impulse response with automatic phase correction
func DeconvolveWithPhaseCorrection(sweep, recorded []float64) []float64 {
	// Detect if recorded sweep is phase-inverted
	isInverted := DetectPhaseInversion(sweep, recorded)

	if isInverted {
		log.Printf("[deconvolve] Detected phase inversion in recorded sweep, correcting...")
		recorded = InvertPhase(recorded)
	} else {
		log.Printf("[deconvolve] Phase alignment verified")
	}

	// Perform normal deconvolution
	return Deconvolve(sweep, recorded)
}