Initial commit. Complete first version of the deconvolver

2025-07-11 08:55:27 +02:00
commit a2f510e062
12 changed files with 977 additions and 0 deletions
--- a/pkg/convolve/convolve.go
+++ b/pkg/convolve/convolve.go
@@ -0,0 +1,324 @@
+package convolve
+
+import (
+	"log"
+	"math"
+	"math/cmplx"
+
+	"github.com/mjibson/go-dsp/fft"
+	"gonum.org/v1/gonum/dsp/fourier"
+)
+
+// nextPowerOfTwo reports the smallest power of two that is not less than n.
+// Any n below 1 (zero or negative) yields 1.
+func nextPowerOfTwo(n int) int {
+	pow := 1
+	for pow < n {
+		pow *= 2
+	}
+	return pow
+}
+
+// Convolve returns the linear convolution of signal1 and signal2, computed by
+// multiplying their spectra.
+//
+// Both inputs are zero-padded to a power-of-two length of at least
+// len(signal1)+len(signal2)-1 so that the circular convolution produced by the
+// FFT equals the linear one. The result has len(signal1)+len(signal2)-1
+// samples; if either input is empty the result is nil.
+//
+// Deprecated: Use Deconvolve for IR extraction from sweep and recorded signals.
+func Convolve(signal1, signal2 []float64) []float64 {
+	// Convolving with an empty signal yields nothing. Without this guard two
+	// empty inputs give resultLen == -1 and the final make would panic.
+	if len(signal1) == 0 || len(signal2) == 0 {
+		return nil
+	}
+
+	resultLen := len(signal1) + len(signal2) - 1
+	fftLen := nextPowerOfTwo(resultLen)
+
+	log.Printf("[convolve] signal1: %d, signal2: %d, resultLen: %d, fftLen: %d", len(signal1), len(signal2), resultLen, fftLen)
+
+	// Zero-pad both signals to fftLen.
+	x := make([]float64, fftLen)
+	copy(x, signal1)
+	y := make([]float64, fftLen)
+	copy(y, signal2)
+
+	// Named "plan" rather than "fft" so it does not shadow the imported
+	// go-dsp fft package inside this function.
+	plan := fourier.NewFFT(fftLen)
+	xFreq := plan.Coefficients(nil, x)
+	yFreq := plan.Coefficients(nil, y)
+
+	log.Printf("[convolve] xFreq length: %d, yFreq length: %d", len(xFreq), len(yFreq))
+
+	// Multiply in the frequency domain, reusing xFreq as the product buffer.
+	// Both spectra come from the same plan and therefore have equal length.
+	for i := range xFreq {
+		xFreq[i] *= yFreq[i]
+	}
+
+	// Inverse transform; the plan always returns fftLen (>= resultLen) samples.
+	outTime := plan.Sequence(nil, xFreq)
+	log.Printf("[convolve] outTime length: %d", len(outTime))
+
+	// gonum's Coefficients/Sequence pair is unnormalized: a round trip scales
+	// the data by fftLen, so divide it back out while copying the result.
+	scale := 1 / float64(fftLen)
+	result := make([]float64, resultLen)
+	for i := range result {
+		result[i] = outTime[i] * scale
+	}
+	return result
+}
+
+// Deconvolve extracts the impulse response (IR) from a sweep and its recorded
+// version by dividing the FFT of the recording by the FFT of the sweep, with
+// regularization.
+//
+// The returned IR has len(recorded) samples; an empty recording yields nil.
+func Deconvolve(sweep, recorded []float64) []float64 {
+	resultLen := len(recorded)
+	if resultLen == 0 {
+		return nil
+	}
+
+	// Size the transform to hold the longer of the two signals. Sizing it from
+	// the recording alone would silently truncate a sweep that is longer.
+	padLen := resultLen
+	if len(sweep) > padLen {
+		padLen = len(sweep)
+	}
+	fftLen := nextPowerOfTwo(padLen)
+
+	log.Printf("[deconvolve] sweep: %d, recorded: %d, resultLen: %d, fftLen: %d", len(sweep), len(recorded), resultLen, fftLen)
+
+	// Zero-pad both signals to fftLen.
+	sweepPadded := make([]float64, fftLen)
+	recordedPadded := make([]float64, fftLen)
+	copy(sweepPadded, sweep)
+	copy(recordedPadded, recorded)
+
+	// Named "plan" rather than "fft" so it does not shadow the imported
+	// go-dsp fft package inside this function.
+	plan := fourier.NewFFT(fftLen)
+	sweepFFT := plan.Coefficients(nil, sweepPadded)
+	recordedFFT := plan.Coefficients(nil, recordedPadded)
+
+	log.Printf("[deconvolve] sweepFFT length: %d, recordedFFT length: %d", len(sweepFFT), len(recordedFFT))
+
+	// Spectral division, done in place on recordedFFT. Bins where the sweep
+	// has (almost) no energy are divided by epsilon instead, to avoid a
+	// division by zero.
+	const epsilon = 1e-10
+	for i, denom := range sweepFFT {
+		if cmplx.Abs(denom) < epsilon {
+			denom = complex(epsilon, 0)
+		}
+		recordedFFT[i] /= denom
+	}
+
+	// Inverse transform; the plan always returns fftLen (>= resultLen) samples.
+	irTime := plan.Sequence(nil, recordedFFT)
+	log.Printf("[deconvolve] irTime length: %d", len(irTime))
+
+	// gonum's Coefficients/Sequence pair is unnormalized: Sequence returns the
+	// inverse transform multiplied by fftLen, so divide it back out here.
+	scale := 1 / float64(fftLen)
+	result := make([]float64, resultLen)
+	for i := range result {
+		result[i] = irTime[i] * scale
+	}
+	return result
+}
+
+// Normalize normalizes the audio data to prevent clipping
+// targetPeak is the maximum peak value (e.g., 0.95 for 95% of full scale)
+func Normalize(data []float64, targetPeak float64) []float64 {
+	if len(data) == 0 {
+		return data
+	}
+	// Find the maximum absolute value
+	maxVal := 0.0
+	for _, sample := range data {
+		absVal := math.Abs(sample)
+		if absVal > maxVal {
+			maxVal = absVal
+		}
+	}
+	if maxVal == 0 {
+		return data
+	}
+	// Calculate normalization factor
+	normFactor := targetPeak / maxVal
+	// Apply normalization
+	normalized := make([]float64, len(data))
+	for i, sample := range data {
+		normalized[i] = sample * normFactor
+	}
+	return normalized
+}
+
+// TrimSilence removes leading and trailing silence from the audio data.
+//
+// threshold is the amplitude at or below which a sample counts as silence.
+// The result is the sub-slice of data running from the first to the last
+// sample whose absolute value exceeds threshold; it shares memory with data.
+// If no sample exceeds threshold an empty slice is returned.
+func TrimSilence(data []float64, threshold float64) []float64 {
+	if len(data) == 0 {
+		return data
+	}
+
+	// Find the first non-silent sample. -1 marks "none found", which must be
+	// told apart from "found at index 0".
+	start := -1
+	for i, sample := range data {
+		if math.Abs(sample) > threshold {
+			start = i
+			break
+		}
+	}
+	if start < 0 {
+		// Everything is silence.
+		return []float64{}
+	}
+
+	// Find the last non-silent sample. It is at worst the one at start, so a
+	// single audible sample is kept rather than discarded.
+	end := start
+	for i := len(data) - 1; i > start; i-- {
+		if math.Abs(data[i]) > threshold {
+			end = i
+			break
+		}
+	}
+	return data[start : end+1]
+}
+
+// TrimOrPad returns data adjusted to exactly targetSamples samples: longer
+// input is cut at the end, shorter input is extended with trailing zeros.
+// Only the padded case allocates; the other two return (a prefix of) data.
+func TrimOrPad(data []float64, targetSamples int) []float64 {
+	switch {
+	case len(data) > targetSamples:
+		return data[:targetSamples]
+	case len(data) < targetSamples:
+		// make zero-fills, so only the existing samples need copying.
+		padded := make([]float64, targetSamples)
+		copy(padded, data)
+		return padded
+	default:
+		return data
+	}
+}
+
+// padOrTruncate returns a slice of exactly n elements: in itself when it
+// already has n elements, its first n elements when it is longer, or a new
+// slice holding in followed by zero values when it is shorter.
+func padOrTruncate[T any](in []T, n int) []T {
+	if len(in) >= n {
+		// When len(in) == n this is the whole of in, so one branch serves
+		// both the exact-length and the truncating case.
+		return in[:n]
+	}
+	padded := make([]T, n)
+	copy(padded, in)
+	return padded
+}
+
+// hermitianSymmetric rebuilds the full, conjugate-symmetric spectrum of length
+// fullLen from the non-redundant half spectrum of a real signal.
+//
+// halfSpec must hold at least fullLen/2+1 bins (DC first). A non-positive
+// fullLen yields nil.
+func hermitianSymmetric(fullLen int, halfSpec []complex128) []complex128 {
+	if fullLen <= 0 {
+		return nil
+	}
+	full := make([]complex128, fullLen)
+
+	// DC has no mirror image.
+	full[0] = halfSpec[0]
+
+	// Bins 1..(fullLen-1)/2 each have a conjugate mirror at fullLen-k. The
+	// upper bound matters for odd lengths: "k < fullLen/2" would skip the
+	// last pair (e.g. bins 2 and 3 when fullLen is 5).
+	for k := 1; k <= (fullLen-1)/2; k++ {
+		full[k] = halfSpec[k]
+		full[fullLen-k] = cmplx.Conj(halfSpec[k])
+	}
+
+	// Even lengths additionally have a Nyquist bin, which is its own mirror.
+	if fullLen%2 == 0 {
+		full[fullLen/2] = halfSpec[fullLen/2]
+	}
+	return full
+}
+
+// MinimumPhaseTransform using go-dsp/fft for full complex FFT/IFFT
+func MinimumPhaseTransform(ir []float64) []float64 {
+	if len(ir) == 0 {
+		return ir
+	}
+
+	origLen := len(ir)
+	fftLen := nextPowerOfTwo(origLen)
+	padded := padOrTruncate(ir, fftLen)
+	log.Printf("[MPT] fftLen: %d, padded len: %d", fftLen, len(padded))
+
+	// Convert to complex
+	signal := make([]complex128, fftLen)
+	for i, v := range padded {
+		signal[i] = complex(v, 0)
+	}
+
+	// FFT
+	X := fft.FFT(signal)
+
+	// Log-magnitude spectrum (complex)
+	logMag := make([]complex128, fftLen)
+	for i, v := range X {
+		mag := cmplx.Abs(v)
+		if mag < 1e-12 {
+			mag = 1e-12
+		}
+		logMag[i] = complex(math.Log(mag), 0)
+	}
+
+	// IFFT to get real cepstrum
+	cepstrumC := fft.IFFT(logMag)
+
+	// Minimum phase cepstrum
+	minPhaseCep := make([]complex128, fftLen)
+	minPhaseCep[0] = cepstrumC[0] // DC
+	for i := 1; i < fftLen/2; i++ {
+		minPhaseCep[i] = 2 * cepstrumC[i]
+	}
+	if fftLen%2 == 0 {
+		minPhaseCep[fftLen/2] = cepstrumC[fftLen/2] // Nyquist (if even)
+	}
+	// Negative quefrency: zero (already zero by default)
+
+	// FFT of minimum phase cepstrum
+	minPhaseSpec := fft.FFT(minPhaseCep)
+
+	// Exponentiate to get minimum phase spectrum
+	for i := range minPhaseSpec {
+		minPhaseSpec[i] = cmplx.Exp(minPhaseSpec[i])
+	}
+
+	// IFFT to get minimum phase IR
+	minPhaseIR := fft.IFFT(minPhaseSpec)
+
+	// Return the real part, original length
+	result := make([]float64, origLen)
+	for i := range result {
+		result[i] = real(minPhaseIR[i])
+	}
+	return result
+}
+
+// realSlice returns the real components of in, in order, as a new []float64
+// of the same length. The imaginary components are discarded.
+func realSlice(in []complex128) []float64 {
+	reals := make([]float64, 0, len(in))
+	for _, c := range in {
+		reals = append(reals, real(c))
+	}
+	return reals
+}
+
+// Resample converts data from fromSampleRate to toSampleRate by linear
+// interpolation between neighbouring input samples.
+//
+// When both rates are equal, data itself is returned. Otherwise the result is
+// a new slice of int(len(data) * toSampleRate / fromSampleRate) samples;
+// output positions at or past the last input sample repeat that sample.
+func Resample(data []float64, fromSampleRate, toSampleRate int) []float64 {
+	if fromSampleRate == toSampleRate {
+		return data
+	}
+
+	// Output samples per input sample.
+	ratio := float64(toSampleRate) / float64(fromSampleRate)
+	outLen := int(float64(len(data)) * ratio)
+	if outLen == 0 {
+		return []float64{}
+	}
+
+	last := len(data) - 1
+	out := make([]float64, outLen)
+	for i := range out {
+		// Fractional position of output sample i on the input time axis.
+		pos := float64(i) / ratio
+		idx := int(pos)
+
+		if idx >= last {
+			// No right-hand neighbour to interpolate towards: hold the
+			// final input sample.
+			out[i] = data[last]
+			continue
+		}
+
+		frac := pos - float64(idx)
+		lo, hi := data[idx], data[idx+1]
+		out[i] = lo + frac*(hi-lo)
+	}
+	return out
+}
--- a/pkg/wav/reader.go
+++ b/pkg/wav/reader.go
@@ -0,0 +1,104 @@
+package wav
+
+import (
+	"fmt"
+	"os"
+
+	"valhallir-convoluter/pkg/convolve"
+
+	"github.com/go-audio/audio"
+	"github.com/go-audio/wav"
+)
+
+// WAVData holds the PCM samples decoded from a WAV file together with the format metadata describing them.
+type WAVData struct {
+	SampleRate int       // sample rate of PCMData in Hz (ReadWAVFile always sets 96000)
+	BitDepth   int       // bit depth of the source file, not of PCMData (ReadWAVFile keeps the original value)
+	Channels   int       // channel count of PCMData (ReadWAVFile always sets 1)
+	PCMData    []float64 // samples as float64; ReadWAVFile scales them by the source bit depth
+}
+
+// toMono downmixes interleaved multi-channel samples to mono by averaging the
+// channels of each frame.
+//
+// data is expected to be interleaved (frame after frame, channels samples
+// each); a trailing incomplete frame is dropped. With channels <= 1 there is
+// nothing to mix and data is returned unchanged.
+func toMono(data []float64, channels int) []float64 {
+	// <= rather than == so that a zero or negative channel count cannot
+	// reach the division and the make below.
+	if channels <= 1 {
+		return data
+	}
+
+	mono := make([]float64, len(data)/channels)
+	for i := range mono {
+		sum := 0.0
+		for _, sample := range data[i*channels : (i+1)*channels] {
+			sum += sample
+		}
+		mono[i] = sum / float64(channels)
+	}
+	return mono
+}
+
+// ReadWAVFile reads the WAV file at filePath and returns its audio as mono
+// float64 samples at 96 kHz.
+//
+// Integer samples are scaled by the file's bit depth, multi-channel audio is
+// averaged down to one channel, and any sample rate other than 96000 is
+// resampled. The returned WAVData reports SampleRate 96000 and Channels 1,
+// while BitDepth keeps the bit depth of the source file.
+//
+// An error is returned if the file cannot be opened, is not a valid WAV file,
+// reports a non-positive sample rate, or fails while its PCM data is decoded.
+func ReadWAVFile(filePath string) (*WAVData, error) {
+	file, err := os.Open(filePath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open file %s: %w", filePath, err)
+	}
+	defer file.Close()
+
+	decoder := wav.NewDecoder(file)
+	if !decoder.IsValidFile() {
+		return nil, fmt.Errorf("file %s is not a valid WAV file", filePath)
+	}
+
+	channels := int(decoder.NumChans)
+	inSampleRate := int(decoder.SampleRate)
+	if inSampleRate <= 0 {
+		// Resample divides by the source rate.
+		return nil, fmt.Errorf("file %s has invalid sample rate %d", filePath, inSampleRate)
+	}
+
+	// Full-scale value used to map integer samples into the -1.0..1.0 range.
+	var norm float64
+	switch decoder.BitDepth {
+	case 16:
+		norm = float64(1 << 15)
+	case 32:
+		norm = float64(1 << 31)
+	default:
+		// 24-bit, and the fallback for any other depth.
+		norm = float64(1 << 23)
+	}
+
+	// Decode in chunks of 4096 samples, converting straight to float64 so no
+	// intermediate integer copy of the whole file is kept.
+	buf := &audio.IntBuffer{
+		Data:   make([]int, 4096),
+		Format: &audio.Format{SampleRate: inSampleRate, NumChannels: channels},
+	}
+	var floatData []float64
+	for {
+		n, err := decoder.PCMBuffer(buf)
+		if err != nil {
+			// Report decode failures instead of returning whatever was read
+			// so far (possibly nothing) as if it were the complete file.
+			return nil, fmt.Errorf("failed to read PCM data from %s: %w", filePath, err)
+		}
+		if n == 0 {
+			break
+		}
+		for _, sample := range buf.Data[:n] {
+			floatData = append(floatData, float64(int32(sample))/norm)
+		}
+	}
+
+	// Convert to mono if needed.
+	if channels > 1 {
+		floatData = toMono(floatData, channels)
+	}
+
+	// Resample to 96kHz if needed.
+	if inSampleRate != 96000 {
+		floatData = convolve.Resample(floatData, inSampleRate, 96000)
+	}
+
+	return &WAVData{
+		SampleRate: 96000,
+		BitDepth:   int(decoder.BitDepth), // original bit depth
+		Channels:   1,
+		PCMData:    floatData,
+	}, nil
+}
--- a/pkg/wav/writer.go
+++ b/pkg/wav/writer.go
@@ -0,0 +1,90 @@
+package wav
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/go-audio/audio"
+	"github.com/go-audio/wav"
+)
+
+// WriteWAVFileWithOptions writes float64 audio data to a WAV file with specified sample rate and bit depth
+func WriteWAVFileWithOptions(filePath string, data []float64, sampleRate, bitDepth int) error {
+	file, err := os.Create(filePath)
+	if err != nil {
+		return fmt.Errorf("failed to create file %s: %w", filePath, err)
+	}
+	defer file.Close()
+
+	// Convert float64 to appropriate integer format based on bit depth
+	var intData []int
+	switch bitDepth {
+	case 16:
+		intData = make([]int, len(data))
+		for i, sample := range data {
+			// Clamp to [-1, 1] range
+			if sample > 1.0 {
+				sample = 1.0
+			} else if sample < -1.0 {
+				sample = -1.0
+			}
+			// Convert to 16-bit integer
+			intSample := int(sample * float64(1<<15))
+			intData[i] = intSample
+		}
+	case 24:
+		intData = make([]int, len(data))
+		for i, sample := range data {
+			// Clamp to [-1, 1] range
+			if sample > 1.0 {
+				sample = 1.0
+			} else if sample < -1.0 {
+				sample = -1.0
+			}
+			// Convert to 24-bit integer
+			intSample := int(sample * float64(1<<23))
+			intData[i] = intSample
+		}
+	case 32:
+		intData = make([]int, len(data))
+		for i, sample := range data {
+			// Clamp to [-1, 1] range
+			if sample > 1.0 {
+				sample = 1.0
+			} else if sample < -1.0 {
+				sample = -1.0
+			}
+			// Convert to 32-bit integer
+			intSample := int(sample * float64(1<<31))
+			intData[i] = intSample
+		}
+	default:
+		return fmt.Errorf("unsupported bit depth: %d", bitDepth)
+	}
+
+	// Create audio buffer
+	audioBuf := &audio.IntBuffer{
+		Format: &audio.Format{
+			NumChannels: 1,
+			SampleRate:  sampleRate,
+		},
+		Data:           intData,
+		SourceBitDepth: bitDepth,
+	}
+
+	// Create WAV encoder
+	encoder := wav.NewEncoder(file, sampleRate, bitDepth, 1, 1)
+	defer encoder.Close()
+
+	// Write audio data
+	if err := encoder.Write(audioBuf); err != nil {
+		return fmt.Errorf("failed to write audio data: %w", err)
+	}
+
+	return nil
+}
+
+// WriteWAVFile writes float64 audio data to a 24-bit WAV file at the given sample rate (24-bit is the default format); it delegates to WriteWAVFileWithOptions.
+func WriteWAVFile(filePath string, data []float64, sampleRate int) error {
+	return WriteWAVFileWithOptions(filePath, data, sampleRate, 24)
+}