Initial commit. Complete first version of the deconvolver
This commit is contained in:
324
pkg/convolve/convolve.go
Normal file
324
pkg/convolve/convolve.go
Normal file
@@ -0,0 +1,324 @@
|
||||
package convolve
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"math/cmplx"
|
||||
|
||||
"github.com/mjibson/go-dsp/fft"
|
||||
"gonum.org/v1/gonum/dsp/fourier"
|
||||
)
|
||||
|
||||
// nextPowerOfTwo returns the next power of two >= n
|
||||
func nextPowerOfTwo(n int) int {
|
||||
p := 1
|
||||
for p < n {
|
||||
p <<= 1
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// Convolve performs FFT-based convolution of two audio signals
|
||||
// Deprecated: Use Deconvolve for IR extraction from sweep and recorded signals
|
||||
func Convolve(signal1, signal2 []float64) []float64 {
|
||||
resultLen := len(signal1) + len(signal2) - 1
|
||||
fftLen := nextPowerOfTwo(resultLen)
|
||||
|
||||
log.Printf("[convolve] signal1: %d, signal2: %d, resultLen: %d, fftLen: %d", len(signal1), len(signal2), resultLen, fftLen)
|
||||
|
||||
// Zero-pad both signals to fftLen as float64
|
||||
x := make([]float64, fftLen)
|
||||
copy(x, signal1)
|
||||
y := make([]float64, fftLen)
|
||||
copy(y, signal2)
|
||||
|
||||
// FFT
|
||||
fft := fourier.NewFFT(fftLen)
|
||||
xFreq := fft.Coefficients(nil, x) // []complex128
|
||||
yFreq := fft.Coefficients(nil, y) // []complex128
|
||||
|
||||
log.Printf("[convolve] xFreq length: %d, yFreq length: %d", len(xFreq), len(yFreq))
|
||||
|
||||
// Multiply in frequency domain
|
||||
outFreq := make([]complex128, len(xFreq))
|
||||
for i := 0; i < len(xFreq) && i < len(yFreq); i++ {
|
||||
outFreq[i] = xFreq[i] * yFreq[i]
|
||||
}
|
||||
|
||||
// Inverse FFT (returns []float64)
|
||||
outTime := fft.Sequence(nil, outFreq)
|
||||
log.Printf("[convolve] outTime length: %d", len(outTime))
|
||||
|
||||
// Defensive: avoid index out of range
|
||||
copyLen := resultLen
|
||||
if len(outTime) < resultLen {
|
||||
log.Printf("[convolve] Warning: outTime length (%d) < resultLen (%d), truncating resultLen", len(outTime), resultLen)
|
||||
copyLen = len(outTime)
|
||||
}
|
||||
|
||||
result := make([]float64, copyLen)
|
||||
copy(result, outTime[:copyLen])
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// Deconvolve extracts the impulse response (IR) from a sweep and its recorded version
|
||||
// by dividing the FFT of the recorded by the FFT of the sweep, with regularization.
|
||||
func Deconvolve(sweep, recorded []float64) []float64 {
|
||||
resultLen := len(recorded)
|
||||
fftLen := nextPowerOfTwo(resultLen)
|
||||
|
||||
log.Printf("[deconvolve] sweep: %d, recorded: %d, resultLen: %d, fftLen: %d", len(sweep), len(recorded), resultLen, fftLen)
|
||||
|
||||
// Zero-pad both signals to fftLen
|
||||
sweepPadded := make([]float64, fftLen)
|
||||
recordedPadded := make([]float64, fftLen)
|
||||
copy(sweepPadded, sweep)
|
||||
copy(recordedPadded, recorded)
|
||||
|
||||
fft := fourier.NewFFT(fftLen)
|
||||
sweepFFT := fft.Coefficients(nil, sweepPadded)
|
||||
recordedFFT := fft.Coefficients(nil, recordedPadded)
|
||||
|
||||
log.Printf("[deconvolve] sweepFFT length: %d, recordedFFT length: %d", len(sweepFFT), len(recordedFFT))
|
||||
|
||||
// Regularization epsilon to avoid division by zero
|
||||
const epsilon = 1e-10
|
||||
minLen := len(sweepFFT)
|
||||
if len(recordedFFT) < minLen {
|
||||
minLen = len(recordedFFT)
|
||||
}
|
||||
irFFT := make([]complex128, minLen)
|
||||
for i := 0; i < minLen; i++ {
|
||||
denom := sweepFFT[i]
|
||||
if cmplx.Abs(denom) < epsilon {
|
||||
denom = complex(epsilon, 0)
|
||||
}
|
||||
irFFT[i] = recordedFFT[i] / denom
|
||||
}
|
||||
|
||||
irTime := fft.Sequence(nil, irFFT)
|
||||
log.Printf("[deconvolve] irTime length: %d", len(irTime))
|
||||
|
||||
// Defensive: avoid index out of range
|
||||
copyLen := resultLen
|
||||
if len(irTime) < resultLen {
|
||||
log.Printf("[deconvolve] Warning: irTime length (%d) < resultLen (%d), truncating resultLen", len(irTime), resultLen)
|
||||
copyLen = len(irTime)
|
||||
}
|
||||
|
||||
result := make([]float64, copyLen)
|
||||
copy(result, irTime[:copyLen])
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// Normalize normalizes the audio data to prevent clipping
|
||||
// targetPeak is the maximum peak value (e.g., 0.95 for 95% of full scale)
|
||||
func Normalize(data []float64, targetPeak float64) []float64 {
|
||||
if len(data) == 0 {
|
||||
return data
|
||||
}
|
||||
// Find the maximum absolute value
|
||||
maxVal := 0.0
|
||||
for _, sample := range data {
|
||||
absVal := math.Abs(sample)
|
||||
if absVal > maxVal {
|
||||
maxVal = absVal
|
||||
}
|
||||
}
|
||||
if maxVal == 0 {
|
||||
return data
|
||||
}
|
||||
// Calculate normalization factor
|
||||
normFactor := targetPeak / maxVal
|
||||
// Apply normalization
|
||||
normalized := make([]float64, len(data))
|
||||
for i, sample := range data {
|
||||
normalized[i] = sample * normFactor
|
||||
}
|
||||
return normalized
|
||||
}
|
||||
|
||||
// TrimSilence removes leading and trailing silence from the audio data
|
||||
// threshold is the amplitude threshold below which samples are considered silence
|
||||
func TrimSilence(data []float64, threshold float64) []float64 {
|
||||
if len(data) == 0 {
|
||||
return data
|
||||
}
|
||||
// Find start (first non-silent sample)
|
||||
start := 0
|
||||
for i, sample := range data {
|
||||
if math.Abs(sample) > threshold {
|
||||
start = i
|
||||
break
|
||||
}
|
||||
}
|
||||
// Find end (last non-silent sample)
|
||||
end := len(data) - 1
|
||||
for i := len(data) - 1; i >= 0; i-- {
|
||||
if math.Abs(data[i]) > threshold {
|
||||
end = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if start >= end {
|
||||
return []float64{}
|
||||
}
|
||||
return data[start : end+1]
|
||||
}
|
||||
|
||||
// TrimOrPad trims or zero-pads the data to the specified number of samples
|
||||
func TrimOrPad(data []float64, targetSamples int) []float64 {
|
||||
if len(data) == targetSamples {
|
||||
return data
|
||||
} else if len(data) > targetSamples {
|
||||
return data[:targetSamples]
|
||||
} else {
|
||||
out := make([]float64, targetSamples)
|
||||
copy(out, data)
|
||||
// zero-padding is default
|
||||
return out
|
||||
}
|
||||
}
|
||||
|
||||
// padOrTruncate ensures a slice is exactly n elements long
|
||||
func padOrTruncate[T any](in []T, n int) []T {
|
||||
if len(in) == n {
|
||||
return in
|
||||
} else if len(in) > n {
|
||||
return in[:n]
|
||||
}
|
||||
out := make([]T, n)
|
||||
copy(out, in)
|
||||
return out
|
||||
}
|
||||
|
||||
// Helper to reconstruct full Hermitian spectrum from N/2+1 real FFT
|
||||
func hermitianSymmetric(fullLen int, halfSpec []complex128) []complex128 {
|
||||
full := make([]complex128, fullLen)
|
||||
N := fullLen
|
||||
// DC
|
||||
full[0] = halfSpec[0]
|
||||
// Positive freqs
|
||||
for k := 1; k < N/2; k++ {
|
||||
full[k] = halfSpec[k]
|
||||
full[N-k] = cmplx.Conj(halfSpec[k])
|
||||
}
|
||||
// Nyquist (if even)
|
||||
if N%2 == 0 {
|
||||
full[N/2] = halfSpec[N/2]
|
||||
}
|
||||
return full
|
||||
}
|
||||
|
||||
// MinimumPhaseTransform using go-dsp/fft for full complex FFT/IFFT
|
||||
func MinimumPhaseTransform(ir []float64) []float64 {
|
||||
if len(ir) == 0 {
|
||||
return ir
|
||||
}
|
||||
|
||||
origLen := len(ir)
|
||||
fftLen := nextPowerOfTwo(origLen)
|
||||
padded := padOrTruncate(ir, fftLen)
|
||||
log.Printf("[MPT] fftLen: %d, padded len: %d", fftLen, len(padded))
|
||||
|
||||
// Convert to complex
|
||||
signal := make([]complex128, fftLen)
|
||||
for i, v := range padded {
|
||||
signal[i] = complex(v, 0)
|
||||
}
|
||||
|
||||
// FFT
|
||||
X := fft.FFT(signal)
|
||||
|
||||
// Log-magnitude spectrum (complex)
|
||||
logMag := make([]complex128, fftLen)
|
||||
for i, v := range X {
|
||||
mag := cmplx.Abs(v)
|
||||
if mag < 1e-12 {
|
||||
mag = 1e-12
|
||||
}
|
||||
logMag[i] = complex(math.Log(mag), 0)
|
||||
}
|
||||
|
||||
// IFFT to get real cepstrum
|
||||
cepstrumC := fft.IFFT(logMag)
|
||||
|
||||
// Minimum phase cepstrum
|
||||
minPhaseCep := make([]complex128, fftLen)
|
||||
minPhaseCep[0] = cepstrumC[0] // DC
|
||||
for i := 1; i < fftLen/2; i++ {
|
||||
minPhaseCep[i] = 2 * cepstrumC[i]
|
||||
}
|
||||
if fftLen%2 == 0 {
|
||||
minPhaseCep[fftLen/2] = cepstrumC[fftLen/2] // Nyquist (if even)
|
||||
}
|
||||
// Negative quefrency: zero (already zero by default)
|
||||
|
||||
// FFT of minimum phase cepstrum
|
||||
minPhaseSpec := fft.FFT(minPhaseCep)
|
||||
|
||||
// Exponentiate to get minimum phase spectrum
|
||||
for i := range minPhaseSpec {
|
||||
minPhaseSpec[i] = cmplx.Exp(minPhaseSpec[i])
|
||||
}
|
||||
|
||||
// IFFT to get minimum phase IR
|
||||
minPhaseIR := fft.IFFT(minPhaseSpec)
|
||||
|
||||
// Return the real part, original length
|
||||
result := make([]float64, origLen)
|
||||
for i := range result {
|
||||
result[i] = real(minPhaseIR[i])
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// realSlice extracts the real part of a []complex128 as []float64
|
||||
func realSlice(in []complex128) []float64 {
|
||||
out := make([]float64, len(in))
|
||||
for i, v := range in {
|
||||
out[i] = real(v)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Resample resamples audio data from one sample rate to another using linear interpolation
|
||||
func Resample(data []float64, fromSampleRate, toSampleRate int) []float64 {
|
||||
if fromSampleRate == toSampleRate {
|
||||
return data
|
||||
}
|
||||
|
||||
// Calculate the resampling ratio
|
||||
ratio := float64(toSampleRate) / float64(fromSampleRate)
|
||||
newLength := int(float64(len(data)) * ratio)
|
||||
|
||||
if newLength == 0 {
|
||||
return []float64{}
|
||||
}
|
||||
|
||||
result := make([]float64, newLength)
|
||||
|
||||
for i := 0; i < newLength; i++ {
|
||||
// Calculate the position in the original data
|
||||
pos := float64(i) / ratio
|
||||
|
||||
// Get the integer and fractional parts
|
||||
posInt := int(pos)
|
||||
posFrac := pos - float64(posInt)
|
||||
|
||||
// Linear interpolation
|
||||
if posInt >= len(data)-1 {
|
||||
// Beyond the end of the data, use the last sample
|
||||
result[i] = data[len(data)-1]
|
||||
} else {
|
||||
// Interpolate between two samples
|
||||
sample1 := data[posInt]
|
||||
sample2 := data[posInt+1]
|
||||
result[i] = sample1 + posFrac*(sample2-sample1)
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
104
pkg/wav/reader.go
Normal file
104
pkg/wav/reader.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package wav
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"valhallir-convoluter/pkg/convolve"
|
||||
|
||||
"github.com/go-audio/audio"
|
||||
"github.com/go-audio/wav"
|
||||
)
|
||||
|
||||
// WAVData represents the PCM data and metadata from a WAV file
|
||||
type WAVData struct {
|
||||
SampleRate int
|
||||
BitDepth int
|
||||
Channels int
|
||||
PCMData []float64
|
||||
}
|
||||
|
||||
// toMono averages all channels to mono
|
||||
func toMono(data []float64, channels int) []float64 {
|
||||
if channels == 1 {
|
||||
return data
|
||||
}
|
||||
mono := make([]float64, len(data)/channels)
|
||||
for i := 0; i < len(mono); i++ {
|
||||
sum := 0.0
|
||||
for c := 0; c < channels; c++ {
|
||||
sum += data[i*channels+c]
|
||||
}
|
||||
mono[i] = sum / float64(channels)
|
||||
}
|
||||
return mono
|
||||
}
|
||||
|
||||
// ReadWAVFile reads a WAV file and returns its PCM data as float64 (resampled to 96kHz mono)
|
||||
func ReadWAVFile(filePath string) (*WAVData, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open file %s: %w", filePath, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
decoder := wav.NewDecoder(file)
|
||||
if !decoder.IsValidFile() {
|
||||
return nil, fmt.Errorf("file %s is not a valid WAV file", filePath)
|
||||
}
|
||||
|
||||
// Read all PCM data
|
||||
var pcmData []int32
|
||||
buf := &audio.IntBuffer{Data: make([]int, 4096), Format: &audio.Format{SampleRate: int(decoder.SampleRate), NumChannels: int(decoder.NumChans)}}
|
||||
|
||||
for {
|
||||
n, err := decoder.PCMBuffer(buf)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
if n == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// Convert int samples to float64
|
||||
for i := 0; i < n; i++ {
|
||||
pcmData = append(pcmData, int32(buf.Data[i]))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert int32 to float64 (-1.0 to 1.0 range, scale by bit depth)
|
||||
floatData := make([]float64, len(pcmData))
|
||||
var norm float64
|
||||
if decoder.BitDepth == 16 {
|
||||
norm = float64(1 << 15)
|
||||
} else if decoder.BitDepth == 24 {
|
||||
norm = float64(1 << 23)
|
||||
} else if decoder.BitDepth == 32 {
|
||||
norm = float64(1 << 31)
|
||||
} else {
|
||||
norm = float64(1 << 23) // fallback
|
||||
}
|
||||
for i, sample := range pcmData {
|
||||
floatData[i] = float64(sample) / norm
|
||||
}
|
||||
|
||||
// Convert to mono if needed
|
||||
channels := int(decoder.NumChans)
|
||||
if channels > 1 {
|
||||
floatData = toMono(floatData, channels)
|
||||
channels = 1
|
||||
}
|
||||
|
||||
// Resample to 96kHz if needed
|
||||
inSampleRate := int(decoder.SampleRate)
|
||||
if inSampleRate != 96000 {
|
||||
floatData = convolve.Resample(floatData, inSampleRate, 96000)
|
||||
}
|
||||
|
||||
return &WAVData{
|
||||
SampleRate: 96000,
|
||||
BitDepth: int(decoder.BitDepth), // original bit depth
|
||||
Channels: 1,
|
||||
PCMData: floatData,
|
||||
}, nil
|
||||
}
|
||||
90
pkg/wav/writer.go
Normal file
90
pkg/wav/writer.go
Normal file
@@ -0,0 +1,90 @@
|
||||
package wav
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/go-audio/audio"
|
||||
"github.com/go-audio/wav"
|
||||
)
|
||||
|
||||
// WriteWAVFileWithOptions writes float64 audio data to a WAV file with specified sample rate and bit depth
|
||||
func WriteWAVFileWithOptions(filePath string, data []float64, sampleRate, bitDepth int) error {
|
||||
file, err := os.Create(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create file %s: %w", filePath, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Convert float64 to appropriate integer format based on bit depth
|
||||
var intData []int
|
||||
switch bitDepth {
|
||||
case 16:
|
||||
intData = make([]int, len(data))
|
||||
for i, sample := range data {
|
||||
// Clamp to [-1, 1] range
|
||||
if sample > 1.0 {
|
||||
sample = 1.0
|
||||
} else if sample < -1.0 {
|
||||
sample = -1.0
|
||||
}
|
||||
// Convert to 16-bit integer
|
||||
intSample := int(sample * float64(1<<15))
|
||||
intData[i] = intSample
|
||||
}
|
||||
case 24:
|
||||
intData = make([]int, len(data))
|
||||
for i, sample := range data {
|
||||
// Clamp to [-1, 1] range
|
||||
if sample > 1.0 {
|
||||
sample = 1.0
|
||||
} else if sample < -1.0 {
|
||||
sample = -1.0
|
||||
}
|
||||
// Convert to 24-bit integer
|
||||
intSample := int(sample * float64(1<<23))
|
||||
intData[i] = intSample
|
||||
}
|
||||
case 32:
|
||||
intData = make([]int, len(data))
|
||||
for i, sample := range data {
|
||||
// Clamp to [-1, 1] range
|
||||
if sample > 1.0 {
|
||||
sample = 1.0
|
||||
} else if sample < -1.0 {
|
||||
sample = -1.0
|
||||
}
|
||||
// Convert to 32-bit integer
|
||||
intSample := int(sample * float64(1<<31))
|
||||
intData[i] = intSample
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unsupported bit depth: %d", bitDepth)
|
||||
}
|
||||
|
||||
// Create audio buffer
|
||||
audioBuf := &audio.IntBuffer{
|
||||
Format: &audio.Format{
|
||||
NumChannels: 1,
|
||||
SampleRate: sampleRate,
|
||||
},
|
||||
Data: intData,
|
||||
SourceBitDepth: bitDepth,
|
||||
}
|
||||
|
||||
// Create WAV encoder
|
||||
encoder := wav.NewEncoder(file, sampleRate, bitDepth, 1, 1)
|
||||
defer encoder.Close()
|
||||
|
||||
// Write audio data
|
||||
if err := encoder.Write(audioBuf); err != nil {
|
||||
return fmt.Errorf("failed to write audio data: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteWAVFile writes float64 audio data to a 96kHz 24-bit WAV file (default format)
|
||||
func WriteWAVFile(filePath string, data []float64, sampleRate int) error {
|
||||
return WriteWAVFileWithOptions(filePath, data, sampleRate, 24)
|
||||
}
|
||||
Reference in New Issue
Block a user