commit a2f510e0628bd0ff36f44fad5ffac99534135fe0 Author: Bastian Bührig Date: Fri Jul 11 08:55:27 2025 +0200 Initial commit. Complete first version of the deconvolver diff --git a/README.md b/README.md new file mode 100644 index 0000000..e4339f2 --- /dev/null +++ b/README.md @@ -0,0 +1,237 @@ +# Valhallir Convoluter + +A CLI tool for processing WAV files to generate impulse responses (IR) from sweep and recorded WAV files, designed for guitar speaker IR creation. + +## Features + +- **Fast FFT-based deconvolution** for accurate IR extraction +- **Automatic input conversion:** Accepts any WAV sample rate, bit depth, or channel count +- **Optional output IR length:** Specify output IR length in milliseconds with --length-ms +- **96kHz 24-bit WAV file support** for high-quality audio processing +- **Multiple output formats** with configurable sample rates and bit depths +- **Minimum Phase Transform (MPT)** option for reduced latency IRs +- **Automatic silence trimming** and normalization +- **Modular design** with separate packages for WAV I/O and convolution +- **Robust error handling** and validation + +## Installation + +```sh +# Clone the repository +git clone +cd valhallir-convoluter + +# Build the application +go build -o valhallir-convoluter +``` + +## Usage + +### Basic IR Generation + +Generate a standard impulse response from sweep and recorded files (any WAV format): + +```sh +./valhallir-convoluter --sweep sweep.wav --recorded recorded.wav --output ir.wav +``` + +### With Minimum Phase Transform + +Generate both regular and minimum phase IRs: + +```sh +./valhallir-convoluter --sweep sweep.wav --recorded recorded.wav --output ir.wav --mpt +``` + +This creates: +- `ir.wav` - Standard impulse response +- `ir_mpt.wav` - Minimum phase transform version + +### Limit Output IR Length + +Trim or zero-pad the output IR to a specific length (in milliseconds): + +```sh +./valhallir-convoluter --sweep sweep.wav --recorded recorded.wav --output ir.wav --length-ms 100 +``` + +This will ensure the output IR is exactly 100 ms long (trimming or zero-padding as needed). + +### Different Output Formats + +Generate IRs in different sample rates and bit depths: + +```sh +# 44kHz 16-bit (CD quality) +./valhallir-convoluter \ + --sweep sweep.wav \ + --recorded recorded.wav \ + --output ir_cd.wav \ + --sample-rate 44100 \ + --bit-depth 16 + +# 48kHz 32-bit (studio quality) +./valhallir-convoluter \ + --sweep sweep.wav \ + --recorded recorded.wav \ + --output ir_studio.wav \ + --sample-rate 48000 \ + --bit-depth 32 \ + --mpt + +# 96kHz 24-bit (high resolution) +./valhallir-convoluter \ + --sweep sweep.wav \ + --recorded recorded.wav \ + --output ir_hires.wav \ + --sample-rate 96000 \ + --bit-depth 24 +``` + +### Advanced Options + +```sh +./valhallir-convoluter \ + --sweep sweep.wav \ + --recorded recorded.wav \ + --output ir.wav \ + --mpt \ + --sample-rate 48000 \ + --bit-depth 24 \ + --normalize 0.95 \ + --trim-threshold 0.001 \ + --length-ms 50 +``` + +## Command Line Options + +| Flag | Description | Default | Required | +|------|-------------|---------|----------| +| `--sweep` | Path to sweep WAV file (any format) | - | Yes | +| `--recorded` | Path to recorded WAV file (any format) | - | Yes | +| `--output` | Path to output IR WAV file | - | Yes | +| `--mpt` | Generate minimum phase transform IR | false | No | +| `--sample-rate` | Output sample rate (44, 48, 88, 96 kHz) | 96000 | No | +| `--bit-depth` | Output bit depth (16, 24, 32 bit) | 24 | No | +| `--normalize` | Normalize output to peak value (0.0-1.0) | 0.95 | No | +| `--trim-threshold` | Silence threshold for trimming (0.0-1.0) | 0.001 | No | +| `--length-ms` | Output IR length in milliseconds (trim or zero-pad) | - | No | + +## File Requirements + +### Input Files +- **Format:** Any uncompressed WAV file +- **Sample Rate:** Any (will be automatically resampled to 96kHz for processing) +- **Bit Depth:** Any (16, 24, 32-bit supported; will be converted to float64 internally) +- **Channels:** Any (mono, stereo, or multi-channel; will be converted to mono by averaging channels) + +### Output Files +- **Format:** WAV files +- **Sample Rate:** 44kHz, 48kHz, 88kHz, or 96kHz (set by `--sample-rate`) +- **Bit Depth:** 16-bit, 24-bit, or 32-bit (set by `--bit-depth`) +- **Channels:** Mono (1 channel) + +## Technical Details + +### Input Conversion +- All input files are automatically converted to mono, 96kHz, float64 for processing +- Stereo or multi-channel files are averaged to mono +- Sample rates are resampled to 96kHz using linear interpolation +- Bit depths are normalized to float64 + +### Output IR Length +- If `--length-ms` is set, the output IR (and MPT IR) will be trimmed or zero-padded to the specified length in milliseconds +- If not set, the full IR is used + +### Deconvolution Process +1. **FFT-based deconvolution** of recorded signal by sweep signal +2. **Regularization** to prevent division by zero +3. **Silence trimming** to remove leading/trailing silence +4. **Normalization** to prevent clipping + +### Minimum Phase Transform +- Uses **real cepstrum method** for accurate minimum phase conversion +- **Reduces latency** by minimizing pre-ringing +- **Maintains frequency response** while optimizing phase characteristics +- **Suitable for real-time applications** like guitar amp modeling + +### Output Format Options +- **Sample Rates:** 44.1kHz (CD), 48kHz (studio), 88.2kHz, 96kHz (high-res) +- **Bit Depths:** 16-bit (CD), 24-bit (studio), 32-bit (high-res) +- **File Sizes:** 16-bit ≈ 50% smaller, 32-bit ≈ 33% larger than 24-bit + +## Dependencies + +- [urfave/cli](https://github.com/urfave/cli) - CLI framework +- [go-audio/wav](https://github.com/go-audio/wav) - WAV file I/O +- [go-dsp/fft](https://github.com/mjibson/go-dsp) - FFT implementation +- [gonum](https://gonum.org/) - Numerical computing + +## Examples + +### Guitar Cabinet IR (CD Quality) +```sh +# Generate IR from guitar cab sweep and recording (any WAV format), 50ms length +./valhallir-convoluter \ + --sweep guitar_cab_sweep.wav \ + --recorded guitar_cab_recorded.wav \ + --output cab_ir_cd.wav \ + --sample-rate 44100 \ + --bit-depth 16 \ + --length-ms 50 \ + --mpt +``` + +### Room Acoustics IR (Studio Quality) +```sh +# Generate room impulse response +./valhallir-convoluter \ + --sweep room_sweep.wav \ + --recorded room_recorded.wav \ + --output room_ir_studio.wav \ + --sample-rate 48000 \ + --bit-depth 24 +``` + +### High-Resolution IR (Mastering) +```sh +# Generate high-resolution IR for mastering +./valhallir-convoluter \ + --sweep mastering_sweep.wav \ + --recorded mastering_recorded.wav \ + --output mastering_ir.wav \ + --sample-rate 96000 \ + --bit-depth 32 \ + --length-ms 100 \ + --mpt +``` + +## Troubleshooting + +### Common Issues + +**"File is not a valid WAV file" error** +- Check that files are uncompressed WAV format +- Avoid MP3, FLAC, or other compressed formats + +**"Invalid sample rate" error (output)** +- Use only supported output sample rates: 44100, 48000, 88200, 96000 +- Check the `--sample-rate` flag value + +**"Invalid bit depth" error (output)** +- Use only supported output bit depths: 16, 24, 32 +- Check the `--bit-depth` flag value + +### Performance +- Processing time depends on file length +- FFT-based deconvolution is much faster than time-domain methods +- Large files (>1GB) may take several minutes +- Higher bit depths require more memory but don't significantly affect processing time + +## License + +[Add your license information here] + +## Contributing + +[Add contribution guidelines here] \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..d330933 --- /dev/null +++ b/go.mod @@ -0,0 +1,15 @@ +module valhallir-convoluter + +go 1.24.1 + +require ( + github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect + github.com/go-audio/audio v1.0.0 // indirect + github.com/go-audio/riff v1.0.0 // indirect + github.com/go-audio/wav v1.1.0 // indirect + github.com/mjibson/go-dsp v0.0.0-20180508042940-11479a337f12 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/urfave/cli/v2 v2.27.7 // indirect + github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect + gonum.org/v1/gonum v0.13.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ce1603b --- /dev/null +++ b/go.sum @@ -0,0 +1,18 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= +github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= +github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= +github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= +github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498= +github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g= +github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE= +github.com/mjibson/go-dsp v0.0.0-20180508042940-11479a337f12 h1:dd7vnTDfjtwCETZDrRe+GPYNLA1jBtbZeyfyE8eZCyk= +github.com/mjibson/go-dsp v0.0.0-20180508042940-11479a337f12/go.mod h1:i/KKcxEWEO8Yyl11DYafRPKOPVYTrhxiTRigjtEEXZU= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= +github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= +gonum.org/v1/gonum v0.13.0 h1:a0T3bh+7fhRyqeNbiC3qVHYmkiQgit3wnNan/2c0HMM= +gonum.org/v1/gonum v0.13.0/go.mod h1:/WPYRckkfWrhWefxyYTfrTtQR0KH4iyHNuzxqXAKyAU= diff --git a/main.go b/main.go new file mode 100644 index 0000000..27f7d87 --- /dev/null +++ b/main.go @@ -0,0 +1,189 @@ +package main + +import ( + "fmt" + "log" + "os" + + "valhallir-convoluter/pkg/convolve" + "valhallir-convoluter/pkg/wav" + + "github.com/urfave/cli/v2" +) + +func main() { + app := &cli.App{ + Name: "valhallir-convoluter", + Usage: "Convolve sweep and recorded WAV files to create impulse responses", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "sweep", + Usage: "Path to the sweep WAV file (96kHz 24bit)", + Required: true, + }, + &cli.StringFlag{ + Name: "recorded", + Usage: "Path to the recorded WAV file (96kHz 24bit)", + Required: true, + }, + &cli.StringFlag{ + Name: "output", + Usage: "Path to the output IR WAV file (96kHz 24bit)", + Required: true, + }, + &cli.BoolFlag{ + Name: "mpt", + Usage: "Generate minimum phase transform IR in addition to regular IR", + }, + &cli.IntFlag{ + Name: "sample-rate", + Usage: "Output sample rate (44, 48, 88, 96 kHz)", + Value: 96000, + }, + &cli.IntFlag{ + Name: "bit-depth", + Usage: "Output bit depth (16, 24, 32 bit)", + Value: 24, + }, + &cli.Float64Flag{ + Name: "normalize", + Usage: "Normalize output to this peak value (0.0-1.0, default 0.95)", + Value: 0.95, + }, + &cli.Float64Flag{ + Name: "trim-threshold", + Usage: "Silence threshold for trimming (0.0-1.0, default 0.001)", + Value: 0.001, + }, + &cli.Float64Flag{ + Name: "length-ms", + Usage: "Optional: Output IR length in milliseconds (will trim or zero-pad as needed)", + }, + }, + Action: func(c *cli.Context) error { + // Read sweep WAV file + sweepData, err := wav.ReadWAVFile(c.String("sweep")) + if err != nil { + return err + } + + // Read recorded WAV file + recordedData, err := wav.ReadWAVFile(c.String("recorded")) + if err != nil { + return err + } + + log.Printf("Sweep: %d samples, %d channels", len(sweepData.PCMData), sweepData.Channels) + log.Printf("Recorded: %d samples, %d channels", len(recordedData.PCMData), recordedData.Channels) + + log.Println("Performing deconvolution...") + ir := convolve.Deconvolve(sweepData.PCMData, recordedData.PCMData) + log.Printf("Deconvolution result: %d samples", len(ir)) + + log.Println("Trimming silence...") + ir = convolve.TrimSilence(ir, 1e-5) + log.Printf("After trimming: %d samples", len(ir)) + + log.Println("Normalizing...") + ir = convolve.Normalize(ir, c.Float64("normalize")) + log.Printf("Final IR: %d samples", len(ir)) + + // Validate output format options + sampleRate := c.Int("sample-rate") + bitDepth := c.Int("bit-depth") + + // Validate sample rate + validSampleRates := []int{44100, 48000, 88200, 96000} + validSampleRate := false + for _, sr := range validSampleRates { + if sampleRate == sr { + validSampleRate = true + break + } + } + if !validSampleRate { + return fmt.Errorf("invalid sample rate: %d. Valid options: %v", sampleRate, validSampleRates) + } + + // Validate bit depth + validBitDepths := []int{16, 24, 32} + validBitDepth := false + for _, bd := range validBitDepths { + if bitDepth == bd { + validBitDepth = true + break + } + } + if !validBitDepth { + return fmt.Errorf("invalid bit depth: %d. Valid options: %v", bitDepth, validBitDepths) + } + + // Resample IR to target sample rate if different from input (96kHz) + targetSampleRate := sampleRate + if targetSampleRate != 96000 { + log.Printf("Resampling IR from 96kHz to %dHz...", targetSampleRate) + ir = convolve.Resample(ir, 96000, targetSampleRate) + log.Printf("Resampled IR: %d samples", len(ir)) + } + + // Trim or pad IR to requested length if --length-ms is set + lengthMs := c.Float64("length-ms") + if lengthMs > 0 { + targetSamples := int(float64(targetSampleRate) * lengthMs / 1000.0) + log.Printf("Trimming or padding IR to %d samples (%.2f ms)...", targetSamples, lengthMs) + ir = convolve.TrimOrPad(ir, targetSamples) + } + + // Write regular IR + log.Printf("Writing IR to: %s (%dHz, %d-bit WAV)", c.String("output"), sampleRate, bitDepth) + if err := wav.WriteWAVFileWithOptions(c.String("output"), ir, sampleRate, bitDepth); err != nil { + return err + } + + // Generate MPT IR if requested + if c.Bool("mpt") { + log.Println("Generating minimum phase transform...") + // Use the original 96kHz IR for MPT generation + originalIR := convolve.Deconvolve(sweepData.PCMData, recordedData.PCMData) + originalIR = convolve.TrimSilence(originalIR, 1e-5) + mptIR := convolve.MinimumPhaseTransform(originalIR) + mptIR = convolve.Normalize(mptIR, c.Float64("normalize")) + log.Printf("MPT IR: %d samples", len(mptIR)) + + // Resample MPT IR to target sample rate if different from input (96kHz) + if targetSampleRate != 96000 { + log.Printf("Resampling MPT IR from 96kHz to %dHz...", targetSampleRate) + mptIR = convolve.Resample(mptIR, 96000, targetSampleRate) + log.Printf("Resampled MPT IR: %d samples", len(mptIR)) + } + + // Trim or pad MPT IR to requested length if --length-ms is set + if lengthMs > 0 { + targetSamples := int(float64(targetSampleRate) * lengthMs / 1000.0) + log.Printf("Trimming or padding MPT IR to %d samples (%.2f ms)...", targetSamples, lengthMs) + mptIR = convolve.TrimOrPad(mptIR, targetSamples) + } + + // Generate MPT output filename + outputPath := c.String("output") + if len(outputPath) > 4 && outputPath[len(outputPath)-4:] == ".wav" { + outputPath = outputPath[:len(outputPath)-4] + } + mptOutputPath := outputPath + "_mpt.wav" + + log.Printf("Writing MPT IR to: %s (%dHz, %d-bit WAV)", mptOutputPath, sampleRate, bitDepth) + if err := wav.WriteWAVFileWithOptions(mptOutputPath, mptIR, sampleRate, bitDepth); err != nil { + return err + } + log.Println("Minimum phase transform IR generated successfully!") + } + + log.Println("Impulse response generated successfully!") + return nil + }, + } + + if err := app.Run(os.Args); err != nil { + log.Fatal(err) + } +} diff --git a/pkg/convolve/convolve.go b/pkg/convolve/convolve.go new file mode 100644 index 0000000..6b26fea --- /dev/null +++ b/pkg/convolve/convolve.go @@ -0,0 +1,324 @@ +package convolve + +import ( + "log" + "math" + "math/cmplx" + + "github.com/mjibson/go-dsp/fft" + "gonum.org/v1/gonum/dsp/fourier" +) + +// nextPowerOfTwo returns the next power of two >= n +func nextPowerOfTwo(n int) int { + p := 1 + for p < n { + p <<= 1 + } + return p +} + +// Convolve performs FFT-based convolution of two audio signals +// Deprecated: Use Deconvolve for IR extraction from sweep and recorded signals +func Convolve(signal1, signal2 []float64) []float64 { + resultLen := len(signal1) + len(signal2) - 1 + fftLen := nextPowerOfTwo(resultLen) + + log.Printf("[convolve] signal1: %d, signal2: %d, resultLen: %d, fftLen: %d", len(signal1), len(signal2), resultLen, fftLen) + + // Zero-pad both signals to fftLen as float64 + x := make([]float64, fftLen) + copy(x, signal1) + y := make([]float64, fftLen) + copy(y, signal2) + + // FFT + fft := fourier.NewFFT(fftLen) + xFreq := fft.Coefficients(nil, x) // []complex128 + yFreq := fft.Coefficients(nil, y) // []complex128 + + log.Printf("[convolve] xFreq length: %d, yFreq length: %d", len(xFreq), len(yFreq)) + + // Multiply in frequency domain + outFreq := make([]complex128, len(xFreq)) + for i := 0; i < len(xFreq) && i < len(yFreq); i++ { + outFreq[i] = xFreq[i] * yFreq[i] + } + + // Inverse FFT (returns []float64) + outTime := fft.Sequence(nil, outFreq) + log.Printf("[convolve] outTime length: %d", len(outTime)) + + // Defensive: avoid index out of range + copyLen := resultLen + if len(outTime) < resultLen { + log.Printf("[convolve] Warning: outTime length (%d) < resultLen (%d), truncating resultLen", len(outTime), resultLen) + copyLen = len(outTime) + } + + result := make([]float64, copyLen) + copy(result, outTime[:copyLen]) + + return result +} + +// Deconvolve extracts the impulse response (IR) from a sweep and its recorded version +// by dividing the FFT of the recorded by the FFT of the sweep, with regularization. +func Deconvolve(sweep, recorded []float64) []float64 { + resultLen := len(recorded) + fftLen := nextPowerOfTwo(resultLen) + + log.Printf("[deconvolve] sweep: %d, recorded: %d, resultLen: %d, fftLen: %d", len(sweep), len(recorded), resultLen, fftLen) + + // Zero-pad both signals to fftLen + sweepPadded := make([]float64, fftLen) + recordedPadded := make([]float64, fftLen) + copy(sweepPadded, sweep) + copy(recordedPadded, recorded) + + fft := fourier.NewFFT(fftLen) + sweepFFT := fft.Coefficients(nil, sweepPadded) + recordedFFT := fft.Coefficients(nil, recordedPadded) + + log.Printf("[deconvolve] sweepFFT length: %d, recordedFFT length: %d", len(sweepFFT), len(recordedFFT)) + + // Regularization epsilon to avoid division by zero + const epsilon = 1e-10 + minLen := len(sweepFFT) + if len(recordedFFT) < minLen { + minLen = len(recordedFFT) + } + irFFT := make([]complex128, minLen) + for i := 0; i < minLen; i++ { + denom := sweepFFT[i] + if cmplx.Abs(denom) < epsilon { + denom = complex(epsilon, 0) + } + irFFT[i] = recordedFFT[i] / denom + } + + irTime := fft.Sequence(nil, irFFT) + log.Printf("[deconvolve] irTime length: %d", len(irTime)) + + // Defensive: avoid index out of range + copyLen := resultLen + if len(irTime) < resultLen { + log.Printf("[deconvolve] Warning: irTime length (%d) < resultLen (%d), truncating resultLen", len(irTime), resultLen) + copyLen = len(irTime) + } + + result := make([]float64, copyLen) + copy(result, irTime[:copyLen]) + + return result +} + +// Normalize normalizes the audio data to prevent clipping +// targetPeak is the maximum peak value (e.g., 0.95 for 95% of full scale) +func Normalize(data []float64, targetPeak float64) []float64 { + if len(data) == 0 { + return data + } + // Find the maximum absolute value + maxVal := 0.0 + for _, sample := range data { + absVal := math.Abs(sample) + if absVal > maxVal { + maxVal = absVal + } + } + if maxVal == 0 { + return data + } + // Calculate normalization factor + normFactor := targetPeak / maxVal + // Apply normalization + normalized := make([]float64, len(data)) + for i, sample := range data { + normalized[i] = sample * normFactor + } + return normalized +} + +// TrimSilence removes leading and trailing silence from the audio data +// threshold is the amplitude threshold below which samples are considered silence +func TrimSilence(data []float64, threshold float64) []float64 { + if len(data) == 0 { + return data + } + // Find start (first non-silent sample) + start := 0 + for i, sample := range data { + if math.Abs(sample) > threshold { + start = i + break + } + } + // Find end (last non-silent sample) + end := len(data) - 1 + for i := len(data) - 1; i >= 0; i-- { + if math.Abs(data[i]) > threshold { + end = i + break + } + } + if start >= end { + return []float64{} + } + return data[start : end+1] +} + +// TrimOrPad trims or zero-pads the data to the specified number of samples +func TrimOrPad(data []float64, targetSamples int) []float64 { + if len(data) == targetSamples { + return data + } else if len(data) > targetSamples { + return data[:targetSamples] + } else { + out := make([]float64, targetSamples) + copy(out, data) + // zero-padding is default + return out + } +} + +// padOrTruncate ensures a slice is exactly n elements long +func padOrTruncate[T any](in []T, n int) []T { + if len(in) == n { + return in + } else if len(in) > n { + return in[:n] + } + out := make([]T, n) + copy(out, in) + return out +} + +// Helper to reconstruct full Hermitian spectrum from N/2+1 real FFT +func hermitianSymmetric(fullLen int, halfSpec []complex128) []complex128 { + full := make([]complex128, fullLen) + N := fullLen + // DC + full[0] = halfSpec[0] + // Positive freqs + for k := 1; k < N/2; k++ { + full[k] = halfSpec[k] + full[N-k] = cmplx.Conj(halfSpec[k]) + } + // Nyquist (if even) + if N%2 == 0 { + full[N/2] = halfSpec[N/2] + } + return full +} + +// MinimumPhaseTransform using go-dsp/fft for full complex FFT/IFFT +func MinimumPhaseTransform(ir []float64) []float64 { + if len(ir) == 0 { + return ir + } + + origLen := len(ir) + fftLen := nextPowerOfTwo(origLen) + padded := padOrTruncate(ir, fftLen) + log.Printf("[MPT] fftLen: %d, padded len: %d", fftLen, len(padded)) + + // Convert to complex + signal := make([]complex128, fftLen) + for i, v := range padded { + signal[i] = complex(v, 0) + } + + // FFT + X := fft.FFT(signal) + + // Log-magnitude spectrum (complex) + logMag := make([]complex128, fftLen) + for i, v := range X { + mag := cmplx.Abs(v) + if mag < 1e-12 { + mag = 1e-12 + } + logMag[i] = complex(math.Log(mag), 0) + } + + // IFFT to get real cepstrum + cepstrumC := fft.IFFT(logMag) + + // Minimum phase cepstrum + minPhaseCep := make([]complex128, fftLen) + minPhaseCep[0] = cepstrumC[0] // DC + for i := 1; i < fftLen/2; i++ { + minPhaseCep[i] = 2 * cepstrumC[i] + } + if fftLen%2 == 0 { + minPhaseCep[fftLen/2] = cepstrumC[fftLen/2] // Nyquist (if even) + } + // Negative quefrency: zero (already zero by default) + + // FFT of minimum phase cepstrum + minPhaseSpec := fft.FFT(minPhaseCep) + + // Exponentiate to get minimum phase spectrum + for i := range minPhaseSpec { + minPhaseSpec[i] = cmplx.Exp(minPhaseSpec[i]) + } + + // IFFT to get minimum phase IR + minPhaseIR := fft.IFFT(minPhaseSpec) + + // Return the real part, original length + result := make([]float64, origLen) + for i := range result { + result[i] = real(minPhaseIR[i]) + } + return result +} + +// realSlice extracts the real part of a []complex128 as []float64 +func realSlice(in []complex128) []float64 { + out := make([]float64, len(in)) + for i, v := range in { + out[i] = real(v) + } + return out +} + +// Resample resamples audio data from one sample rate to another using linear interpolation +func Resample(data []float64, fromSampleRate, toSampleRate int) []float64 { + if fromSampleRate == toSampleRate { + return data + } + + // Calculate the resampling ratio + ratio := float64(toSampleRate) / float64(fromSampleRate) + newLength := int(float64(len(data)) * ratio) + + if newLength == 0 { + return []float64{} + } + + result := make([]float64, newLength) + + for i := 0; i < newLength; i++ { + // Calculate the position in the original data + pos := float64(i) / ratio + + // Get the integer and fractional parts + posInt := int(pos) + posFrac := pos - float64(posInt) + + // Linear interpolation + if posInt >= len(data)-1 { + // Beyond the end of the data, use the last sample + result[i] = data[len(data)-1] + } else { + // Interpolate between two samples + sample1 := data[posInt] + sample2 := data[posInt+1] + result[i] = sample1 + posFrac*(sample2-sample1) + } + } + + return result +} diff --git a/pkg/wav/reader.go b/pkg/wav/reader.go new file mode 100644 index 0000000..db099a6 --- /dev/null +++ b/pkg/wav/reader.go @@ -0,0 +1,104 @@ +package wav + +import ( + "fmt" + "os" + + "valhallir-convoluter/pkg/convolve" + + "github.com/go-audio/audio" + "github.com/go-audio/wav" +) + +// WAVData represents the PCM data and metadata from a WAV file +type WAVData struct { + SampleRate int + BitDepth int + Channels int + PCMData []float64 +} + +// toMono averages all channels to mono +func toMono(data []float64, channels int) []float64 { + if channels == 1 { + return data + } + mono := make([]float64, len(data)/channels) + for i := 0; i < len(mono); i++ { + sum := 0.0 + for c := 0; c < channels; c++ { + sum += data[i*channels+c] + } + mono[i] = sum / float64(channels) + } + return mono +} + +// ReadWAVFile reads a WAV file and returns its PCM data as float64 (resampled to 96kHz mono) +func ReadWAVFile(filePath string) (*WAVData, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("failed to open file %s: %w", filePath, err) + } + defer file.Close() + + decoder := wav.NewDecoder(file) + if !decoder.IsValidFile() { + return nil, fmt.Errorf("file %s is not a valid WAV file", filePath) + } + + // Read all PCM data + var pcmData []int32 + buf := &audio.IntBuffer{Data: make([]int, 4096), Format: &audio.Format{SampleRate: int(decoder.SampleRate), NumChannels: int(decoder.NumChans)}} + + for { + n, err := decoder.PCMBuffer(buf) + if err != nil { + break + } + if n == 0 { + break + } + + // Convert int samples to float64 + for i := 0; i < n; i++ { + pcmData = append(pcmData, int32(buf.Data[i])) + } + } + + // Convert int32 to float64 (-1.0 to 1.0 range, scale by bit depth) + floatData := make([]float64, len(pcmData)) + var norm float64 + if decoder.BitDepth == 16 { + norm = float64(1 << 15) + } else if decoder.BitDepth == 24 { + norm = float64(1 << 23) + } else if decoder.BitDepth == 32 { + norm = float64(1 << 31) + } else { + norm = float64(1 << 23) // fallback + } + for i, sample := range pcmData { + floatData[i] = float64(sample) / norm + } + + // Convert to mono if needed + channels := int(decoder.NumChans) + if channels > 1 { + floatData = toMono(floatData, channels) + channels = 1 + } + + // Resample to 96kHz if needed + inSampleRate := int(decoder.SampleRate) + if inSampleRate != 96000 { + floatData = convolve.Resample(floatData, inSampleRate, 96000) + } + + return &WAVData{ + SampleRate: 96000, + BitDepth: int(decoder.BitDepth), // original bit depth + Channels: 1, + PCMData: floatData, + }, nil +} diff --git a/pkg/wav/writer.go b/pkg/wav/writer.go new file mode 100644 index 0000000..c6439a3 --- /dev/null +++ b/pkg/wav/writer.go @@ -0,0 +1,90 @@ +package wav + +import ( + "fmt" + "os" + + "github.com/go-audio/audio" + "github.com/go-audio/wav" +) + +// WriteWAVFileWithOptions writes float64 audio data to a WAV file with specified sample rate and bit depth +func WriteWAVFileWithOptions(filePath string, data []float64, sampleRate, bitDepth int) error { + file, err := os.Create(filePath) + if err != nil { + return fmt.Errorf("failed to create file %s: %w", filePath, err) + } + defer file.Close() + + // Convert float64 to appropriate integer format based on bit depth + var intData []int + switch bitDepth { + case 16: + intData = make([]int, len(data)) + for i, sample := range data { + // Clamp to [-1, 1] range + if sample > 1.0 { + sample = 1.0 + } else if sample < -1.0 { + sample = -1.0 + } + // Convert to 16-bit integer + intSample := int(sample * float64(1<<15)) + intData[i] = intSample + } + case 24: + intData = make([]int, len(data)) + for i, sample := range data { + // Clamp to [-1, 1] range + if sample > 1.0 { + sample = 1.0 + } else if sample < -1.0 { + sample = -1.0 + } + // Convert to 24-bit integer + intSample := int(sample * float64(1<<23)) + intData[i] = intSample + } + case 32: + intData = make([]int, len(data)) + for i, sample := range data { + // Clamp to [-1, 1] range + if sample > 1.0 { + sample = 1.0 + } else if sample < -1.0 { + sample = -1.0 + } + // Convert to 32-bit integer + intSample := int(sample * float64(1<<31)) + intData[i] = intSample + } + default: + return fmt.Errorf("unsupported bit depth: %d", bitDepth) + } + + // Create audio buffer + audioBuf := &audio.IntBuffer{ + Format: &audio.Format{ + NumChannels: 1, + SampleRate: sampleRate, + }, + Data: intData, + SourceBitDepth: bitDepth, + } + + // Create WAV encoder + encoder := wav.NewEncoder(file, sampleRate, bitDepth, 1, 1) + defer encoder.Close() + + // Write audio data + if err := encoder.Write(audioBuf); err != nil { + return fmt.Errorf("failed to write audio data: %w", err) + } + + return nil +} + +// WriteWAVFile writes float64 audio data to a 96kHz 24-bit WAV file (default format) +func WriteWAVFile(filePath string, data []float64, sampleRate int) error { + return WriteWAVFileWithOptions(filePath, data, sampleRate, 24) +} diff --git a/testdata/ir_48k_32bit.wav b/testdata/ir_48k_32bit.wav new file mode 100644 index 0000000..f3470de Binary files /dev/null and b/testdata/ir_48k_32bit.wav differ diff --git a/testdata/ir_48k_32bit_mpt.wav b/testdata/ir_48k_32bit_mpt.wav new file mode 100644 index 0000000..b112fe0 Binary files /dev/null and b/testdata/ir_48k_32bit_mpt.wav differ diff --git a/testdata/recorded.wav b/testdata/recorded.wav new file mode 100644 index 0000000..109dde1 Binary files /dev/null and b/testdata/recorded.wav differ diff --git a/testdata/sweep.wav b/testdata/sweep.wav new file mode 100644 index 0000000..ba40367 Binary files /dev/null and b/testdata/sweep.wav differ diff --git a/valhallir-convoluter b/valhallir-convoluter new file mode 100755 index 0000000..d64714d Binary files /dev/null and b/valhallir-convoluter differ