From b65e416a137fa82c1d8aedcd6f8f6783ec15d4e9 Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Mon, 22 Dec 2025 21:49:03 +0100 Subject: [PATCH 1/2] fix: audio distortion at queue source boundaries Fixes incorrect sample_rate() and channels() metadata returned by SourcesQueueOutput when transitioning between sources with different formats or from an empty queue. This caused audio distortion at boundaries where metadata changes occurred. Changes: - Clarified Source::current_span_len() contract: returns total span size, Some(0) when exhausted - Fixed SamplesBuffer to return Some(total_size) or Some(0) when exhausted (was None) - Fixed SamplesBuffer::size_hint() to return remaining samples - Updated SourcesQueueOutput to peek next source metadata when current is exhausted - Added Source::is_exhausted() helper method for cleaner exhaustion checks throughout codebase - Implemented ExactSizeIterator for SamplesBuffer - Improved SkipDuration precision to avoid rounding errors This supersedes PR #812 with a cleaner implementation following the proper current_span_len() contract. Enabled previously ignored queue::tests::basic test to prevent regressions. Fixes #811 Co-authored-by: 0----0 <9070490+0----0@users.noreply.github.com> --- CHANGELOG.md | 6 ++++++ src/buffer.rs | 11 ++++++++-- src/queue.rs | 45 +++++++++++++++++++++++++++++------------ src/source/from_iter.rs | 6 ++---- src/source/mod.rs | 16 ++++++++++++--- src/source/repeat.rs | 21 ++++++++++--------- src/source/skip.rs | 16 +++++++++------ 7 files changed, 84 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 48573720..b7bfc2d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `Chirp` now implements `Iterator::size_hint` and `ExactSizeIterator`. +- `SamplesBuffer` now implements `ExactSizeIterator`. +- Added `Source::is_exhausted()` helper method to check if a source has no more samples. - Added `Red` noise generator that is more practical than `Brownian` noise. - Added `std_dev()` to `WhiteUniform` and `WhiteTriangular`. - Added a macro `nz!` which facilitates creating NonZero's for `SampleRate` and @@ -31,6 +33,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `Chirp::next` now returns `None` when the total duration has been reached, and will work correctly for a number of samples greater than 2^24. - `PeriodicAccess` is slightly more accurate for 44.1 kHz sample rate families. +- Fixed audio distortion when queueing sources with different sample rates/channel counts or transitioning from empty queue. +- Fixed `SamplesBuffer` to correctly report exhaustion and remaining samples. +- Improved precision in `SkipDuration` to avoid off-by-a-few-samples errors. ### Changed - `output_to_wav` renamed to `wav_to_file` and now takes ownership of the `Source`. @@ -38,6 +43,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `Gaussian` noise generator has standard deviation of 0.6 for perceptual equivalence. - `Velvet` noise generator takes density in Hz as `usize` instead of `f32`. - Upgrade `cpal` to v0.17. +- Clarified `Source::current_span_len()` contract documentation. ## Version [0.21.1] (2025-07-14) diff --git a/src/buffer.rs b/src/buffer.rs index 305f6b1f..923a8336 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -73,7 +73,11 @@ impl SamplesBuffer { impl Source for SamplesBuffer { #[inline] fn current_span_len(&self) -> Option { - None + if self.pos >= self.data.len() { + Some(0) + } else { + Some(self.data.len()) + } } #[inline] @@ -126,10 +130,13 @@ impl Iterator for SamplesBuffer { #[inline] fn size_hint(&self) -> (usize, Option) { - (self.data.len(), Some(self.data.len())) + let remaining = self.data.len() - self.pos; + (remaining, Some(remaining)) } } +impl ExactSizeIterator for SamplesBuffer {} + #[cfg(test)] mod tests { use crate::buffer::SamplesBuffer; diff --git a/src/queue.rs b/src/queue.rs index 495606b4..82dcdd63 100644 --- a/src/queue.rs +++ b/src/queue.rs @@ -113,6 +113,8 @@ pub struct SourcesQueueOutput { } const THRESHOLD: usize = 512; +const SILENCE_SAMPLE_RATE: SampleRate = nz!(44100); +const SILENCE_CHANNELS: ChannelCount = nz!(1); impl Source for SourcesQueueOutput { #[inline] @@ -129,15 +131,13 @@ impl Source for SourcesQueueOutput { // constant. // Try the current `current_span_len`. - if let Some(val) = self.current.current_span_len() { - if val != 0 { - return Some(val); - } else if self.input.keep_alive_if_empty.load(Ordering::Acquire) - && self.input.next_sounds.lock().unwrap().is_empty() - { - // The next source will be a filler silence which will have the length of `THRESHOLD` - return Some(THRESHOLD); - } + if !self.current.is_exhausted() { + return self.current.current_span_len(); + } else if self.input.keep_alive_if_empty.load(Ordering::Acquire) + && self.input.next_sounds.lock().unwrap().is_empty() + { + // The next source will be a filler silence which will have the length of `THRESHOLD` + return Some(THRESHOLD); } // Try the size hint. @@ -154,12 +154,28 @@ impl Source for SourcesQueueOutput { #[inline] fn channels(&self) -> ChannelCount { - self.current.channels() + // When current source is exhausted, peek at the next source's metadata + if !self.current.is_exhausted() { + self.current.channels() + } else if let Some((next, _)) = self.input.next_sounds.lock().unwrap().first() { + next.channels() + } else { + // Queue is empty - return silence metadata + SILENCE_CHANNELS + } } #[inline] fn sample_rate(&self) -> SampleRate { - self.current.sample_rate() + // When current source is exhausted, peek at the next source's metadata + if !self.current.is_exhausted() { + self.current.sample_rate() + } else if let Some((next, _)) = self.input.next_sounds.lock().unwrap().first() { + next.sample_rate() + } else { + // Queue is empty - return silence metadata + SILENCE_SAMPLE_RATE + } } #[inline] @@ -221,7 +237,11 @@ impl SourcesQueueOutput { let mut next = self.input.next_sounds.lock().unwrap(); if next.is_empty() { - let silence = Box::new(Zero::new_samples(nz!(1), nz!(44100), THRESHOLD)) as Box<_>; + let silence = Box::new(Zero::new_samples( + SILENCE_CHANNELS, + SILENCE_SAMPLE_RATE, + THRESHOLD, + )) as Box<_>; if self.input.keep_alive_if_empty.load(Ordering::Acquire) { // Play a short silence in order to avoid spinlocking. (silence, None) @@ -247,7 +267,6 @@ mod tests { use crate::source::Source; #[test] - #[ignore] // FIXME: samples rate and channel not updated immediately after transition fn basic() { let (tx, mut rx) = queue::queue(false); diff --git a/src/source/from_iter.rs b/src/source/from_iter.rs index 59845758..c8c14062 100644 --- a/src/source/from_iter.rs +++ b/src/source/from_iter.rs @@ -92,10 +92,8 @@ where // Try the current `current_span_len`. if let Some(src) = &self.current_source { - if let Some(val) = src.current_span_len() { - if val != 0 { - return Some(val); - } + if !src.is_exhausted() { + return src.current_span_len(); } } diff --git a/src/source/mod.rs b/src/source/mod.rs index c7d469f8..bf222b92 100644 --- a/src/source/mod.rs +++ b/src/source/mod.rs @@ -168,14 +168,24 @@ pub use self::noise::{Pink, WhiteUniform}; /// channels can potentially change. /// pub trait Source: Iterator { - /// Returns the number of samples before the current span ends. `None` means "infinite" or - /// "until the sound ends". - /// Should never return 0 unless there's no more data. + /// Returns the number of samples before the current span ends. + /// + /// `None` means "infinite" or "until the sound ends". Sources that return `Some(x)` should + /// return `Some(0)` if and only if when there's no more data. /// /// After the engine has finished reading the specified number of samples, it will check /// whether the value of `channels()` and/or `sample_rate()` have changed. + /// + /// Note: This returns the total span size, not the remaining samples. Use `Iterator::size_hint` + /// to determine how many samples remain in the iterator. fn current_span_len(&self) -> Option; + /// Returns true if the source is exhausted (has no more samples available). + #[inline] + fn is_exhausted(&self) -> bool { + self.current_span_len() == Some(0) + } + /// Returns the number of channels. Channels are always interleaved. /// Should never be Zero fn channels(&self) -> ChannelCount; diff --git a/src/source/repeat.rs b/src/source/repeat.rs index 12a6b947..ef29968e 100644 --- a/src/source/repeat.rs +++ b/src/source/repeat.rs @@ -56,25 +56,28 @@ where { #[inline] fn current_span_len(&self) -> Option { - match self.inner.current_span_len() { - Some(0) => self.next.current_span_len(), - a => a, + if self.inner.is_exhausted() { + self.next.current_span_len() + } else { + self.inner.current_span_len() } } #[inline] fn channels(&self) -> ChannelCount { - match self.inner.current_span_len() { - Some(0) => self.next.channels(), - _ => self.inner.channels(), + if self.inner.is_exhausted() { + self.next.channels() + } else { + self.inner.channels() } } #[inline] fn sample_rate(&self) -> SampleRate { - match self.inner.current_span_len() { - Some(0) => self.next.sample_rate(), - _ => self.inner.sample_rate(), + if self.inner.is_exhausted() { + self.next.sample_rate() + } else { + self.inner.sample_rate() } } diff --git a/src/source/skip.rs b/src/source/skip.rs index 36f8d210..d3c43857 100644 --- a/src/source/skip.rs +++ b/src/source/skip.rs @@ -39,18 +39,22 @@ where return; } - let ns_per_sample: u128 = - NS_PER_SECOND / input.sample_rate().get() as u128 / input.channels().get() as u128; + let sample_rate = input.sample_rate().get() as u128; + let channels = input.channels().get() as u128; + + let samples_per_channel = duration.as_nanos() * sample_rate / NS_PER_SECOND; + let samples_to_skip: u128 = samples_per_channel * channels; // Check if we need to skip only part of the current span. - if span_len as u128 * ns_per_sample > duration.as_nanos() { - skip_samples(input, (duration.as_nanos() / ns_per_sample) as usize); + if span_len as u128 > samples_to_skip { + skip_samples(input, samples_to_skip as usize); return; } + duration -= Duration::from_nanos( + (NS_PER_SECOND * span_len as u128 / channels / sample_rate) as u64, + ); skip_samples(input, span_len); - - duration -= Duration::from_nanos((span_len * ns_per_sample as usize) as u64); } } From 1514256f523b5bea4f4243d26c922e31fd5052ce Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Mon, 22 Dec 2025 23:10:02 +0100 Subject: [PATCH 2/2] fix: ensure frame-aligned spans in queue to prevent channel misalignment Fixes two related issues with frame alignment in the queue: 1. Frame alignment for non-power-of-2 channel counts: The queue used a fixed threshold of 512 samples which could cause channel misalignment when the channel count doesn't evenly divide 512 (e.g., 6 channels: 512 % 6 = 2 sample offset). Replaced with dynamic threshold function that rounds to nearest frame boundary. 2. Mid-span ending detection: Sources that end before their promised current_span_len() would cause the queue to immediately transition to the next source, potentially mid-frame. Added sample tracking and silence padding to complete frames before transitioning. Changes: - Replace THRESHOLD constant with threshold(channels) helper function that ensures frame-aligned span lengths - Add sample tracking to detect mid-span source endings - Inject silence padding to complete incomplete frames - Add span_ending_mid_frame test to verify padding behavior Supersedes #684 --- CHANGELOG.md | 2 + src/queue.rs | 163 +++++++++++++++++++++++++++++++++++++--- src/source/mod.rs | 6 ++ tests/channel_volume.rs | 63 ++++++++++++++++ 4 files changed, 225 insertions(+), 9 deletions(-) create mode 100644 tests/channel_volume.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index b7bfc2d6..11eefdb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed audio distortion when queueing sources with different sample rates/channel counts or transitioning from empty queue. - Fixed `SamplesBuffer` to correctly report exhaustion and remaining samples. - Improved precision in `SkipDuration` to avoid off-by-a-few-samples errors. +- Fixed channel misalignment in queue with non-power-of-2 channel counts (e.g., 6 channels) by ensuring frame-aligned span lengths. +- Fixed channel misalignment when sources end before their promised span length by padding with silence to complete frames. ### Changed - `output_to_wav` renamed to `wav_to_file` and now takes ownership of the `Source`. diff --git a/src/queue.rs b/src/queue.rs index 82dcdd63..7f869586 100644 --- a/src/queue.rs +++ b/src/queue.rs @@ -35,6 +35,8 @@ pub fn queue(keep_alive_if_empty: bool) -> (Arc, SourcesQueue current: Box::new(Empty::new()) as Box<_>, signal_after_end: None, input: input.clone(), + samples_consumed_in_span: 0, + padding_samples_remaining: 0, }; (input, output) @@ -110,12 +112,29 @@ pub struct SourcesQueueOutput { // The next sounds. input: Arc, + + // Track samples consumed in the current span to detect mid-span endings. + samples_consumed_in_span: usize, + + // When a source ends mid-frame, this counts how many silence samples to inject + // to complete the frame before transitioning to the next source. + padding_samples_remaining: usize, } -const THRESHOLD: usize = 512; const SILENCE_SAMPLE_RATE: SampleRate = nz!(44100); const SILENCE_CHANNELS: ChannelCount = nz!(1); +/// Returns a threshold span length that ensures frame alignment. +/// +/// Spans must end on frame boundaries (multiples of channel count) to prevent +/// channel misalignment. Returns ~512 samples rounded to the nearest frame. +#[inline] +fn threshold(channels: ChannelCount) -> usize { + const BASE_SAMPLES: usize = 512; + let ch = channels.get() as usize; + BASE_SAMPLES.div_ceil(ch) * ch +} + impl Source for SourcesQueueOutput { #[inline] fn current_span_len(&self) -> Option { @@ -136,8 +155,8 @@ impl Source for SourcesQueueOutput { } else if self.input.keep_alive_if_empty.load(Ordering::Acquire) && self.input.next_sounds.lock().unwrap().is_empty() { - // The next source will be a filler silence which will have the length of `THRESHOLD` - return Some(THRESHOLD); + // The next source will be a filler silence which will have a frame-aligned length + return Some(threshold(self.current.channels())); } // Try the size hint. @@ -148,8 +167,8 @@ impl Source for SourcesQueueOutput { return Some(lower_bound); } - // Otherwise we use the constant value. - Some(THRESHOLD) + // Otherwise we use a frame-aligned threshold value. + Some(threshold(self.current.channels())) } #[inline] @@ -204,13 +223,33 @@ impl Iterator for SourcesQueueOutput { #[inline] fn next(&mut self) -> Option { loop { + // If we're padding to complete a frame, return silence. + if self.padding_samples_remaining > 0 { + self.padding_samples_remaining -= 1; + return Some(0.0); + } + // Basic situation that will happen most of the time. if let Some(sample) = self.current.next() { + self.samples_consumed_in_span += 1; return Some(sample); } - // Since `self.current` has finished, we need to pick the next sound. + // Source ended - check if we ended mid-frame and need padding. + let channels = self.current.channels().get() as usize; + let incomplete_frame_samples = self.samples_consumed_in_span % channels; + if incomplete_frame_samples > 0 { + // We're mid-frame - need to pad with silence to complete it. + self.padding_samples_remaining = channels - incomplete_frame_samples; + // Reset counter now since we're transitioning to a new span. + self.samples_consumed_in_span = 0; + // Continue loop - next iteration will inject silence. + continue; + } + + // Reset counter and move to next sound. // In order to avoid inlining this expensive operation, the code is in another function. + self.samples_consumed_in_span = 0; if self.go_next().is_err() { return None; } @@ -240,7 +279,7 @@ impl SourcesQueueOutput { let silence = Box::new(Zero::new_samples( SILENCE_CHANNELS, SILENCE_SAMPLE_RATE, - THRESHOLD, + threshold(SILENCE_CHANNELS), )) as Box<_>; if self.input.keep_alive_if_empty.load(Ordering::Acquire) { // Play a short silence in order to avoid spinlocking. @@ -263,8 +302,9 @@ impl SourcesQueueOutput { mod tests { use crate::buffer::SamplesBuffer; use crate::math::nz; - use crate::queue; - use crate::source::Source; + use crate::source::{SeekError, Source}; + use crate::{queue, ChannelCount, Sample, SampleRate}; + use std::time::Duration; #[test] fn basic() { @@ -340,4 +380,109 @@ mod tests { assert_eq!(rx.next(), Some(10.0)); assert_eq!(rx.next(), Some(-10.0)); } + + #[test] + fn span_ending_mid_frame() { + let mut test_source1 = TestSource::new(&[0.1, 0.2, 0.1, 0.2, 0.1]) + .with_channels(nz!(2)) + .with_false_span_len(Some(6)); + let mut test_source2 = TestSource::new(&[0.3, 0.4, 0.3, 0.4]).with_channels(nz!(2)); + + let (controls, mut source) = queue::queue(true); + controls.append(test_source1.clone()); + controls.append(test_source2.clone()); + + assert_eq!(source.next(), test_source1.next()); + assert_eq!(source.next(), test_source1.next()); + assert_eq!(source.next(), test_source1.next()); + assert_eq!(source.next(), test_source1.next()); + assert_eq!(source.next(), test_source1.next()); + assert_eq!(None, test_source1.next()); + + // Source promised span of 6 but only delivered 5 samples. + // With 2 channels, that's 2.5 frames. Queue should pad with silence. + assert_eq!( + source.next(), + Some(0.0), + "Expected silence to complete frame" + ); + + assert_eq!(source.next(), test_source2.next()); + assert_eq!(source.next(), test_source2.next()); + assert_eq!(source.next(), test_source2.next()); + assert_eq!(source.next(), test_source2.next()); + } + + /// Test helper source that allows setting false span length to simulate + /// sources that end before their promised span length. + #[derive(Debug, Clone)] + struct TestSource { + samples: Vec, + pos: usize, + channels: ChannelCount, + sample_rate: SampleRate, + total_span_len: Option, + } + + impl TestSource { + fn new(samples: &[Sample]) -> Self { + let samples = samples.to_vec(); + Self { + total_span_len: Some(samples.len()), + pos: 0, + channels: nz!(1), + sample_rate: nz!(44100), + samples, + } + } + + fn with_channels(mut self, count: ChannelCount) -> Self { + self.channels = count; + self + } + + fn with_false_span_len(mut self, total_len: Option) -> Self { + self.total_span_len = total_len; + self + } + } + + impl Iterator for TestSource { + type Item = Sample; + + fn next(&mut self) -> Option { + let res = self.samples.get(self.pos).copied(); + self.pos += 1; + res + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = self.samples.len().saturating_sub(self.pos); + (remaining, Some(remaining)) + } + } + + impl Source for TestSource { + fn current_span_len(&self) -> Option { + self.total_span_len + } + + fn channels(&self) -> ChannelCount { + self.channels + } + + fn sample_rate(&self) -> SampleRate { + self.sample_rate + } + + fn total_duration(&self) -> Option { + None + } + + fn try_seek(&mut self, _: Duration) -> Result<(), SeekError> { + Err(SeekError::NotSupported { + underlying_source: std::any::type_name::(), + }) + } + } } diff --git a/src/source/mod.rs b/src/source/mod.rs index bf222b92..5dc79813 100644 --- a/src/source/mod.rs +++ b/src/source/mod.rs @@ -176,6 +176,12 @@ pub trait Source: Iterator { /// After the engine has finished reading the specified number of samples, it will check /// whether the value of `channels()` and/or `sample_rate()` have changed. /// + /// # Frame Alignment + /// + /// Span lengths must be multiples of the channel count to ensure spans end on frame + /// boundaries. A "frame" is one sample for each channel. Returning a span length + /// that is not a multiple of `channels()` will cause channel misalignment issues. + /// /// Note: This returns the total span size, not the remaining samples. Use `Iterator::size_hint` /// to determine how many samples remain in the iterator. fn current_span_len(&self) -> Option; diff --git a/tests/channel_volume.rs b/tests/channel_volume.rs new file mode 100644 index 00000000..e04ebeff --- /dev/null +++ b/tests/channel_volume.rs @@ -0,0 +1,63 @@ +use std::fs; +use std::io::BufReader; + +use rodio::source::ChannelVolume; +use rodio::{queue, Decoder, Sample, Source}; + +fn create_6_channel_source() -> ChannelVolume>> { + let file = fs::File::open("assets/music.mp3").unwrap(); + let decoder = Decoder::try_from(file).unwrap(); + assert_eq!(decoder.channels().get(), 2); + let channel_volume = ChannelVolume::new(decoder, vec![1.0, 1.0, 0.0, 0.0, 0.0, 0.0]); + assert_eq!(channel_volume.channels().get(), 6); + channel_volume +} + +#[test] +fn channel_volume_without_queue() { + let channel_volume = create_6_channel_source(); + assert_output_only_on_first_two_channels(channel_volume, 6); +} + +#[test] +fn channel_volume_with_queue() { + let channel_volume = create_6_channel_source(); + let (controls, queue) = queue::queue(false); + controls.append(channel_volume); + assert_output_only_on_first_two_channels(queue, 6); +} + +fn assert_output_only_on_first_two_channels( + mut source: impl Source, + channels: usize, +) { + let mut frame_number = 0; + let mut samples_in_frame = Vec::new(); + + while let Some(sample) = source.next() { + samples_in_frame.push(sample); + + if samples_in_frame.len() == channels { + // We have a complete frame - verify channels 2+ are zero + for (ch, &sample) in samples_in_frame[2..].iter().enumerate() { + assert_eq!( + sample, + 0.0, + "frame {} channel {} had nonzero value (should be zero)", + frame_number, + ch + 2 + ); + } + + samples_in_frame.clear(); + frame_number += 1; + } + } + + assert_eq!( + samples_in_frame.len(), + 0, + "Source ended with partial frame {} (should end on frame boundary)", + samples_in_frame.len() + ); +}