From 0410ae5bf74984e347fd8c9af97c22ab690eca0f Mon Sep 17 00:00:00 2001 From: nift4 Date: Thu, 11 Jun 2026 18:11:18 +0200 Subject: [PATCH] Fix some dodgy math I was expanding SilenceSkippingAudioProcessor to cover different sample formats and noticed a couple instances of dodgy math. - SilenceSkippingAudioProcessor was not considering `bytesPerFrame` when calculating `maybeSilenceBufferSize` which in practice led to `minimumSilenceDurationUs` being divided by 4 (channel count 2 and sample format 2). The test `skipInNoisySignalWithShortSilences_skipsNothing` should've caught this but didn't, due to a bug in the test: - In `Pcm16BitAudioBuilder`, `appendFrames` was only generating half of the supposed millisecond amounts (but due to the loop in the caller, this wasn't noticed as the output was still correct size, just with wrong amount of silence/noise between each change). This is because the loop was accepting a frame count but did `i += channelCount`, that is, treating it as sample count. Thus, `skipInNoisySignalWithShortSilences_skipsNothing` only generated 15ms of silence and noise, which is below the threshold of 25ms (which is the intended default 100ms but divided by 4 due to aforementioned bug), and thus passed. - In `modifyVolume`, the volume percentage was calculated from the byte index, which could lead to the left and right channel having a different percentage or prevent reaching 100% of volume in edge cases. - In PcmAudioUtil, 8-bit PCM is treated as signed, but Android defines 8-bit PCM to be unsigned. --- .../media3/exoplayer/audio/PcmAudioUtil.java | 4 +-- .../audio/SilenceSkippingAudioProcessor.java | 17 +++++++++--- .../exoplayer/audio/PcmAudioUtilTest.java | 4 +-- .../SilenceSkippingAudioProcessorTest.java | 27 +++++++++++++------ 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/PcmAudioUtil.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/PcmAudioUtil.java index 7e43f29f8ca..6fda8676ff9 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/PcmAudioUtil.java +++ b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/PcmAudioUtil.java @@ -71,7 +71,7 @@ public static ByteBuffer rampUpVolume( public static int readAs32BitIntPcm(ByteBuffer buffer, @C.Encoding int pcmEncoding) { switch (pcmEncoding) { case C.ENCODING_PCM_8BIT: - return (buffer.get() & 0xFF) << 24; + return ((buffer.get() & 0xFF) - 128) << 24; case C.ENCODING_PCM_16BIT: return ((buffer.get() & 0xFF) << 16) | ((buffer.get() & 0xFF) << 24); case C.ENCODING_PCM_16BIT_BIG_ENDIAN: @@ -161,7 +161,7 @@ public static void write32BitIntPcm( buffer.put((byte) (pcm32bit >> 24)); return; case C.ENCODING_PCM_8BIT: - buffer.put((byte) (pcm32bit >> 24)); + buffer.put((byte) ((pcm32bit >> 24) + 128)); return; case C.ENCODING_PCM_32BIT_BIG_ENDIAN: buffer.put((byte) (pcm32bit >> 24)); diff --git a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/SilenceSkippingAudioProcessor.java b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/SilenceSkippingAudioProcessor.java index 3a9dc605fe4..20a3e8fbb5e 100644 --- a/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/SilenceSkippingAudioProcessor.java +++ b/libraries/exoplayer/src/main/java/androidx/media3/exoplayer/audio/SilenceSkippingAudioProcessor.java @@ -62,7 +62,7 @@ public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor { * Default minimum duration of audio that must be below {@code silenceThresholdLevel} before * silence starts being trimmed. Specified in microseconds. */ - public static final long DEFAULT_MINIMUM_SILENCE_DURATION_US = 100_000; + public static final long DEFAULT_MINIMUM_SILENCE_DURATION_US = 25_000; /** * Default maximum silence to keep in microseconds. This maximum is applied after {@code @@ -309,7 +309,9 @@ public void onFlush(StreamMetadata streamMetadata) { bytesPerFrame = inputAudioFormat.channelCount * 2; // Divide by 2 to allow the buffer to be split into two bytesPerFrame aligned parts. int maybeSilenceBufferSize = - alignToBytePerFrameBoundary(durationUsToFrames(minimumSilenceDurationUs) / 2) * 2; + alignToBytePerFrameBoundary( + durationUsToFrames(minimumSilenceDurationUs) * bytesPerFrame / 2) + * 2; if (maybeSilenceBuffer.length != maybeSilenceBufferSize) { maybeSilenceBuffer = new byte[maybeSilenceBufferSize]; contiguousOutputBuffer = new byte[maybeSilenceBufferSize]; @@ -647,6 +649,7 @@ private void modifyVolume(byte[] sampleBuffer, int size, @VolumeChangeType int v return; } + int lastFrameIdx = (size / bytesPerFrame) - 1; for (int idx = 0; idx < size; idx += 2) { byte mostSignificantByte = sampleBuffer[idx + 1]; byte leastSignificantByte = sampleBuffer[idx]; @@ -655,10 +658,10 @@ private void modifyVolume(byte[] sampleBuffer, int size, @VolumeChangeType int v int volumeModificationPercentage; if (volumeChangeType == FADE_OUT) { volumeModificationPercentage = - calculateFadeOutPercentage(/* value= */ idx, /* max= */ size - 1); + calculateFadeOutPercentage(/* value= */ idx / bytesPerFrame, /* max= */ lastFrameIdx); } else if (volumeChangeType == FADE_IN) { volumeModificationPercentage = - calculateFadeInPercentage(/* value= */ idx, /* max= */ size - 1); + calculateFadeInPercentage(/* value= */ idx / bytesPerFrame, /* max= */ lastFrameIdx); } else { volumeModificationPercentage = minVolumeToKeepPercentageWhenMuting; } @@ -669,12 +672,18 @@ private void modifyVolume(byte[] sampleBuffer, int size, @VolumeChangeType int v } private int calculateFadeOutPercentage(int value, int max) { + if (max == 0) { + return 0; + } return ((minVolumeToKeepPercentageWhenMuting - 100) * ((AVOID_TRUNCATION_FACTOR * value) / max)) / AVOID_TRUNCATION_FACTOR + 100; } private int calculateFadeInPercentage(int value, int max) { + if (max == 0) { + return 0; + } return (minVolumeToKeepPercentageWhenMuting + ((100 - minVolumeToKeepPercentageWhenMuting) * (AVOID_TRUNCATION_FACTOR * value) / max) / AVOID_TRUNCATION_FACTOR); diff --git a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/audio/PcmAudioUtilTest.java b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/audio/PcmAudioUtilTest.java index eb0c092c80f..dab5af10c1e 100644 --- a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/audio/PcmAudioUtilTest.java +++ b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/audio/PcmAudioUtilTest.java @@ -33,7 +33,7 @@ public final class PcmAudioUtilTest { @Test public void readAs32BitIntPcm_read8Bit_returnsExpectedValues() { ByteBuffer buffer = ByteBuffer.allocate(5); - buffer.put(hexToBytes("80" + "AB" + "00" + "12" + "7F")); + buffer.put(hexToBytes("00" + "2B" + "80" + "92" + "FF")); buffer.flip(); assertThat(PcmAudioUtil.readAs32BitIntPcm(buffer, C.ENCODING_PCM_8BIT)) @@ -197,7 +197,7 @@ public void write32BitIntPcm_write8Bit_returnsExpectedValues() { PcmAudioUtil.write32BitIntPcm(buffer, Integer.MAX_VALUE, C.ENCODING_PCM_8BIT); buffer.flip(); - assertThat(byteBufferToHex(buffer)).isEqualTo("80" + "AB" + "00" + "12" + "7F"); + assertThat(byteBufferToHex(buffer)).isEqualTo("00" + "2B" + "80" + "92" + "FF"); } @Test diff --git a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/audio/SilenceSkippingAudioProcessorTest.java b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/audio/SilenceSkippingAudioProcessorTest.java index d37de3cd098..0edb4f14785 100644 --- a/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/audio/SilenceSkippingAudioProcessorTest.java +++ b/libraries/exoplayer/src/test/java/androidx/media3/exoplayer/audio/SilenceSkippingAudioProcessorTest.java @@ -133,8 +133,8 @@ public void skipInNoisySignalWithShortSilences_skipsNothing() throws Exception { // Given a signal with only noise. InputBufferProvider inputBufferProvider = getInputBufferProviderForAlternatingSilenceAndNoise( - /* silenceDurationMs= */ 30, - TEST_SIGNAL_NOISE_DURATION_MS - 30, + /* silenceDurationMs= */ 24, + TEST_SIGNAL_NOISE_DURATION_MS - 24, TEST_SIGNAL_FRAME_COUNT); // When processing the entire signal. @@ -172,7 +172,7 @@ public void skipInAlternatingTestSignal_hasCorrectOutputAndSkippedFrameCounts() process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE); // The output has 50000 frames of noise, plus 50 * 0.2 * 1000 padding (plus rounding errors). - assertThat(totalOutputFrames).isIn(Range.closed(60000L - 500L, 60000L + 500L)); + assertThat(totalOutputFrames).isIn(Range.closed(60000L - 1500L, 60000L + 1500L)); assertThat(silenceSkippingAudioProcessor.getSkippedFrames()) .isEqualTo(TEST_SIGNAL_FRAME_COUNT - totalOutputFrames); } @@ -201,7 +201,7 @@ public void skipInAlternatingTestSignal_hasCorrectOutputAndSkippedFrameCounts() process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE); // The output has 50000 frames of noise, plus 50 * 0.2 * 1000 padding (plus rounding errors). - assertThat(totalOutputFrames).isIn(Range.closed(60000L - 500L, 60000L + 500L)); + assertThat(totalOutputFrames).isIn(Range.closed(60000L - 1500L, 60000L + 1500L)); assertThat(silenceSkippingAudioProcessor.getSkippedFrames()) .isEqualTo(TEST_SIGNAL_FRAME_COUNT - totalOutputFrames); } @@ -227,7 +227,7 @@ public void skipWithSmallerInputBufferSize_hasCorrectOutputAndSkippedFrameCounts process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 80); // The output has 50000 frames of noise, plus 50 * 0.2 * 1000 padding (plus rounding errors). - assertThat(totalOutputFrames).isIn(Range.closed(60000L - 500L, 60000L + 500L)); + assertThat(totalOutputFrames).isIn(Range.closed(60000L - 1500L, 60000L + 1500L)); assertThat(silenceSkippingAudioProcessor.getSkippedFrames()) .isEqualTo(TEST_SIGNAL_FRAME_COUNT - totalOutputFrames); } @@ -253,7 +253,7 @@ public void skipWithLargerInputBufferSize_hasCorrectOutputAndSkippedFrameCounts( process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 120); // The output has 50000 frames of noise, plus 50 * 0.2 * 1000 padding (plus rounding errors). - assertThat(totalOutputFrames).isIn(Range.closed(60000L - 500L, 60000L + 500L)); + assertThat(totalOutputFrames).isIn(Range.closed(60000L - 1500L, 60000L + 1500L)); assertThat(silenceSkippingAudioProcessor.getSkippedFrames()) .isEqualTo(TEST_SIGNAL_FRAME_COUNT - totalOutputFrames); } @@ -283,7 +283,7 @@ public void customSilenceRetentionValue_hasCorrectOutputAndSkippedFrameCounts() process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 120); // The output has 50000 frames of noise, plus 50 * 0.05 * 1000 padding (plus rounding errors). - assertThat(totalOutputFrames).isIn(Range.closed(52500L - 500L, 52500L + 500L)); + assertThat(totalOutputFrames).isIn(Range.closed(52500L - 1500L, 52500L + 1500L)); assertThat(silenceSkippingAudioProcessor.getSkippedFrames()) .isEqualTo(TEST_SIGNAL_FRAME_COUNT - totalOutputFrames); } @@ -347,16 +347,21 @@ private static InputBufferProvider getInputBufferProviderForAlternatingSilenceAn int silenceDurationMs, int noiseDurationMs, int totalFrameCount) { int sampleRate = AUDIO_FORMAT.sampleRate; int channelCount = AUDIO_FORMAT.channelCount; + int lastSize = 0; Pcm16BitAudioBuilder audioBuilder = new Pcm16BitAudioBuilder(channelCount, totalFrameCount); while (!audioBuilder.isFull()) { int silenceDurationFrames = (silenceDurationMs * sampleRate) / 1000; // Append stereo silence. audioBuilder.appendFrames( /* count= */ silenceDurationFrames, /* channelLevels...= */ (short) 0, (short) 0); + lastSize += silenceDurationFrames * channelCount * 2; + assertThat(audioBuilder.getSize()).isEqualTo(lastSize); int noiseDurationFrames = (noiseDurationMs * sampleRate) / 1000; // Append stereo noise. audioBuilder.appendFrames( /* count= */ noiseDurationFrames, /* channelLevels...= */ MAX_VALUE, MAX_VALUE); + lastSize += noiseDurationFrames * channelCount * 2; + assertThat(audioBuilder.getSize()).isEqualTo(lastSize); } return new InputBufferProvider(audioBuilder.build()); } @@ -410,13 +415,19 @@ public Pcm16BitAudioBuilder(int channelCount, int frameCount) { public void appendFrames(int count, short... channelLevels) { checkState(!built); checkState(channelLevels.length == channelCount); - for (int i = 0; i < count; i += channelCount) { + for (int i = 0; i < count; i++) { for (short channelLevel : channelLevels) { buffer.put(channelLevel); } } } + /** Returns how many bytes the buffer contains so far. */ + public int getSize() { + checkState(!built); + return buffer.position() * 2; + } + /** Returns whether the buffer is full. */ public boolean isFull() { checkState(!built);