Fix A/V desync when resuming HLS with video transcode + audio copy (#16580)
Some checks failed
CodeQL / Analyze (csharp) (push) Has been cancelled
Format / format-check (push) Has been cancelled
Tests / run-tests (macos-latest) (push) Has been cancelled
Tests / run-tests (ubuntu-latest) (push) Has been cancelled
Tests / run-tests (windows-latest) (push) Has been cancelled
OpenAPI Publish / OpenAPI - Publish Artifact (push) Has been cancelled
OpenAPI Publish / OpenAPI - Publish Unstable Spec (push) Has been cancelled
OpenAPI Publish / OpenAPI - Publish Stable Spec (push) Has been cancelled
Project Automation / Project board (push) Has been cancelled
Merge Conflict Labeler / Labeling (push) Has been cancelled
Stale PR Check / Check PRs with merge conflicts (push) Has been cancelled
Stale Issue Labeler / Check for stale issues (push) Has been cancelled

Fix A/V desync when resuming HLS with video transcode + audio copy
This commit is contained in:
Piotr Niełacny
2026-05-28 19:26:28 +02:00
committed by GitHub
parent ef9468e857
commit 8d544e4842
4 changed files with 153 additions and 28 deletions

View File

@@ -86,6 +86,7 @@ namespace MediaBrowser.Controller.MediaEncoding
private readonly Version _minFFmpegQsvVppScaleModeOption = new Version(6, 0);
private readonly Version _minFFmpegRkmppHevcDecDoviRpu = new Version(7, 1, 1);
private readonly Version _minFFmpegReadrateCatchupOption = new Version(8, 0);
private readonly Version _minFFmpegNoiseBsfDrop = new Version(5, 0);
private static readonly string[] _videoProfilesH264 =
[
@@ -1547,20 +1548,61 @@ namespace MediaBrowser.Controller.MediaEncoding
public string GetAudioBitStreamArguments(EncodingJobInfo state, string segmentContainer, string mediaSourceContainer)
{
    // Collect every audio bitstream filter that applies; they are emitted as a
    // single comma-separated "-bsf:a" chain at the end.
    var filters = new List<string>();

    // Optional filter that drops copied audio packets located before the seek
    // target (null/empty when it does not apply to this job).
    var noiseFilter = GetCopiedAudioTrimBsf(state);
    if (!string.IsNullOrEmpty(noiseFilter))
    {
        filters.Add(noiseFilter);
    }

    var segmentFormat = GetSegmentFileExtension(segmentContainer).TrimStart('.');

    // Apply the aac_adtstoasc bitstream filter when AAC audio from an
    // mpegts-style source (ts, aac, hls) is written into mp4 segments.
    if (string.Equals(segmentFormat, "mp4", StringComparison.OrdinalIgnoreCase)
        && (string.Equals(mediaSourceContainer, "ts", StringComparison.OrdinalIgnoreCase)
            || string.Equals(mediaSourceContainer, "aac", StringComparison.OrdinalIgnoreCase)
            || string.Equals(mediaSourceContainer, "hls", StringComparison.OrdinalIgnoreCase))
        && IsAAC(state.AudioStream))
    {
        filters.Add("aac_adtstoasc");
    }

    // The leading space lets callers append the result directly to an
    // existing argument string; no filters means no argument at all.
    return filters.Count == 0
        ? string.Empty
        : " -bsf:a " + string.Join(',', filters);
}
// Builds the "noise" bitstream filter expression used to trim stream-copied
// audio when resuming an HLS stream whose video is being transcoded.
//
// Background: with accurate_seek (the default) a transcoded video stream is
// trimmed to the exact seek point by discarding frames in the decoder.
// Stream-copied audio never passes through a decoder, so it begins at the
// nearest keyframe, which may be seconds before the target. Dropping the
// copied audio packets that precede the seek target with the noise bsf gives
// the same trim precision without re-encoding.
//
// The noise bsf's drop= parameter requires ffmpeg >= 5.0.
// Important: this must not be used with wtv input because it breaks seeking.
//
// Returns null when the filter does not apply to the given job.
private string GetCopiedAudioTrimBsf(EncodingJobInfo state)
{
    // Only HLS video requests are candidates.
    if (state.TranscodingType is not TranscodingJobType.Hls)
    {
        return null;
    }

    if (!state.IsVideoRequest)
    {
        return null;
    }

    // The mismatch only exists when video is transcoded while audio is copied.
    if (IsCopyCodec(state.OutputVideoCodec))
    {
        return null;
    }

    if (!IsCopyCodec(state.OutputAudioCodec))
    {
        return null;
    }

    // wtv sources are excluded because the filter breaks seeking for them.
    if (string.Equals(state.InputContainer, "wtv", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    // Older encoders lack the drop= option on the noise bsf.
    if (_mediaEncoder.EncoderVersion < _minFFmpegNoiseBsfDrop)
    {
        return null;
    }

    // Nothing to trim when playback starts at the beginning.
    var seekTicks = state.BaseRequest.StartTimeTicks.GetValueOrDefault();
    if (seekTicks <= 0)
    {
        return null;
    }

    // Express the seek target in seconds with millisecond precision, using the
    // invariant culture so the decimal separator is always '.'.
    var targetSeconds = (double)seekTicks / TimeSpan.TicksPerSecond;
    var formattedTarget = targetSeconds.ToString("F3", CultureInfo.InvariantCulture);

    return "noise=drop='lt(pts*tb\\," + formattedTarget + ")'";
}
public static string GetSegmentFileExtension(string segmentContainer)
@@ -3006,23 +3048,6 @@ namespace MediaBrowser.Controller.MediaEncoding
}
seekParam += string.Format(CultureInfo.InvariantCulture, "-ss {0}", _mediaEncoder.GetTimeParameter(seekTick));
if (state.IsVideoRequest)
{
// If we are remuxing, then the copied stream cannot be seeked accurately (it will seek to the nearest
// keyframe). If we are using fMP4, then force all other streams to use the same inaccurate seeking to
// avoid A/V sync issues which cause playback issues on some devices.
// When remuxing video, the segment start times correspond to key frames in the source stream, so this
// option shouldn't change the seeked point that much.
// Important: make sure not to use it with wtv because it breaks seeking
if (state.TranscodingType is TranscodingJobType.Hls
&& string.Equals(segmentContainer, "mp4", StringComparison.OrdinalIgnoreCase)
&& (IsCopyCodec(state.OutputVideoCodec) || IsCopyCodec(state.OutputAudioCodec))
&& !string.Equals(state.InputContainer, "wtv", StringComparison.OrdinalIgnoreCase))
{
seekParam += " -noaccurate_seek";
}
}
}
return seekParam;

View File

@@ -61,7 +61,7 @@ public class EncodingOptions
SubtitleExtractionTimeoutMinutes = 30;
AllowOnDemandMetadataBasedKeyframeExtractionForExtensions = ["mkv"];
HardwareDecodingCodecs = ["h264", "vc1"];
HlsAudioSeekStrategy = HlsAudioSeekStrategy.DisableAccurateSeek;
HlsAudioSeekStrategy = HlsAudioSeekStrategy.TrimCopiedAudio;
}
/// <summary>
@@ -307,6 +307,6 @@ public class EncodingOptions
/// <summary>
/// Gets or sets the method used for audio seeking in HLS.
/// </summary>
[DefaultValue(HlsAudioSeekStrategy.TrimCopiedAudio)]
public HlsAudioSeekStrategy HlsAudioSeekStrategy { get; set; }
}

View File

@@ -7,11 +7,12 @@ namespace MediaBrowser.Model.Configuration
public enum HlsAudioSeekStrategy
{
/// <summary>
/// If the video stream is transcoded and the audio stream is copied,
/// seek the video stream to the same keyframe as the audio stream. The
/// resulting timestamps in the output streams may be inaccurate.
/// When video is transcoded and audio is copied, use a bitstream filter
/// to drop copied audio packets before the seek point, aligning them
/// with the accurately-seeked video. Timestamps are accurate and audio
/// remains stream-copied (no re-encoding overhead).
/// </summary>
DisableAccurateSeek = 0,
TrimCopiedAudio = 0,
/// <summary>
/// Prevent audio streams from being copied if the video stream is transcoded.

View File

@@ -0,0 +1,99 @@
using System;
using System.Globalization;
using MediaBrowser.Common.Configuration;
using MediaBrowser.Controller.IO;
using MediaBrowser.Controller.MediaEncoding;
using MediaBrowser.Model.Dlna;
using MediaBrowser.Model.Entities;
using Microsoft.Extensions.Configuration;
using Moq;
using Xunit;
using IConfigurationManager = MediaBrowser.Common.Configuration.IConfigurationManager;
namespace Jellyfin.Controller.Tests.MediaEncoding
{
    /// <summary>
    /// Verifies which audio bitstream filters GetAudioBitStreamArguments emits
    /// for different combinations of job type, codecs, containers, seek
    /// position and ffmpeg version.
    /// </summary>
    public class EncodingHelperAudioBitStreamTests
    {
        // 630_630_000 ticks == 63.063 seconds, matching the value in the expected filter strings.
        private const long DefaultSeekTicks = 630_630_000L;
        private const string DefaultFfmpegVersion = "5.0";

        private const string AdtsOnly = " -bsf:a aac_adtstoasc";
        private const string NoiseOnly = " -bsf:a noise=drop='lt(pts*tb\\,63.063)'";
        private const string BothFilters = " -bsf:a noise=drop='lt(pts*tb\\,63.063)',aac_adtstoasc";

        // Creates an EncodingHelper whose media encoder reports the given ffmpeg version.
        private static EncodingHelper CreateHelper(string ffmpegVersion)
        {
            Func<long, string> formatTicks = ticks =>
                TimeSpan.FromTicks(ticks).ToString(@"hh\:mm\:ss\.fff", CultureInfo.InvariantCulture);

            var encoderMock = new Mock<IMediaEncoder>();
            encoderMock
                .SetupGet(e => e.EncoderVersion)
                .Returns(Version.Parse(ffmpegVersion));
            encoderMock
                .Setup(e => e.GetTimeParameter(It.IsAny<long>()))
                .Returns(formatTicks);

            var helper = new EncodingHelper(
                Mock.Of<IApplicationPaths>(),
                encoderMock.Object,
                Mock.Of<ISubtitleEncoder>(),
                Mock.Of<IConfiguration>(),
                Mock.Of<IConfigurationManager>(),
                Mock.Of<IPathManager>());

            return helper;
        }

        // Creates a ten-minute video job state with a single audio stream.
        private static EncodingJobInfo CreateState(
            TranscodingJobType jobType,
            string outputVideoCodec,
            string outputAudioCodec,
            string audioStreamCodec,
            string inputContainer,
            long startTimeTicks)
        {
            var audio = new MediaStream
            {
                Type = MediaStreamType.Audio,
                Codec = audioStreamCodec
            };

            var request = new BaseEncodingJobOptions
            {
                StartTimeTicks = startTimeTicks
            };

            return new EncodingJobInfo(jobType)
            {
                IsVideoRequest = true,
                OutputVideoCodec = outputVideoCodec,
                OutputAudioCodec = outputAudioCodec,
                InputContainer = inputContainer,
                RunTimeTicks = TimeSpan.FromMinutes(10).Ticks,
                AudioStream = audio,
                BaseRequest = request
            };
        }

        [Theory]

        // Every gate passes: trim filter plus aac_adtstoasc for ts/aac/hls sources.
        [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", BothFilters)]
        [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "aac", BothFilters)]
        [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "hls", BothFilters)]

        // Trim filter suppressed: not an HLS job.
        [InlineData(TranscodingJobType.Progressive, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)]

        // Trim filter suppressed: video is stream-copied.
        [InlineData(TranscodingJobType.Hls, "copy", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)]

        // Trim filter suppressed: audio is not stream-copied.
        [InlineData(TranscodingJobType.Hls, "libx264", "aac", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)]

        // Trim filter suppressed: wtv input container.
        [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "wtv", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)]

        // Trim filter suppressed: no seek offset.
        [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", 0L, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)]

        // Trim filter suppressed: ffmpeg older than 5.0.
        [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, "4.4.6", "mp4", "ts", AdtsOnly)]

        // aac_adtstoasc suppressed: segment container is not mp4.
        [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "ts", "ts", NoiseOnly)]

        // aac_adtstoasc suppressed: media source container is not ts/aac/hls.
        [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "mkv", NoiseOnly)]

        // aac_adtstoasc suppressed: audio stream is not AAC.
        [InlineData(TranscodingJobType.Hls, "libx264", "copy", "ac3", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", NoiseOnly)]
        public void AudioBitStreamArguments_AppliesGates(
            TranscodingJobType jobType,
            string outputVideoCodec,
            string outputAudioCodec,
            string audioStreamCodec,
            string inputContainer,
            long startTicks,
            string ffmpegVersion,
            string segmentContainer,
            string mediaSourceContainer,
            string expected)
        {
            // Arrange
            var helper = CreateHelper(ffmpegVersion);
            var jobState = CreateState(jobType, outputVideoCodec, outputAudioCodec, audioStreamCodec, inputContainer, startTicks);

            // Act
            var actual = helper.GetAudioBitStreamArguments(jobState, segmentContainer, mediaSourceContainer);

            // Assert
            Assert.Equal(expected, actual);
        }
    }
}