1#![cfg_attr(feature = "docs", doc = "\n\nSee the [changelog][changelog] for a full release history.")]
3#![cfg_attr(feature = "docs", doc = "## Feature flags")]
4#![cfg_attr(feature = "docs", doc = document_features::document_features!())]
5#![allow(clippy::single_match)]
12#![deny(unsafe_code)]
14#![deny(unreachable_pub)]
15#![deny(clippy::mod_module_files)]
16
17use std::collections::VecDeque;
18use std::fmt::Debug;
19use std::io;
20
21use byteorder::{BigEndian, ReadBytesExt};
22use bytes::{Buf, Bytes};
23use isobmff::boxes::{
24 Brand, ChunkOffsetBox, FileTypeBox, HandlerBox, HandlerType, MediaBox, MediaDataBox, MediaHeaderBox,
25 MediaInformationBox, MovieBox, MovieExtendsBox, MovieFragmentBox, MovieFragmentHeaderBox, MovieHeaderBox,
26 SampleDescriptionBox, SampleSizeBox, SampleTableBox, SampleToChunkBox, SoundMediaHeaderBox, TimeToSampleBox, TrackBox,
27 TrackExtendsBox, TrackFragmentBaseMediaDecodeTimeBox, TrackFragmentBox, TrackFragmentHeaderBox, TrackHeaderBox,
28 TrackRunBox, VideoMediaHeaderBox,
29};
30use isobmff::{IsoSized, UnknownBox};
31use scuffle_bytes_util::zero_copy::Serialize;
32use scuffle_flv::audio::AudioData;
33use scuffle_flv::audio::body::AudioTagBody;
34use scuffle_flv::audio::body::legacy::LegacyAudioTagBody;
35use scuffle_flv::audio::body::legacy::aac::AacAudioData;
36use scuffle_flv::audio::header::AudioTagHeader;
37use scuffle_flv::audio::header::legacy::{LegacyAudioTagHeader, SoundType};
38use scuffle_flv::script::{OnMetaData, ScriptData};
39use scuffle_flv::tag::{FlvTag, FlvTagData};
40use scuffle_flv::video::VideoData;
41use scuffle_flv::video::body::VideoTagBody;
42use scuffle_flv::video::body::enhanced::{ExVideoTagBody, VideoPacket, VideoPacketCodedFrames, VideoPacketSequenceStart};
43use scuffle_flv::video::body::legacy::LegacyVideoTagBody;
44use scuffle_flv::video::header::enhanced::VideoFourCc;
45use scuffle_flv::video::header::legacy::{LegacyVideoTagHeader, LegacyVideoTagHeaderAvcPacket};
46use scuffle_flv::video::header::{VideoFrameType, VideoTagHeader, VideoTagHeaderData};
47use scuffle_h264::Sps;
48
49mod codecs;
50mod define;
51mod errors;
52
53pub use define::*;
54pub use errors::TransmuxError;
55
/// Sequence headers and metadata collected from the queued tags by `Transmuxer::find_tags`.
///
/// A field is `None` when no tag of that kind was encountered during the scan.
struct Tags<'a> {
    /// Video sequence header (AVC, AV1 or HEVC configuration).
    video_sequence_header: Option<VideoSequenceHeader<'a>>,
    /// AAC audio sequence header together with the sound size and sound type of its tag header.
    audio_sequence_header: Option<AudioSequenceHeader>,
    /// Contents of the `onMetaData` script tag.
    scriptdata_tag: Option<OnMetaData<'a>>,
}
61
/// Converts queued FLV tags into an init segment followed by one media segment
/// (`moof` + `mdat`) per audio or video tag.
#[derive(Debug, Clone)]
pub struct Transmuxer<'a> {
    /// Sum of the durations of all audio samples emitted so far; written as the base media
    /// decode time of the next audio fragment.
    audio_duration: u64,
    /// Sum of the durations of all video samples emitted so far; written as the base media
    /// decode time of the next video fragment.
    video_duration: u64,
    /// Sequence number placed in the next movie fragment header. Starts at 1 and is
    /// incremented after every emitted fragment.
    sequence_number: u32,
    /// `timestamp_ms` of the most recently emitted video tag (0 until one has been emitted).
    last_video_timestamp: u32,
    /// Video and audio settings, populated once the init segment has been produced.
    settings: Option<(VideoSettings, AudioSettings)>,
    /// Tags waiting to be muxed, in arrival order.
    tags: VecDeque<FlvTag<'a>>,
}
74
75impl Default for Transmuxer<'_> {
76 fn default() -> Self {
77 Self::new()
78 }
79}
80
81impl<'a> Transmuxer<'a> {
82 pub fn new() -> Self {
83 Self {
84 sequence_number: 1,
85 tags: VecDeque::new(),
86 audio_duration: 0,
87 video_duration: 0,
88 last_video_timestamp: 0,
89 settings: None,
90 }
91 }
92
93 pub fn demux(&mut self, data: Bytes) -> Result<(), TransmuxError> {
95 let mut cursor = io::Cursor::new(data);
96 while cursor.has_remaining() {
97 cursor.read_u32::<BigEndian>()?; if !cursor.has_remaining() {
99 break;
100 }
101
102 let tag = FlvTag::demux(&mut cursor)?;
103 self.tags.push_back(tag);
104 }
105
106 Ok(())
107 }
108
    /// Queues an already-parsed FLV tag.
    ///
    /// The tag is appended to the back of the pending queue that [`Transmuxer::mux`] consumes.
    pub fn add_tag(&mut self, tag: FlvTag<'a>) {
        self.tags.push_back(tag);
    }
113
114 pub fn mux(&mut self) -> Result<Option<TransmuxResult>, TransmuxError> {
117 let mut writer = Vec::new();
118
119 let Some((video_settings, _)) = &self.settings else {
120 let Some((video_settings, audio_settings)) = self.init_sequence(&mut writer)? else {
121 if self.tags.len() > 30 {
122 return Err(TransmuxError::NoSequenceHeaders);
124 }
125
126 return Ok(None);
128 };
129
130 self.settings = Some((video_settings.clone(), audio_settings.clone()));
131
132 return Ok(Some(TransmuxResult::InitSegment {
133 data: Bytes::from(writer),
134 audio_settings,
135 video_settings,
136 }));
137 };
138
139 loop {
140 let Some(tag) = self.tags.pop_front() else {
141 return Ok(None);
142 };
143
144 let mdat_data;
145 let total_duration;
146 let trun_sample;
147 let mut is_audio = false;
148 let mut is_keyframe = false;
149
150 let duration =
151 if self.last_video_timestamp == 0 || tag.timestamp_ms == 0 || tag.timestamp_ms < self.last_video_timestamp {
152 1000 } else {
155 let delta = tag.timestamp_ms as f64 - self.last_video_timestamp as f64;
165 let expected_delta = 1000.0 / video_settings.framerate;
166 if (delta - expected_delta).abs() <= 1.0 {
167 1000
168 } else {
169 (delta * video_settings.framerate) as u32
170 }
171 };
172
173 match tag.data {
174 FlvTagData::Audio(AudioData {
175 body: AudioTagBody::Legacy(LegacyAudioTagBody::Aac(AacAudioData::Raw(data))),
176 ..
177 }) => {
178 let (sample, duration) = codecs::aac::trun_sample(&data)?;
179
180 trun_sample = sample;
181 mdat_data = data;
182 total_duration = duration;
183 is_audio = true;
184 }
185 FlvTagData::Video(VideoData {
186 header:
187 VideoTagHeader {
188 frame_type,
189 data:
190 VideoTagHeaderData::Legacy(LegacyVideoTagHeader::AvcPacket(
191 LegacyVideoTagHeaderAvcPacket::Nalu { composition_time_offset },
192 )),
193 },
194 body: VideoTagBody::Legacy(LegacyVideoTagBody::Other { data }),
195 ..
196 }) => {
197 let composition_time =
198 ((composition_time_offset as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
199
200 let sample = codecs::avc::trun_sample(frame_type, composition_time as u32, duration, &data)?;
201
202 trun_sample = sample;
203 total_duration = duration;
204 mdat_data = data;
205
206 is_keyframe = frame_type == VideoFrameType::KeyFrame;
207 }
208 FlvTagData::Video(VideoData {
209 header: VideoTagHeader { frame_type, .. },
210 body:
211 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
212 video_four_cc: VideoFourCc::Av1,
213 packet: VideoPacket::CodedFrames(VideoPacketCodedFrames::Other(data)),
214 }),
215 ..
216 }) => {
217 let sample = codecs::av1::trun_sample(frame_type, duration, &data)?;
218
219 trun_sample = sample;
220 total_duration = duration;
221 mdat_data = data;
222
223 is_keyframe = frame_type == VideoFrameType::KeyFrame;
224 }
225 FlvTagData::Video(VideoData {
226 header: VideoTagHeader { frame_type, .. },
227 body:
228 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
229 video_four_cc: VideoFourCc::Hevc,
230 packet,
231 }),
232 ..
233 }) => {
234 let (composition_time, data) = match packet {
235 VideoPacket::CodedFrames(VideoPacketCodedFrames::Hevc {
236 composition_time_offset,
237 data,
238 }) => (Some(composition_time_offset), data),
239 VideoPacket::CodedFramesX { data } => (None, data),
240 _ => continue,
241 };
242
243 let composition_time =
244 ((composition_time.unwrap_or_default() as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
245
246 let sample = codecs::hevc::trun_sample(frame_type, composition_time as i32, duration, &data)?;
247
248 trun_sample = sample;
249 total_duration = duration;
250 mdat_data = data;
251
252 is_keyframe = frame_type == VideoFrameType::KeyFrame;
253 }
254 _ => {
255 continue;
257 }
258 }
259
260 let trafs = {
261 let (main_duration, main_id) = if is_audio {
262 (self.audio_duration, 2)
263 } else {
264 (self.video_duration, 1)
265 };
266
267 let traf = TrackFragmentBox {
268 tfhd: TrackFragmentHeaderBox::new(main_id, None, None, None, None, None),
269 trun: vec![TrackRunBox::new(vec![trun_sample], None)],
270 sbgp: vec![],
271 sgpd: vec![],
272 subs: vec![],
273 saiz: vec![],
274 saio: vec![],
275 tfdt: Some(TrackFragmentBaseMediaDecodeTimeBox::new(main_duration)),
276 meta: None,
277 udta: None,
278 };
279 vec![traf]
282 };
283
284 let mut moof = MovieFragmentBox {
285 mfhd: MovieFragmentHeaderBox::new(self.sequence_number),
286 meta: None,
287 traf: trafs,
288 udta: None,
289 };
290
291 let moof_size = moof.size();
293
294 let traf = moof.traf.first_mut().expect("we just created the moof with a traf");
298
299 let trun = traf.trun.first_mut().expect("we just created the video traf with a trun");
301
302 trun.data_offset = Some(moof_size as i32 + 8);
306
307 moof.serialize(&mut writer)?;
309
310 MediaDataBox::new(mdat_data.into()).serialize(&mut writer)?;
312
313 self.sequence_number += 1;
315
316 if is_audio {
317 self.audio_duration += total_duration as u64;
318 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
319 data: Bytes::from(writer),
320 ty: MediaType::Audio,
321 keyframe: false,
322 timestamp: self.audio_duration - total_duration as u64,
323 })));
324 } else {
325 self.video_duration += total_duration as u64;
326 self.last_video_timestamp = tag.timestamp_ms;
327 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
328 data: Bytes::from(writer),
329 ty: MediaType::Video,
330 keyframe: is_keyframe,
331 timestamp: self.video_duration - total_duration as u64,
332 })));
333 }
334 }
335 }
336
337 fn find_tags(&self) -> Tags<'a> {
339 let tags = self.tags.iter();
340 let mut video_sequence_header = None;
341 let mut audio_sequence_header = None;
342 let mut scriptdata_tag = None;
343
344 for tag in tags {
345 if video_sequence_header.is_some() && audio_sequence_header.is_some() && scriptdata_tag.is_some() {
346 break;
347 }
348
349 match &tag.data {
350 FlvTagData::Video(VideoData {
351 body: VideoTagBody::Legacy(LegacyVideoTagBody::AvcVideoPacketSeqHdr(data)),
352 ..
353 }) => {
354 video_sequence_header = Some(VideoSequenceHeader::Avc(data.clone()));
355 }
356 FlvTagData::Video(VideoData {
357 body:
358 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
359 video_four_cc: VideoFourCc::Av1,
360 packet: VideoPacket::SequenceStart(VideoPacketSequenceStart::Av1(config)),
361 }),
362 ..
363 }) => {
364 video_sequence_header = Some(VideoSequenceHeader::Av1(config.clone()));
365 }
366 FlvTagData::Video(VideoData {
367 body:
368 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
369 video_four_cc: VideoFourCc::Hevc,
370 packet: VideoPacket::SequenceStart(VideoPacketSequenceStart::Hevc(config)),
371 }),
372 ..
373 }) => {
374 video_sequence_header = Some(VideoSequenceHeader::Hevc(config.clone()));
375 }
376 FlvTagData::Audio(AudioData {
377 body: AudioTagBody::Legacy(LegacyAudioTagBody::Aac(AacAudioData::SequenceHeader(data))),
378 header:
379 AudioTagHeader::Legacy(LegacyAudioTagHeader {
380 sound_size, sound_type, ..
381 }),
382 ..
383 }) => {
384 audio_sequence_header = Some(AudioSequenceHeader {
385 data: AudioSequenceHeaderData::Aac(data.clone()),
386 sound_size: *sound_size,
387 sound_type: *sound_type,
388 });
389 }
390 FlvTagData::ScriptData(ScriptData::OnMetaData(metadata)) => {
391 scriptdata_tag = Some(*metadata.clone());
392 }
393 _ => {}
394 }
395 }
396
397 Tags {
398 video_sequence_header,
399 audio_sequence_header,
400 scriptdata_tag,
401 }
402 }
403
404 fn init_sequence(
406 &mut self,
407 writer: &mut impl io::Write,
408 ) -> Result<Option<(VideoSettings, AudioSettings)>, TransmuxError> {
409 let Tags {
412 video_sequence_header,
413 audio_sequence_header,
414 scriptdata_tag,
415 } = self.find_tags();
416
417 let Some(video_sequence_header) = video_sequence_header else {
418 return Ok(None);
419 };
420 let Some(audio_sequence_header) = audio_sequence_header else {
421 return Ok(None);
422 };
423
424 let video_codec;
425 let audio_codec;
426 let video_width;
427 let video_height;
428 let audio_channels;
429 let audio_sample_rate;
430 let mut video_fps = 0.0;
431
432 let mut estimated_video_bitrate = 0;
433 let mut estimated_audio_bitrate = 0;
434
435 if let Some(scriptdata_tag) = scriptdata_tag {
436 video_fps = scriptdata_tag.framerate.unwrap_or(0.0);
437 estimated_video_bitrate = scriptdata_tag.videodatarate.map(|v| (v * 1024.0) as u32).unwrap_or(0);
438 estimated_audio_bitrate = scriptdata_tag.audiodatarate.map(|v| (v * 1024.0) as u32).unwrap_or(0);
439 }
440
441 let mut compatible_brands = vec![Brand::Iso5, Brand::Iso6];
442
443 let video_stsd_entry = match video_sequence_header {
444 VideoSequenceHeader::Avc(config) => {
445 compatible_brands.push(Brand::Avc1);
446 video_codec = VideoCodec::Avc {
447 constraint_set: config.profile_compatibility,
448 level: config.level_indication,
449 profile: config.profile_indication,
450 };
451
452 let sps = Sps::parse_with_emulation_prevention(io::Cursor::new(&config.sps[0]))
453 .map_err(|_| TransmuxError::InvalidAVCDecoderConfigurationRecord)?;
454 video_width = sps.width() as u32;
455 video_height = sps.height() as u32;
456
457 let frame_rate = sps.frame_rate();
458 if let Some(frame_rate) = frame_rate {
459 video_fps = frame_rate;
460 }
461
462 UnknownBox::try_from_box(codecs::avc::stsd_entry(config, &sps)?)?
463 }
464 VideoSequenceHeader::Av1(config) => {
465 compatible_brands.push(Brand(*b"av01"));
466 let (entry, seq_obu) = codecs::av1::stsd_entry(config)?;
467
468 video_height = seq_obu.max_frame_height as u32;
469 video_width = seq_obu.max_frame_width as u32;
470
471 let op_point = &seq_obu.operating_points[0];
472
473 video_codec = VideoCodec::Av1 {
474 profile: seq_obu.seq_profile,
475 level: op_point.seq_level_idx,
476 tier: op_point.seq_tier,
477 depth: seq_obu.color_config.bit_depth as u8,
478 monochrome: seq_obu.color_config.mono_chrome,
479 sub_sampling_x: seq_obu.color_config.subsampling_x,
480 sub_sampling_y: seq_obu.color_config.subsampling_y,
481 color_primaries: seq_obu.color_config.color_primaries,
482 transfer_characteristics: seq_obu.color_config.transfer_characteristics,
483 matrix_coefficients: seq_obu.color_config.matrix_coefficients,
484 full_range_flag: seq_obu.color_config.full_color_range,
485 };
486
487 UnknownBox::try_from_box(entry)?
488 }
489 VideoSequenceHeader::Hevc(config) => {
490 compatible_brands.push(Brand(*b"hev1"));
491 video_codec = VideoCodec::Hevc {
492 constraint_indicator: config.general_constraint_indicator_flags,
493 level: config.general_level_idc,
494 profile: config.general_profile_idc,
495 profile_compatibility: config.general_profile_compatibility_flags,
496 tier: config.general_tier_flag,
497 general_profile_space: config.general_profile_space,
498 };
499
500 let (entry, sps) = codecs::hevc::stsd_entry(config)?;
501 if let Some(info) = sps.vui_parameters.as_ref().and_then(|p| p.vui_timing_info.as_ref()) {
502 video_fps = info.time_scale.get() as f64 / info.num_units_in_tick.get() as f64;
503 }
504
505 video_width = sps.cropped_width() as u32;
506 video_height = sps.cropped_height() as u32;
507
508 UnknownBox::try_from_box(entry)?
509 }
510 };
511
512 let audio_stsd_entry = match audio_sequence_header.data {
513 AudioSequenceHeaderData::Aac(data) => {
514 compatible_brands.push(Brand::Mp41);
515 let (entry, config) =
516 codecs::aac::stsd_entry(audio_sequence_header.sound_size, audio_sequence_header.sound_type, data)?;
517
518 audio_sample_rate = config.sampling_frequency;
519
520 audio_codec = AudioCodec::Aac {
521 object_type: config.audio_object_type,
522 };
523 audio_channels = match audio_sequence_header.sound_type {
524 SoundType::Mono => 1,
525 SoundType::Stereo => 2,
526 _ => return Err(TransmuxError::InvalidAudioChannels),
527 };
528
529 entry
530 }
531 };
532
533 if video_fps == 0.0 {
534 return Err(TransmuxError::InvalidVideoFrameRate);
535 }
536
537 if video_width == 0 || video_height == 0 {
538 return Err(TransmuxError::InvalidVideoDimensions);
539 }
540
541 if audio_sample_rate == 0 {
542 return Err(TransmuxError::InvalidAudioSampleRate);
543 }
544
545 let video_timescale = (1000.0 * video_fps) as u32;
551
552 FileTypeBox {
554 major_brand: Brand::Iso5,
555 minor_version: 512,
556 compatible_brands,
557 }
558 .serialize(&mut *writer)?;
559
560 MovieBox {
562 mvhd: MovieHeaderBox::new(0, 0, 1000, 0, 1),
563 meta: None,
564 trak: vec![
565 TrackBox::new(
566 TrackHeaderBox::new(0, 0, 1, 0, Some((video_width, video_height))), None, MediaBox::new(
570 MediaHeaderBox::new(0, 0, video_timescale, 0), HandlerBox::new(HandlerType::Video, "VideoHandler".to_string().into()), MediaInformationBox::new(
574 SampleTableBox::new(
576 SampleDescriptionBox::new(vec![video_stsd_entry]), TimeToSampleBox::default(), SampleToChunkBox::default(), Some(SampleSizeBox::default()), ChunkOffsetBox::default(), ),
582 Some(VideoMediaHeaderBox::default()), None, ),
585 ),
586 ),
587 TrackBox::new(
588 TrackHeaderBox::new(0, 0, 2, 0, None), None, MediaBox::new(
592 MediaHeaderBox::new(0, 0, audio_sample_rate, 0), HandlerBox::new(HandlerType::Audio, "SoundHandler".to_string().into()), MediaInformationBox::new(
596 SampleTableBox::new(
598 SampleDescriptionBox::new(vec![UnknownBox::try_from_box(audio_stsd_entry)?]), TimeToSampleBox::default(), SampleToChunkBox::default(), Some(SampleSizeBox::default()), ChunkOffsetBox::default(), ),
604 None, Some(SoundMediaHeaderBox::default()), ),
607 ),
608 ),
609 ],
610 mvex: Some(MovieExtendsBox {
611 mehd: None,
612 trex: vec![TrackExtendsBox::new(1), TrackExtendsBox::new(2)],
613 leva: None,
614 }),
615 unknown_boxes: vec![],
616 udta: None,
617 }
618 .serialize(writer)?;
619
620 Ok(Some((
621 VideoSettings {
622 width: video_width,
623 height: video_height,
624 framerate: video_fps,
625 codec: video_codec,
626 bitrate: estimated_video_bitrate,
627 timescale: video_timescale,
628 },
629 AudioSettings {
630 codec: audio_codec,
631 sample_rate: audio_sample_rate,
632 channels: audio_channels,
633 bitrate: estimated_audio_bitrate,
634 timescale: audio_sample_rate,
635 },
636 )))
637 }
638}
639
640#[cfg(feature = "docs")]
642#[scuffle_changelog::changelog]
643pub mod changelog {}
644
645#[cfg(test)]
646mod tests;