scuffle_flv/video/body/
enhanced.rs

//! Enhanced video tag body
//!
//! Types and functions defined by the enhanced RTMP spec, page 29-31, ExVideoTagBody.
4
5use std::io::{self, Read};
6
7use byteorder::{BigEndian, ReadBytesExt};
8use bytes::{Buf, Bytes};
9use metadata::VideoPacketMetadataEntry;
10use scuffle_amf0::decoder::Amf0Decoder;
11use scuffle_av1::{AV1CodecConfigurationRecord, AV1VideoDescriptor};
12use scuffle_bytes_util::BytesCursorExt;
13use scuffle_bytes_util::zero_copy::Deserialize;
14use scuffle_h264::AVCDecoderConfigurationRecord;
15use scuffle_h265::HEVCDecoderConfigurationRecord;
16
17use crate::error::FlvError;
18use crate::video::header::enhanced::{ExVideoTagHeader, ExVideoTagHeaderContent, VideoFourCc, VideoPacketType};
19
20pub mod metadata;
21
22/// Sequence start video packet
23#[derive(Debug, Clone, PartialEq)]
24pub enum VideoPacketSequenceStart<'a> {
25    /// Av1 codec configuration record
26    Av1(AV1CodecConfigurationRecord<'a>),
27    /// H.264/AVC codec configuration record
28    Avc(AVCDecoderConfigurationRecord<'a>),
29    /// H.265/HEVC codec configuration record
30    Hevc(HEVCDecoderConfigurationRecord<'a>),
31    /// Other codecs like VP8 and VP9
32    Other(Bytes),
33}
34
35/// MPEG2-TS sequence start video packet
36#[derive(Debug, Clone, PartialEq)]
37pub enum VideoPacketMpeg2TsSequenceStart<'a> {
38    /// Av1 video descriptor
39    Av1(AV1VideoDescriptor<'a>),
40    /// Any other codecs
41    Other(Bytes),
42}
43
44/// Coded frames video packet
45#[derive(Debug, Clone, PartialEq)]
46pub enum VideoPacketCodedFrames {
47    /// H.264/AVC coded frames
48    Avc {
49        /// Composition time offset
50        composition_time_offset: i32,
51        /// Data
52        data: Bytes,
53    },
54    /// H.265/HEVC coded frames
55    Hevc {
56        /// Composition time offset
57        composition_time_offset: i32,
58        /// Data
59        data: Bytes,
60    },
61    /// Coded frames of any other codec
62    Other(Bytes),
63}
64
65/// Video packet
66///
67/// Appears as part of the [`ExVideoTagBody`].
68///
69/// Defined by:
70/// - Enhanced RTMP spec, page 29-31, ExVideoTagBody
71#[derive(Debug, Clone, PartialEq)]
72pub enum VideoPacket<'a> {
73    /// Metadata
74    Metadata(Vec<VideoPacketMetadataEntry<'a>>),
75    /// Indicates the end of a sequence of video packets.
76    SequenceEnd,
77    /// Indicates the start of a sequence of video packets.
78    SequenceStart(VideoPacketSequenceStart<'a>),
79    /// Indicates the start of a sequence of video packets in MPEG2-TS format.
80    Mpeg2TsSequenceStart(VideoPacketMpeg2TsSequenceStart<'a>),
81    /// Coded video frames.
82    CodedFrames(VideoPacketCodedFrames),
83    /// Coded video frames without extra data.
84    CodedFramesX {
85        /// The video data.
86        data: Bytes,
87    },
88    /// An unknown [`VideoPacketType`].
89    Unknown {
90        /// The unknown packet type.
91        video_packet_type: VideoPacketType,
92        /// The data.
93        data: Bytes,
94    },
95}
96
97impl VideoPacket<'_> {
98    /// Demux a [`VideoPacket`] from the given reader.
99    ///
100    /// This is implemented as per spec, Enhanced RTMP page 29-31, ExVideoTagBody.
101    pub fn demux(
102        header: &ExVideoTagHeader,
103        video_four_cc: VideoFourCc,
104        reader: &mut io::Cursor<Bytes>,
105    ) -> Result<Self, FlvError> {
106        let size_of_video_track = if !matches!(
107            header.content,
108            ExVideoTagHeaderContent::NoMultiTrack(_) | ExVideoTagHeaderContent::OneTrack(_)
109        ) {
110            Some(reader.read_u24::<BigEndian>()? as usize)
111        } else {
112            None
113        };
114
115        match header.video_packet_type {
116            VideoPacketType::Metadata => {
117                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
118                let mut decoder = Amf0Decoder::from_buf(data);
119
120                let metadata = decoder
121                    .deserialize_stream::<metadata::VideoPacketMetadataEntry>()
122                    .collect::<Result<Vec<_>, _>>()?;
123
124                Ok(Self::Metadata(metadata))
125            }
126            VideoPacketType::SequenceEnd => Ok(Self::SequenceEnd),
127            VideoPacketType::SequenceStart => {
128                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
129
130                let seq_start = match video_four_cc {
131                    VideoFourCc::Av1 => {
132                        let reader = scuffle_bytes_util::zero_copy::BytesBuf::from(data);
133                        let record = AV1CodecConfigurationRecord::deserialize(reader)?;
134                        VideoPacketSequenceStart::Av1(record)
135                    }
136                    VideoFourCc::Avc => {
137                        let reader = scuffle_bytes_util::zero_copy::BytesBuf::from(data);
138                        let record = AVCDecoderConfigurationRecord::deserialize(reader)?;
139                        VideoPacketSequenceStart::Avc(record)
140                    }
141                    VideoFourCc::Hevc => {
142                        let reader = scuffle_bytes_util::zero_copy::BytesBuf::from(data);
143                        let record = HEVCDecoderConfigurationRecord::deserialize(reader)?;
144                        VideoPacketSequenceStart::Hevc(record)
145                    }
146                    _ => VideoPacketSequenceStart::Other(data),
147                };
148
149                Ok(Self::SequenceStart(seq_start))
150            }
151            VideoPacketType::Mpeg2TsSequenceStart => {
152                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
153
154                let seq_start = match video_four_cc {
155                    VideoFourCc::Av1 => {
156                        let reader = scuffle_bytes_util::zero_copy::BytesBuf::from(data);
157                        let descriptor = AV1VideoDescriptor::deserialize(reader)?;
158                        VideoPacketMpeg2TsSequenceStart::Av1(descriptor)
159                    }
160                    _ => VideoPacketMpeg2TsSequenceStart::Other(data),
161                };
162
163                Ok(Self::Mpeg2TsSequenceStart(seq_start))
164            }
165            VideoPacketType::CodedFrames => {
166                let coded_frames = match video_four_cc {
167                    VideoFourCc::Avc => {
168                        let composition_time_offset = reader.read_i24::<BigEndian>()?;
169                        let data = reader
170                            .extract_bytes(size_of_video_track.map(|s| s.saturating_sub(3)).unwrap_or(reader.remaining()))?;
171
172                        VideoPacketCodedFrames::Avc {
173                            composition_time_offset,
174                            data,
175                        }
176                    }
177                    VideoFourCc::Hevc => {
178                        let composition_time_offset = reader.read_i24::<BigEndian>()?;
179                        let data = reader
180                            .extract_bytes(size_of_video_track.map(|s| s.saturating_sub(3)).unwrap_or(reader.remaining()))?;
181
182                        VideoPacketCodedFrames::Hevc {
183                            composition_time_offset,
184                            data,
185                        }
186                    }
187                    _ => {
188                        let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
189
190                        VideoPacketCodedFrames::Other(data)
191                    }
192                };
193
194                Ok(Self::CodedFrames(coded_frames))
195            }
196            VideoPacketType::CodedFramesX => {
197                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
198
199                Ok(Self::CodedFramesX { data })
200            }
201            _ => {
202                let data = reader.extract_bytes(size_of_video_track.unwrap_or(reader.remaining()))?;
203
204                Ok(Self::Unknown {
205                    video_packet_type: header.video_packet_type,
206                    data,
207                })
208            }
209        }
210    }
211}
212
213/// One video track contained in a multitrack video.
214#[derive(Debug, Clone, PartialEq)]
215pub struct VideoTrack<'a> {
216    /// The video FOURCC of this track.
217    pub video_four_cc: VideoFourCc,
218    /// The video track ID.
219    ///
220    /// > For identifying the highest priority (a.k.a., default track)
221    /// > or highest quality track, it is RECOMMENDED to use trackId
222    /// > set to zero. For tracks of lesser priority or quality, use
223    /// > multiple instances of trackId with ascending numerical values.
224    /// > The concept of priority or quality can have multiple
225    /// > interpretations, including but not limited to bitrate,
226    /// > resolution, default angle, and language. This recommendation
227    /// > serves as a guideline intended to standardize track numbering
228    /// > across various applications.
229    pub video_track_id: u8,
230    /// The video packet contained in this track.
231    pub packet: VideoPacket<'a>,
232}
233
234/// `ExVideoTagBody`
235///
236/// Defined by:
237/// - Enhanced RTMP spec, page 29-31, ExVideoTagBody
238#[derive(Debug, Clone, PartialEq)]
239pub enum ExVideoTagBody<'a> {
240    /// Empty body because the header contains a [`VideoCommand`](crate::video::header::VideoCommand).
241    Command,
242    /// The body is not a multitrack body.
243    NoMultitrack {
244        /// The video FOURCC of this body.
245        video_four_cc: VideoFourCc,
246        /// The video packet contained in this body.
247        packet: VideoPacket<'a>,
248    },
249    /// The body is a multitrack body.
250    ///
251    /// This variant contains multiple video tracks.
252    /// See [`VideoTrack`] for more information.
253    ManyTracks(Vec<VideoTrack<'a>>),
254}
255
256impl ExVideoTagBody<'_> {
257    /// Demux an [`ExVideoTagBody`] from the given reader.
258    ///
259    /// This is implemented as per Enhanced RTMP spec, page 29-31, ExVideoTagBody.
260    pub fn demux(header: &ExVideoTagHeader, reader: &mut io::Cursor<Bytes>) -> Result<Self, FlvError> {
261        let mut tracks = Vec::new();
262
263        loop {
264            let video_four_cc = match header.content {
265                ExVideoTagHeaderContent::VideoCommand(_) => return Ok(ExVideoTagBody::Command),
266                ExVideoTagHeaderContent::ManyTracksManyCodecs => {
267                    let mut video_four_cc = [0; 4];
268                    reader.read_exact(&mut video_four_cc)?;
269                    VideoFourCc::from(video_four_cc)
270                }
271                ExVideoTagHeaderContent::OneTrack(video_four_cc) => video_four_cc,
272                ExVideoTagHeaderContent::ManyTracks(video_four_cc) => video_four_cc,
273                ExVideoTagHeaderContent::NoMultiTrack(video_four_cc) => video_four_cc,
274                ExVideoTagHeaderContent::Unknown { video_four_cc, .. } => video_four_cc,
275            };
276
277            let video_track_id = if !matches!(header.content, ExVideoTagHeaderContent::NoMultiTrack(_)) {
278                Some(reader.read_u8()?)
279            } else {
280                None
281            };
282
283            let packet = VideoPacket::demux(header, video_four_cc, reader)?;
284
285            if let Some(video_track_id) = video_track_id {
286                // video_track_id is only set if this is a multitrack video, in other words, if `isVideoMultitrack` is true
287                tracks.push(VideoTrack {
288                    video_four_cc,
289                    video_track_id,
290                    packet,
291                });
292
293                // the loop only continues if there is still data to read and this is a video with multiple tracks
294                if !matches!(header.content, ExVideoTagHeaderContent::OneTrack(_)) && reader.has_remaining() {
295                    continue;
296                }
297
298                break;
299            } else {
300                // exit early if this is a single track video only completing one loop iteration
301                return Ok(Self::NoMultitrack { video_four_cc, packet });
302            }
303        }
304
305        // at this point we know this is a multitrack video because a single track video would have exited early
306        Ok(Self::ManyTracks(tracks))
307    }
308}
309
#[cfg(test)]
#[cfg_attr(all(test, coverage_nightly), coverage(off))]
mod tests {
    use std::io::Cursor;

    use bytes::Bytes;

    use crate::common::AvMultitrackType;
    use crate::video::body::enhanced::{
        ExVideoTagBody, VideoPacket, VideoPacketCodedFrames, VideoPacketMpeg2TsSequenceStart, VideoPacketSequenceStart,
        VideoTrack,
    };
    use crate::video::header::VideoCommand;
    use crate::video::header::enhanced::{ExVideoTagHeader, ExVideoTagHeaderContent, VideoFourCc, VideoPacketType};

    /// The all-zero FOURCC used by the tests that exercise the codec independent paths.
    fn zero_four_cc() -> VideoFourCc {
        VideoFourCc([0, 0, 0, 0])
    }

    /// Builds a header without any mod exs.
    fn header(video_packet_type: VideoPacketType, content: ExVideoTagHeaderContent) -> ExVideoTagHeader {
        ExVideoTagHeader {
            video_packet_mod_exs: Vec::new(),
            video_packet_type,
            content,
        }
    }

    /// Wraps static test data in the reader type expected by the demuxers.
    fn reader(data: &'static [u8]) -> Cursor<Bytes> {
        Cursor::new(Bytes::from_static(data))
    }

    /// Expected packet for coded frames of a codec without dedicated parsing.
    fn other_coded_frames(data: &'static [u8]) -> VideoPacket<'static> {
        VideoPacket::CodedFrames(VideoPacketCodedFrames::Other(Bytes::from_static(data)))
    }

    /// Expected track holding coded frames of a codec without dedicated parsing.
    fn other_coded_frames_track(video_four_cc: VideoFourCc, video_track_id: u8, data: &'static [u8]) -> VideoTrack<'static> {
        VideoTrack {
            video_four_cc,
            video_track_id,
            packet: other_coded_frames(data),
        }
    }

    #[test]
    fn simple_video_packets_demux() {
        const DATA: &[u8] = &[42, 42, 42, 42];

        /// Demuxes `DATA` as a non-multitrack packet of the given type.
        fn demux_packet(video_packet_type: VideoPacketType) -> VideoPacket<'static> {
            let header = header(video_packet_type, ExVideoTagHeaderContent::NoMultiTrack(zero_four_cc()));

            VideoPacket::demux(&header, zero_four_cc(), &mut reader(DATA)).unwrap()
        }

        assert_eq!(
            demux_packet(VideoPacketType::SequenceStart),
            VideoPacket::SequenceStart(VideoPacketSequenceStart::Other(Bytes::from_static(DATA))),
        );

        assert_eq!(demux_packet(VideoPacketType::CodedFrames), other_coded_frames(DATA));

        assert_eq!(demux_packet(VideoPacketType::SequenceEnd), VideoPacket::SequenceEnd);

        assert_eq!(
            demux_packet(VideoPacketType(8)),
            VideoPacket::Unknown {
                video_packet_type: VideoPacketType(8),
                data: Bytes::from_static(DATA),
            },
        );
    }

    #[test]
    fn video_packet_with_size_demux() {
        const DATA: &[u8] = &[
            0, 0, 5, // size
            0, 0, 1, // composition time offset
            42, 42, // data
            13, 37, // should be ignored
        ];

        let header = header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::ManyTracks(VideoFourCc::Avc),
        );

        let packet = VideoPacket::demux(&header, VideoFourCc::Avc, &mut reader(DATA)).unwrap();

        assert_eq!(
            packet,
            VideoPacket::CodedFrames(VideoPacketCodedFrames::Avc {
                composition_time_offset: 1,
                data: Bytes::from_static(&[42, 42]),
            }),
        );
    }

    #[test]
    fn video_packet_mpeg2_ts_demux() {
        const DATA: &[u8] = &[
            42, 42, // data
        ];

        let header = header(
            VideoPacketType::Mpeg2TsSequenceStart,
            ExVideoTagHeaderContent::NoMultiTrack(VideoFourCc::Avc),
        );

        let packet = VideoPacket::demux(&header, VideoFourCc::Avc, &mut reader(DATA)).unwrap();

        assert_eq!(
            packet,
            VideoPacket::Mpeg2TsSequenceStart(VideoPacketMpeg2TsSequenceStart::Other(Bytes::from_static(DATA))),
        );
    }

    #[test]
    fn simple_body_demux() {
        const DATA: &[u8] = &[
            42, 42, // data
        ];

        let header = header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::NoMultiTrack(zero_four_cc()),
        );

        let body = ExVideoTagBody::demux(&header, &mut reader(DATA)).unwrap();

        assert_eq!(
            body,
            ExVideoTagBody::NoMultitrack {
                video_four_cc: zero_four_cc(),
                packet: other_coded_frames(DATA),
            },
        );
    }

    #[test]
    fn multitrack_many_codecs_body_demux() {
        const DATA: &[u8] = &[
            0, 0, 0, 0, // video four cc
            1, // video track id
            0, 0, 2, // size
            42, 42, // data
            0, 1, 0, 1, // video four cc
            2, // video track id
            0, 0, 2, // size
            13, 37, // data
        ];

        let header = header(VideoPacketType::CodedFrames, ExVideoTagHeaderContent::ManyTracksManyCodecs);

        let body = ExVideoTagBody::demux(&header, &mut reader(DATA)).unwrap();

        assert_eq!(
            body,
            ExVideoTagBody::ManyTracks(vec![
                other_coded_frames_track(VideoFourCc([0, 0, 0, 0]), 1, &[42, 42]),
                other_coded_frames_track(VideoFourCc([0, 1, 0, 1]), 2, &[13, 37]),
            ]),
        );
    }

    #[test]
    fn multitrack_body_demux() {
        const DATA: &[u8] = &[
            1, // video track id
            0, 0, 2, // size
            42, 42, // data
            2, // video track id
            0, 0, 2, // size
            13, 37, // data
        ];

        let header = header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::ManyTracks(zero_four_cc()),
        );

        let body = ExVideoTagBody::demux(&header, &mut reader(DATA)).unwrap();

        assert_eq!(
            body,
            ExVideoTagBody::ManyTracks(vec![
                other_coded_frames_track(zero_four_cc(), 1, &[42, 42]),
                other_coded_frames_track(zero_four_cc(), 2, &[13, 37]),
            ]),
        );
    }

    #[test]
    fn multitrack_one_track_body_demux() {
        const DATA: &[u8] = &[
            1, // video track id
            42, 42, // data
        ];

        let header = header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::OneTrack(zero_four_cc()),
        );

        let body = ExVideoTagBody::demux(&header, &mut reader(DATA)).unwrap();

        assert_eq!(
            body,
            ExVideoTagBody::ManyTracks(vec![other_coded_frames_track(zero_four_cc(), 1, &[42, 42])]),
        );
    }

    #[test]
    fn multitrack_unknown_body_demux() {
        const DATA: &[u8] = &[
            1, // video track id
            0, 0, 2, // size
            42, 42, // data
        ];

        let header = header(
            VideoPacketType::CodedFrames,
            ExVideoTagHeaderContent::Unknown {
                video_four_cc: zero_four_cc(),
                video_multitrack_type: AvMultitrackType(4),
            },
        );

        let body = ExVideoTagBody::demux(&header, &mut reader(DATA)).unwrap();

        assert_eq!(
            body,
            ExVideoTagBody::ManyTracks(vec![other_coded_frames_track(zero_four_cc(), 1, &[42, 42])]),
        );
    }

    #[test]
    fn video_command() {
        const DATA: &[u8] = &[
            42, // should be ignored
        ];

        let header = header(
            VideoPacketType::SequenceStart,
            ExVideoTagHeaderContent::VideoCommand(VideoCommand::StartSeek),
        );

        let body = ExVideoTagBody::demux(&header, &mut reader(DATA)).unwrap();

        assert_eq!(body, ExVideoTagBody::Command);
    }
}