Skip to content

Commit d067933

Browse files
committed
MP4: Improve audio bitrate calculation
1 parent 4d1e7be commit d067933

File tree

3 files changed

+183
-49
lines changed

3 files changed

+183
-49
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4848
```
4949
- Renamed `Popularimeter` -> `PopularimeterFrame`
5050
- Renamed `SynchronizedText` -> `SynchronizedTextFrame`
51+
- **MP4**: Bitrate calculation is now more accurate ([PR](https://github.com/Serial-ATA/lofty-rs/pull/398))
5152

5253
### Fixed
5354
- **ID3v2**: Disallow 4 character TXXX/WXXX frame descriptions from being converted to `ItemKey` ([issue](https://github.com/Serial-ATA/lofty-rs/issues/309)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/394))

lofty/src/mp4/properties.rs

Lines changed: 181 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -210,16 +210,15 @@ impl Mp4Properties {
210210
}
211211
}
212212

213-
pub(super) fn read_properties<R>(
214-
reader: &mut AtomReader<R>,
215-
traks: &[AtomInfo],
216-
file_length: u64,
217-
parse_mode: ParsingMode,
218-
) -> Result<Mp4Properties>
213+
struct TrakChildren {
214+
mdhd: AtomInfo,
215+
minf: Option<AtomInfo>,
216+
}
217+
218+
fn get_trak_children<R>(reader: &mut AtomReader<R>, traks: &[AtomInfo]) -> Result<TrakChildren>
219219
where
220220
R: Read + Seek,
221221
{
222-
// We need the mdhd and minf atoms from the audio track
223222
let mut audio_track = false;
224223
let mut mdhd = None;
225224
let mut minf = None;
@@ -278,8 +277,18 @@ where
278277
err!(BadAtom("Expected atom \"trak.mdia.mdhd\""));
279278
};
280279

281-
reader.seek(SeekFrom::Start(mdhd.start + 8))?;
280+
Ok(TrakChildren { mdhd, minf })
281+
}
282282

283+
struct Mdhd {
284+
timescale: u32,
285+
duration: u64,
286+
}
287+
288+
fn read_mdhd<R>(reader: &mut AtomReader<R>) -> Result<Mdhd>
289+
where
290+
R: Read + Seek,
291+
{
283292
let version = reader.read_u8()?;
284293
let _flags = reader.read_uint(3)?;
285294

@@ -302,44 +311,106 @@ where
302311
(timescale, u64::from(duration))
303312
};
304313

305-
let duration_millis = (duration * 1000).div_round(u64::from(timescale));
306-
let duration = Duration::from_millis(duration_millis);
307-
308-
// We create the properties here, since it is possible the other information isn't available
309-
let mut properties = Mp4Properties {
314+
Ok(Mdhd {
315+
timescale,
310316
duration,
311-
..Mp4Properties::default()
312-
};
317+
})
318+
}
313319

314-
let Some(minf) = minf else {
315-
return Ok(properties);
316-
};
320+
// TODO: Estimate duration from stts?
321+
// Since this has the number of samples and the duration of each sample,
322+
// it would be pretty simple to do, and would help in the case that we have
323+
// no timescale available.
324+
#[derive(Debug)]
325+
struct SttsEntry {
326+
_sample_count: u32,
327+
sample_duration: u32,
328+
}
317329

318-
reader.seek(SeekFrom::Start(minf.start + 8))?;
330+
fn read_stts<R>(reader: &mut R) -> Result<Vec<SttsEntry>>
331+
where
332+
R: Read,
333+
{
334+
let _version_and_flags = reader.read_uint::<BigEndian>(4)?;
319335

320-
let Some(stbl) = nested_atom(reader, minf.len, b"stbl", parse_mode)? else {
321-
return Ok(properties);
322-
};
336+
let entry_count = reader.read_u32::<BigEndian>()?;
337+
let mut entries = Vec::with_capacity(entry_count as usize);
323338

324-
let Some(stsd) = nested_atom(reader, stbl.len, b"stsd", parse_mode)? else {
325-
return Ok(properties);
339+
for _ in 0..entry_count {
340+
let sample_count = reader.read_u32::<BigEndian>()?;
341+
let sample_duration = reader.read_u32::<BigEndian>()?;
342+
343+
entries.push(SttsEntry {
344+
_sample_count: sample_count,
345+
sample_duration,
346+
});
347+
}
348+
349+
Ok(entries)
350+
}
351+
352+
struct Minf {
353+
stsd_data: Vec<u8>,
354+
stts: Option<Vec<SttsEntry>>,
355+
}
356+
357+
fn read_minf<R>(
358+
reader: &mut AtomReader<R>,
359+
len: u64,
360+
parse_mode: ParsingMode,
361+
) -> Result<Option<Minf>>
362+
where
363+
R: Read + Seek,
364+
{
365+
let Some(stbl) = nested_atom(reader, len, b"stbl", parse_mode)? else {
366+
return Ok(None);
326367
};
327368

328-
let mut stsd = try_vec![0; (stsd.len - 8) as usize];
329-
reader.read_exact(&mut stsd)?;
369+
let mut stsd_data = None;
370+
let mut stts = None;
371+
372+
let mut read = 8;
373+
while read < stbl.len {
374+
let Some(atom) = reader.next()? else { break };
375+
376+
read += atom.len;
377+
378+
if let AtomIdent::Fourcc(fourcc) = atom.ident {
379+
match &fourcc {
380+
b"stsd" => {
381+
let mut stsd = try_vec![0; (atom.len - 8) as usize];
382+
reader.read_exact(&mut stsd)?;
383+
stsd_data = Some(stsd);
384+
},
385+
b"stts" => stts = Some(read_stts(reader)?),
386+
_ => {
387+
skip_unneeded(reader, atom.extended, atom.len)?;
388+
},
389+
}
330390

331-
let mut cursor = Cursor::new(&*stsd);
391+
continue;
392+
}
393+
}
332394

333-
let mut stsd_reader = AtomReader::new(&mut cursor, parse_mode)?;
395+
let Some(stsd_data) = stsd_data else {
396+
return Ok(None);
397+
};
334398

399+
Ok(Some(Minf { stsd_data, stts }))
400+
}
401+
402+
fn read_stsd<R>(reader: &mut AtomReader<R>, properties: &mut Mp4Properties) -> Result<()>
403+
where
404+
R: Read + Seek,
405+
{
335406
// Skipping 4 bytes
336407
// Version (1)
337408
// Flags (3)
338-
stsd_reader.seek(SeekFrom::Current(4))?;
339-
let num_sample_entries = stsd_reader.read_u32()?;
409+
reader.seek(SeekFrom::Current(4))?;
410+
let num_sample_entries = reader.read_u32()?;
340411

341412
for _ in 0..num_sample_entries {
342-
let Some(atom) = stsd_reader.next()? else {
413+
let Some(atom) = reader.next()? else {
343414
err!(BadAtom("Expected sample entry atom in `stsd` atom"))
344415
};
345416

@@ -348,9 +419,9 @@ where
348419
};
349420

350421
match fourcc {
351-
b"mp4a" => mp4a_properties(&mut stsd_reader, &mut properties)?,
352-
b"alac" => alac_properties(&mut stsd_reader, &mut properties)?,
353-
b"fLaC" => flac_properties(&mut stsd_reader, &mut properties)?,
422+
b"mp4a" => mp4a_properties(reader, properties)?,
423+
b"alac" => alac_properties(reader, properties)?,
424+
b"fLaC" => flac_properties(reader, properties)?,
354425
// Maybe do these?
355426
// TODO: dops (opus)
356427
// TODO: wave (https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html#//apple_ref/doc/uid/TP40000939-CH205-134202)
@@ -371,23 +442,85 @@ where
371442
},
372443
}
373444

374-
// We do the mdat check up here, so we have access to the entire file
375-
let duration_millis = properties.duration.as_millis();
376-
if duration_millis > 0 {
377-
let overall_bitrate = u128::from(file_length * 8) / duration_millis;
378-
properties.overall_bitrate = overall_bitrate as u32;
445+
// We only want to read the properties of the first stream
446+
// that we can actually recognize
447+
break;
448+
}
379449

380-
if properties.audio_bitrate == 0 {
381-
log::warn!("Estimating audio bitrate from 'mdat' size");
450+
Ok(())
451+
}
452+
453+
pub(super) fn read_properties<R>(
454+
reader: &mut AtomReader<R>,
455+
traks: &[AtomInfo],
456+
file_length: u64,
457+
parse_mode: ParsingMode,
458+
) -> Result<Mp4Properties>
459+
where
460+
R: Read + Seek,
461+
{
462+
// We need the mdhd and minf atoms from the audio track
463+
let TrakChildren { mdhd, minf } = get_trak_children(reader, traks)?;
382464

383-
properties.audio_bitrate =
384-
(u128::from(mdat_length(reader)? * 8) / duration_millis) as u32;
465+
reader.seek(SeekFrom::Start(mdhd.start + 8))?;
466+
let Mdhd {
467+
timescale,
468+
duration,
469+
} = read_mdhd(reader)?;
470+
471+
// We create the properties here, since it is possible the other information isn't available
472+
let mut properties = Mp4Properties::default();
473+
474+
if timescale > 0 {
475+
let duration_millis = (duration * 1000).div_round(u64::from(timescale));
476+
properties.duration = Duration::from_millis(duration_millis);
477+
}
478+
479+
// We need an `mdhd` atom at the bare minimum; everything else is optional.
480+
let Some(minf_info) = minf else {
481+
return Ok(properties);
482+
};
483+
484+
reader.seek(SeekFrom::Start(minf_info.start + 8))?;
485+
let Some(Minf { stsd_data, stts }) = read_minf(reader, minf_info.len, parse_mode)? else {
486+
return Ok(properties);
487+
};
488+
489+
// `stsd` contains the majority of the audio properties
490+
let mut cursor = Cursor::new(&*stsd_data);
491+
let mut stsd_reader = AtomReader::new(&mut cursor, parse_mode)?;
492+
read_stsd(&mut stsd_reader, &mut properties)?;
493+
494+
// We do the `mdat` check up here, rather than in the per-codec readers, so we have access to the entire file
495+
if duration > 0 {
496+
// TODO: We should keep track of the `mdat` length when first reading the file.
497+
// This extra read is unnecessary.
498+
let mdat_len = mdat_length(reader)?;
499+
500+
if let Some(stts) = stts {
501+
let stts_specifies_duration = !(stts.len() == 1 && stts[0].sample_duration == 1);
502+
if stts_specifies_duration {
503+
// We do a basic audio bitrate calculation below for each stream type.
504+
// Up here, we can do a more accurate calculation if the duration is available.
505+
let audio_bitrate_bps = (((u128::from(mdat_len) * 8) * u128::from(timescale))
506+
/ u128::from(duration)) as u32;
507+
508+
// Convert from b/s to kb/s
509+
properties.audio_bitrate = audio_bitrate_bps / 1000;
385510
}
386511
}
387512

388-
// We only want to read the properties of the first stream
389-
// that we can actually recognize
390-
break;
513+
let duration_millis = properties.duration.as_millis();
514+
515+
let overall_bitrate = u128::from(file_length * 8) / duration_millis;
516+
properties.overall_bitrate = overall_bitrate as u32;
517+
518+
if properties.audio_bitrate == 0 {
519+
log::warn!("Estimating audio bitrate from 'mdat' size");
520+
521+
properties.audio_bitrate =
522+
(u128::from(mdat_length(reader)? * 8) / duration_millis) as u32;
523+
}
391524
}
392525

393526
Ok(properties)
@@ -576,7 +709,7 @@ where
576709
return Ok(());
577710
}
578711

579-
// Unlike the mp4a atom, we cannot read the data that immediately follows it
712+
// Unlike the "mp4a" atom, we cannot read the data that immediately follows it
580713
// For ALAC, we have to skip the first "alac" atom entirely, and read the one that
581714
// immediately follows it.
582715
//
@@ -694,7 +827,7 @@ where
694827

695828
while let Ok(Some(atom)) = reader.next() {
696829
if atom.ident == AtomIdent::Fourcc(*b"mdat") {
697-
return Ok(atom.len);
830+
return Ok(atom.len - 8);
698831
}
699832

700833
skip_unneeded(reader, atom.extended, atom.len)?;

lofty/src/properties/tests.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ const MP4_ALAC_PROPERTIES: Mp4Properties = Mp4Properties {
129129
extended_audio_object_type: None,
130130
duration: Duration::from_millis(1428),
131131
overall_bitrate: 331,
132-
audio_bitrate: 1536,
132+
audio_bitrate: 326,
133133
sample_rate: 48000,
134134
bit_depth: Some(16),
135135
channels: 2,

0 commit comments

Comments
 (0)