codecs: m4a: improve seek accuracy

Seeking doesn't work well in M4A files with very few chunks because of the seek method used (chunk-based, using the info in the 'stco' atom). According to libm4a/demux.c the expected seek resolution with this method is 1/4 to 1/2 seconds. However, ffmpeg generates files with a 1 megabyte chunk size, so the resolution is much worse than expected on some files: around 30-40 seconds at 256kbps.

There was also a bug with the seek position reported back to Rockbox: the codec pretended it could seek exactly to the requested sample, but it would only seek to the start of a chunk. This could leave the UI in a confusing state because the real playback position differed from what the elapsed time showed. Fix this by recalculating the reported sample position from the chunk start.

To fix the low seek accuracy, use the table in the 'stsz' atom to skip individual packets within a chunk. This is very accurate, but the table takes a lot of RAM. Currently the table is not allowed to use more than half of the codec RAM, which should suffice for short files on most targets. For files where the table is too large, the codec falls back to the less accurate chunk-based seek method.

Change-Id: Ide38ea846c1cdd69691e9b1e1cd87eb0fa11cf78
2022-04-18 14:21:12 +01:00 · 2022-04-18 14:21:12 +01:00 · 4dd3c2b33e
commit 4dd3c2b33e
parent b79eefc858
3 changed files with 127 additions and 53 deletions
--- a/lib/rbcodec/codecs/libm4a/demux.c
+++ b/lib/rbcodec/codecs/libm4a/demux.c
@ -349,6 +349,7 @@ static bool read_chunk_stts(qtmovie_t *qtmovie, size_t chunk_len)
 static bool read_chunk_stsz(qtmovie_t *qtmovie, size_t chunk_len)
 {
    size_t size_remaining = chunk_len - 8;
    uint32_t numsizes, i;
    /* version */
    stream_read_uint8(qtmovie->stream);
@ -369,9 +370,37 @@ static bool read_chunk_stsz(qtmovie_t *qtmovie, size_t chunk_len)
    }
    size_remaining -= 4;
-    qtmovie->res->num_sample_byte_sizes = stream_read_uint32(qtmovie->stream);
+    numsizes = stream_read_uint32(qtmovie->stream);
    size_remaining -= 4;
    /* Because this table can be really large and is only used to improve seek
     * accuracy, it's optional. In that case the seek code will fall back to a
     * less accurate seek method. */
    qtmovie->res->num_sample_byte_sizes = numsizes;
    if (numsizes * sizeof(uint32_t) < CODEC_SIZE * 1 / 2)
        qtmovie->res->sample_byte_sizes = malloc(numsizes * sizeof(uint32_t));
    else
        qtmovie->res->sample_byte_sizes = NULL;
    if (qtmovie->res->sample_byte_sizes)
    {
        for (i = 0; i < numsizes; ++i)
        {
            qtmovie->res->sample_byte_sizes[i] =
                stream_read_uint32(qtmovie->stream);
            size_remaining -= 4;
        }
        if (size_remaining)
        {
            DEBUGF("extra bytes after stsz\n");
        }
    }
    else
    {
        DEBUGF("stsz too large, ignoring it\n");
    }
    if (size_remaining)
    {
        stream_skip(qtmovie->stream, size_remaining);
--- a/lib/rbcodec/codecs/libm4a/m4a.c
+++ b/lib/rbcodec/codecs/libm4a/m4a.c
@ -23,6 +23,13 @@
 #include <inttypes.h>
 #include "m4a.h"
 #undef DEBUGF
 #if defined(DEBUG)
 #define DEBUGF stream->ci->debugf
 #else
 #define DEBUGF(...)
 #endif
 /* Implementation of the stream.h functions used by libalac */
 #define _Swap32(v) do { \
@ -127,76 +134,113 @@ int m4a_check_sample_offset(demux_res_t *demux_res, uint32_t frame, uint32_t *st
    return demux_res->lookup_table[i].offset;
 }
 /* Look up *frame in lookup_table[]. The scan stops at the first entry whose
  * sample number is greater than *frame, at the first entry whose offset is
  * 0, or at the end of the table; the entry just before that stop point
  * (or entry 0 if the scan stopped immediately) is the match.
  *
  * demux_res: supplies lookup_table[] and num_lookup_table.
  * frame:     in: wanted sample number; out: sample number of the match.
  * offset:    out: byte offset stored in the matching entry. */
 static void gather_offset(demux_res_t *demux_res, uint32_t *frame, uint32_t *offset)
 {
    uint32_t idx = 0;

    /* Walk forward while entries are usable (non-zero offset) and do not
     * lie beyond the requested frame. */
    while (idx < demux_res->num_lookup_table
           && demux_res->lookup_table[idx].offset != 0
           && demux_res->lookup_table[idx].sample <= *frame)
    {
        ++idx;
    }

    /* idx is one past the wanted entry; step back unless already at 0. */
    if (idx > 0)
        --idx;

    *frame  = demux_res->lookup_table[idx].sample;
    *offset = demux_res->lookup_table[idx].offset;
 }
 /* Seek to desired sound sample location. Return 1 on success (and modify
- * sound_samples_done and current_sample), 0 if failed.
+ * sound_samples_done and current_sample), 0 if failed. */
 *
 * Find the sample (=frame) that contains the given sound sample, find a best
 * fit for this sample in the lookup_table[], seek to the byte position. */
 unsigned int m4a_seek(demux_res_t* demux_res, stream_t* stream, 
    uint32_t sound_sample_loc, uint32_t* sound_samples_done, 
    int* current_sample)
 {
-    uint32_t i = 0;
+    uint32_t i, sample_i, sound_sample_i;
-    uint32_t tmp_var, tmp_cnt, tmp_dur;
+    uint32_t time, time_cnt, time_dur;
-    uint32_t new_sample = 0;       /* Holds the amount of chunks/frames. */
+    uint32_t chunk, chunk_first_sample;
-    uint32_t new_sound_sample = 0; /* Sums up total amount of samples. */
+    uint32_t offset;
-    uint32_t new_pos;              /* Holds the desired chunk/frame index. */
+    time_to_sample_t *tts_tab = demux_res->time_to_sample;
    sample_offset_t *tco_tab = demux_res->lookup_table;
    uint32_t *tsz_tab = demux_res->sample_byte_sizes;
-    /* First check we have the appropriate metadata - we should always
+    /* First check we have the required metadata - we should always have it. */
     * have it.
     */
    if (!demux_res->num_time_to_samples || !demux_res->num_sample_byte_sizes)
-    { 
+    {
-        return 0; 
+        return 0;
    }
-    /* Find the destination block from time_to_sample array */
+    /* The 'sound_sample_loc' we have is PCM-based and not directly usable.
-    time_to_sample_t *tab = demux_res->time_to_sample;
+     * We need to convert it to an MP4 sample number 'sample_i' first. */
-    while (i < demux_res->num_time_to_samples)
+    sample_i = sound_sample_i = 0;
    for (time = 0; time < demux_res->num_time_to_samples; ++time)
    {
-        tmp_cnt = tab[i].sample_count;
+        time_cnt = tts_tab[time].sample_count;
-        tmp_dur = tab[i].sample_duration;
+        time_dur = tts_tab[time].sample_duration;
-        tmp_var = tmp_cnt * tmp_dur;
+        uint32_t time_var = time_cnt * time_dur;
-        if (sound_sample_loc <= new_sound_sample + tmp_var)
+
        if (sound_sample_loc < sound_sample_i + time_var)
        {
-            tmp_var = (sound_sample_loc - new_sound_sample);
+            time_var = sound_sample_loc - sound_sample_i;
-            new_sample       += tmp_var / tmp_dur;
+            sample_i += time_var / time_dur;
            new_sound_sample += tmp_var;
            break;
        }
-        new_sample       += tmp_cnt;
+
-        new_sound_sample += tmp_var;
+        sample_i       += time_cnt;
-        ++i;
+        sound_sample_i += time_var;
    }
-    /* We know the new sample (=frame), now calculate the file position. */
+    /* Find the chunk after 'sample_i'. */
-    gather_offset(demux_res, &new_sample, &new_pos);
+    for (chunk = 1; chunk < demux_res->num_lookup_table; ++chunk)
    /* We know the new file position, so let's try to seek to it */
    if (stream->ci->seek_buffer(new_pos))
    {
-        *sound_samples_done = new_sound_sample;
+        if (tco_tab[chunk].offset == 0)
-        *current_sample = new_sample;
+            break;
        if (tco_tab[chunk].sample > sample_i)
            break;
    }
    /* The preceding chunk is the one that contains 'sample_i'. */
    chunk--;
    chunk_first_sample = tco_tab[chunk].sample;
    offset = tco_tab[chunk].offset;
    /* Compute the PCM sample number of the chunk's first sample
     * to get an accurate base for sound_sample_i. */
    i = sound_sample_i = 0;
    for (time = 0; time < demux_res->num_time_to_samples; ++time)
    {
        time_cnt = tts_tab[time].sample_count;
        time_dur = tts_tab[time].sample_duration;
        if (chunk_first_sample < i + time_cnt)
        {
            sound_sample_i += (chunk_first_sample - i) * time_dur;
            break;
        }
        i += time_cnt;
        sound_sample_i += time_cnt * time_dur;
    }
    DEBUGF("seek chunk=%lu, sample=%lu, soundsample=%lu, offset=%lu\n",
           (unsigned long)chunk, (unsigned long)chunk_first_sample,
           (unsigned long)sound_sample_i, (unsigned long)offset);
    if (tsz_tab) {
        /* We have a sample-to-bytes table available so we can do accurate
         * seeking. Move one sample at a time and update the file offset and
         * PCM sample offset as we go. */
        for (i = chunk_first_sample;
             i < sample_i && i < demux_res->num_sample_byte_sizes; ++i)
        {
            /* this could be unnecessary */
            if (time_cnt == 0 && ++time < demux_res->num_time_to_samples)
            {
                time_cnt = tts_tab[time].sample_count;
                time_dur = tts_tab[time].sample_duration;
            }
            offset += tsz_tab[i];
            sound_sample_i += time_dur;
            time_cnt--;
        }
    } else {
        /* No sample-to-bytes table available so we can only seek to the
         * start of a chunk, which is often much lower resolution. */
        sample_i = chunk_first_sample;
    }
    if (stream->ci->seek_buffer(offset))
    {
        *sound_samples_done = sound_sample_i;
        *current_sample = sample_i;
        return 1;
    }
-    
+
    return 0;
 }
--- a/lib/rbcodec/codecs/libm4a/m4a.h
+++ b/lib/rbcodec/codecs/libm4a/m4a.h
@ -80,6 +80,7 @@ typedef struct
    time_to_sample_t *time_to_sample;
    uint32_t num_time_to_samples;
    uint32_t *sample_byte_sizes;
    uint32_t num_sample_byte_sizes;
    uint32_t codecdata_len;