unicode: add iso_decode_ex with utf8 buffer size check

Make use of it in id3tags, playlist and cuesheet.

Change-Id: Ibc8abc0faf16688bc9b826b7a712d1dfe9bf75b2
2024-12-11 14:31:38 +02:00 · 2024-12-11 14:31:38 +02:00 · 004304dc65
commit 004304dc65
parent f8fa1e7d5a
5 changed files with 56 additions and 37 deletions
--- a/apps/cuesheet.c
+++ b/apps/cuesheet.c
@ -314,7 +314,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
                break;

            size_t count = MAX_NAME*3 + 1;
-            size_t count8859 = MAX_NAME;

            switch (option)
            {
@ -339,7 +338,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)

                    dest = cue->file;
                    count = MAX_PATH;
-                    count8859 = MAX_PATH/3;
                    break;
                case eCS_TRACK:
                    /*Fall-Through*/
@ -357,8 +355,8 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
            {
                if (char_enc == CHAR_ENC_ISO_8859_1)
                {
-                    dest = iso_decode(string, dest, -1,
-                        MIN(strlen(string), count8859));
+                    dest = iso_decode_ex(string, dest, -1,
+                        strlen(string), count - 1);
                    *dest = '\0';
                }
                else
--- a/apps/playlist.c
+++ b/apps/playlist.c
@ -350,14 +350,7 @@ static int convert_m3u_name(char* buf, int buf_len, int buf_max, char* temp)
    buf_len = i;
    dest = temp;

-    /* Convert char by char, so as to not overflow temp (iso_decode should
-     * preferably handle this). No more than 4 bytes should be generated for
-     * each input char.
-     */
-    for (i = 0; i < buf_len && dest < (temp + buf_max - 4); i++)
-    {
-        dest = iso_decode(&buf[i], dest, -1, 1);
-    }
+    dest = iso_decode_ex(buf, dest, -1, buf_len, buf_max - 1);

    *dest = 0;
    strcpy(buf, temp);
--- a/firmware/common/unicode.c
+++ b/firmware/common/unicode.c
@ -245,8 +245,8 @@ static int alloc_and_load_cp_table(int cp, void *buf)
    return -1;
 }

-/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
-unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
+/* returns number of additional bytes required in encoded string (bytes_count - 1) */
+static int utf8_ucs_get_extra_bytes_count(unsigned long ucs)
 {
    int tail = 0;

@ -254,17 +254,41 @@ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
        while (ucs >> (5*tail + 6))
            tail++;

+    return tail;
+}
+
+static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf8, int tail)
+{
    *utf8++ = (ucs >> (6*tail)) | utf8comp[tail];
    while (tail--)
        *utf8++ = ((ucs >> (6*tail)) & (MASK ^ 0xFF)) | COMP;
-
    return utf8;
 }

-/* Recode an iso encoded string to UTF-8 */
-unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
-                          int cp, int count)
+static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
 {
+    const int tail = utf8_ucs_get_extra_bytes_count(ucs);
+    *utf8_size -= tail + 1;
+    return  *utf8_size < 0 ? utf8 : utf8encode_internal(ucs, utf8, tail);
+}
+
+/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
+unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
+{
+    return utf8encode_internal(ucs, utf8, utf8_ucs_get_extra_bytes_count(ucs));
+}
+
+unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, int cp, int count)
+{
+    return iso_decode_ex(iso, utf8, cp, count, -1);
+}
+
+/* Recode an iso encoded string to UTF-8 */
+unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size)
+{
+    if (utf8_size == -1)
+        utf8_size = INT_MAX;
+
    uint16_t *table = NULL;

    cp_lock_enter();
@ -322,11 +346,14 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,

    cp_lock_leave();

-    while (count--) {
+    while (count-- && utf8_size > 0) {
        unsigned short ucs, tmp;

        if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
+        {
            *utf8++ = *iso++;
+            --utf8_size;
+        }

        else {
            /* tid tells us which table to use and how */
@ -375,7 +402,8 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,

            if (ucs == 0) /* unknown char, use replacement char */
                ucs = 0xfffd;
-            utf8 = utf8encode(ucs, utf8);
+
+            utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
        }
    }

--- a/firmware/include/rbunicode.h
+++ b/firmware/include/rbunicode.h
@ -57,6 +57,8 @@ enum codepages {
 /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
 unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8);
 unsigned char* iso_decode(const unsigned char *latin1, unsigned char *utf8, int cp, int count);
+unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size);
+
 unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
 unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
 bool utf16_has_bom(const unsigned char *utf16, bool *le);
--- a/lib/rbcodec/metadata/id3tags.c
+++ b/lib/rbcodec/metadata/id3tags.c
@ -568,22 +568,16 @@ static bool parse_as_utf8(char* string, int *len)

 /* Must be called after parse_as_utf8. Checks to see if the passed in string is a 16-bit wide Unicode v2
   string.  If it is, we convert it to a UTF-8 string.  If it's not unicode,
-   we convert from the default codepage */
-static void unicode_munge(char* string, char* utf8buf, int *len) {
+   we convert from the default codepage
+   NOTE: real UTF-8 buffer size is expected to be utf8buf_size + 1 (additional byte for string terminator) */
+static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *len, int utf8buf_size) {
+    unsigned char *str = string;
+    unsigned char* utf8 = utf8buf;
+
    int i = 0;
-    unsigned char *str = (unsigned char *)string;
    int templen = 0;
-    unsigned char* utf8 = (unsigned char *)utf8buf;

    switch (str[0]) {
-        case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
-            str++;
-            (*len)--;
-            utf8 = iso_decode(str, utf8, -1, *len);
-            *utf8 = 0;
-            *len = (intptr_t)utf8 - (intptr_t)utf8buf;
-            break;
-
        case 0x01: /* Unicode with or without BOM */
        case 0x02:
            (*len)--;
@ -618,10 +612,15 @@ static void unicode_munge(char* string, char* utf8buf, int *len) {
            *len = templen - 1;
            break;
        /* case 0x03:  UTF-8 encoded string handled by parse_as_utf8 */
+
+        case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
+            str++;
+            (*len)--;
+        //fallthrough
        default: /* Plain old string */
-            utf8 = iso_decode(str, utf8, -1, *len);
+            utf8 = iso_decode_ex(str, utf8, -1, *len, utf8buf_size);
            *utf8 = 0;
-            *len = (intptr_t)utf8 - (intptr_t)utf8buf;
+            *len = utf8 - utf8buf;
            break;
    }
 }
@ -1103,10 +1102,9 @@ retry_with_limit:
                        {
                            //limit stack allocation to avoid stack overflow
                            utf8_size = ID3V2_BUF_SIZE;
-                            bytesread = ID3V2_BUF_SIZE/3;
                        }
-                        char utf8buf[utf8_size + 1];
-                        unicode_munge( tag, utf8buf, &bytesread);
+                        unsigned char utf8buf[utf8_size + 1];
+                        unicode_munge( (unsigned char *)tag, utf8buf, &bytesread, utf8_size);
                        if(bytesread >= buffersize - bufferpos)
                            bytesread = buffersize - bufferpos - 1;