metadata: Normalize all metadata to Unicode NFC form

Normalization is enabled for:

 * Standalone database tool
 * Simulator builds
 * Target firmware (Hosted and Native, for all >2MB targets)

Change-Id: Ia7361affc2fc6a08e73c31ecc9ef3a4008c2415d
This commit is contained in:
Solomon Peachy 2025-11-08 18:51:43 -05:00
parent 8768266d27
commit 81fcb10f8f
6 changed files with 68 additions and 5 deletions

View file

@ -316,3 +316,7 @@ lto
#if defined(USB_ENABLE_AUDIO)
usbdac
#endif
#if defined(UTF8PROC_EXPORTS)
utf8proc
#endif

View file

@ -44,6 +44,10 @@
#define O_NOISODECODE 0
#endif
#ifdef UTF8PROC_EXPORTS
#include "utf8proc.h"
#endif
/* Combine the two elements at p into one value, p[0] being the low byte.
 * The argument is parenthesized so that expressions such as (buf + 1) work. */
#define getle16(p) ((p)[0] | ((p)[1] << 8))
/* Combine the two elements at p into one value, p[0] being the high byte. */
#define getbe16(p) (((p)[0] << 8) | (p)[1])
@ -59,8 +63,9 @@
#define open_noiso_internal open
#endif /* !APPLICATION */
#if 0 /* not needed just now (will probably end up a spinlock) */
#include "mutex.h"
#if 0 /* not needed just now (will probably end up a spinlock) */
static struct mutex cp_mutex SHAREDBSS_ATTR;
#define cp_lock_init() mutex_init(&cp_mutex)
#define cp_lock_enter() mutex_lock(&cp_mutex)
@ -651,9 +656,32 @@ const char * get_codepage_name(int cp)
return cp_info[cp].name;
}
#if 0 /* not needed just now */
#ifdef UTF8PROC_EXPORTS
/* Scratch buffer shared by all utf8_normalize() calls: it first holds the
 * decoded codepoints and is then re-encoded to UTF-8 in place. */
static utf8proc_int32_t normbuf[2048];
/* Held by utf8_normalize() for the whole time normbuf is in use. */
static struct mutex norm_mutex SHAREDBSS_ATTR;
void utf8_normalize(char *string)
{
utf8proc_ssize_t result, orig;
if (!string || !*string)
return;
mutex_lock(&norm_mutex);
orig = strlen(string);
result = utf8proc_decompose(string, 0, normbuf, sizeof(normbuf)/4 -1, UTF8PROC_NULLTERM);
if (result > 0) {
result = utf8proc_reencode(normbuf, result, UTF8PROC_NULLTERM|UTF8PROC_COMPOSE|UTF8PROC_STABLE);
if (result > 0 && result <= orig && strcmp((char*)normbuf, string))
strcpy(string, (char*)normbuf);
}
mutex_unlock(&norm_mutex);
}
/* One-time setup of the locks used by this file: runs cp_lock_init() and
 * initializes norm_mutex, which utf8_normalize() locks while it works. */
void unicode_init(void)
{
cp_lock_init();
mutex_init(&norm_mutex);
}
#endif

View file

@ -76,8 +76,9 @@ const char *get_current_codepage_name_linux(void);
#endif
#endif /* APPLICATION */
#if 0 /* not needed just now */
void unicode_init(void);
#ifdef UTF8PROC_EXPORTS
/* Normalize a NUL-terminated UTF-8 string in place; only declared when
 * utf8proc is part of the build, so callers need the same #ifdef. */
void utf8_normalize(char *string);
/* Sets up the locking used by the unicode code. */
void unicode_init(void) INIT_ATTR;
#else
/* Without utf8proc, unicode_init() expands to an empty statement. */
#define unicode_init() do {} while (0)
#endif

View file

@ -27,6 +27,10 @@
#include "logf.h"
#include "metadata.h"
#ifdef UTF8PROC_EXPORTS
#include "rbunicode.h"
#endif
#include "metadata_parsers.h"
/* For trailing tag stripping and base audio data types */
@ -471,6 +475,22 @@ bool get_metadata_ex(struct mp3entry* id3, int fd, const char* trackname, int fl
wipe_mp3entry(id3); /* ensure the mp3entry is clear */
}
#ifdef UTF8PROC_EXPORTS
if (success) {
utf8_normalize(id3->title);
utf8_normalize(id3->artist);
utf8_normalize(id3->album);
utf8_normalize(id3->genre_string);
utf8_normalize(id3->disc_string);
utf8_normalize(id3->track_string);
utf8_normalize(id3->year_string);
utf8_normalize(id3->composer);
utf8_normalize(id3->comment);
utf8_normalize(id3->albumartist);
utf8_normalize(id3->grouping);
}
#endif
if ((flags & METADATA_CLOSE_FD_ON_EXIT))
close(fd);
else

View file

@ -9,6 +9,8 @@
GCCOPTS += -g -DDEBUG -D__PCTOOL__ -DDBTOOL
include $(ROOTDIR)/lib/utf8proc/utf8proc.make
METADATAS := $(wildcard $(ROOTDIR)/lib/rbcodec/metadata/*.c)
DATABASE_SRC = $(call preprocess, $(TOOLSDIR)/database/SOURCES) $(METADATAS)
@ -28,7 +30,7 @@ INCLUDES += -I$(ROOTDIR)/apps/gui \
-I$(APPSDIR) \
-I$(BUILDDIR)
OTHERLIBS := $(FIXEDPOINTLIB)
OTHERLIBS := $(FIXEDPOINTLIB) $(LIBUTF8PROC)
.SECONDEXPANSION: # $$(OBJ) is not populated until after this

View file

@ -128,6 +128,14 @@ else # core
include $(APPSDIR)/apps.make
include $(ROOTDIR)/lib/rbcodec/rbcodec.make
# bootloaders don't get utf8proc; the checks below additionally skip it for
# checkwps builds and when MEMORYSIZE is not greater than 2
ifeq (,$(findstring checkwps,$(APP_TYPE)))
# numeric comparison is delegated to the shell; yields "true" or "false"
IS_GREATER := $(shell [ $(MEMORYSIZE) -gt 2 ] && echo true || echo false)
ifeq ($(IS_GREATER),true)
include $(ROOTDIR)/lib/utf8proc/utf8proc.make
endif
endif
ifeq ($(ENABLEDPLUGINS),yes)
include $(APPSDIR)/plugins/bitmaps/pluginbitmaps.make
include $(APPSDIR)/plugins/plugins.make