forked from len0rd/rockbox
		
	git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12769 a1c6a512-1295-4272-9138-f99709370657
		
			
				
	
	
		
			620 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			620 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
| #!/usr/bin/perl -s
 | |
| #             __________               __   ___.
 | |
| #   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 | |
| #   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 | |
| #   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 | |
| #   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 | |
| #                     \/            \/     \/    \/            \/
 | |
| # $Id$
 | |
| #
 | |
| # Copyright (C) 2006 - 2007 by Daniel Stenberg
 | |
| #
 | |
| 
 | |
| # binary version for the binary lang file
 | |
| my $langversion = 3; # 3 was the latest one used in the v1 format
 | |
| 
 | |
| # A note for future users and readers: The original v1 language system allowed
 | |
| # the build to create and use a different language than english built-in. We
 | |
| # removed that feature from our build-system, but the build scripts still had
 | |
| # the ability. But, starting now, this ability is no longer provided since I
 | |
| # figured it was boring and unnecessary to write support for now since we
 | |
| # don't use it anymore.
 | |
| 
 | |
# Without a language file argument there is nothing to work on: print the
# usage text and stop. The option descriptions below mirror the checks that
# are made on the switches further down in this script (-e is demanded for
# -b, -o and -u; -t is demanded for everything but -u).
if(!$ARGV[0]) {
    print <<MOO
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create a [prefix].c and [prefix].h file.

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e.

 -u
    Update language file. Given the translated file and the most recent english
    file, you\'ll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b, -o or -u.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b, -o or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!
    The use of this option requires that you also use -e.

 -v
    Enables verbose (debug) output.
MOO
;
    exit;
}
 | |
| 
 | |
| # How update works:
 | |
| #
 | |
| # 1) scan the english file, keep the whole <phrase> for each phrase.
 | |
| # 2) read the translated file, for each end of phrase, compare:
 | |
| #  A) all source strings, if there's any change there should be a comment about
 | |
| #     it output
 | |
| #  B) the desc fields
 | |
| #
 | |
| # 3) output the phrase with the comments from above
 | |
| # 4) check which phrases the translated version didn't have, and spit out
 | |
| #    the english version of those
 | |
| #
 | |
| 
 | |
# The command line switches arrive as package variables ($p, $b, ...) because
# the script is started with "perl -s". Copy them into descriptive names.
my ($prefix, $binary, $update) = ($p, $b, $u);
my ($english, $voiceout) = ($e, $o);

# Number of operating modes that were asked for; exactly one is allowed.
my $check = grep { $_ } ($binary, $prefix, $update, $voiceout);

if($check > 1) {
    print "Please use only one of -p, -u, -o and -b\n";
    exit;
}
elsif(!$check) {
    print "Please use at least one of -p, -u, -o and -b\n";
    exit;
}

# Every mode except -p needs the english master file
if(!$english && ($binary || $update || $voiceout)) {
    print "Please use -e too when you use -b, -o or -u\n";
    exit;
}

# Only update mode works without a target
my $target = $t;
unless($target || $update) {
    print "Please specify a target (with -t)!\n";
    exit;
}
my $verbose = $v;

my %id;    # string to num hash
my @idnum; # num to string array

my %source; # id string to source phrase hash
my %dest;   # id string to dest phrase hash
my %voice;  # id string to voice phrase hash

# The language file to scan
my $input = $ARGV[0];

# $m names the handler sub for "name: value" lines of the section currently
# being read; @m is the stack of handlers of the enclosing sections.
my @m;
my $m = "blank";
 | |
| 
 | |
# Glob-style comparison, used by parsetarget() to test a target name against
# a pattern taken from a language file.
#
#   $string  - the text to test
#   $pattern - glob pattern: '*' matches any run of characters (including
#              none), '?' matches exactly one character, everything else
#              matches itself
#
# Returns true when the whole of $string matches $pattern.
sub match {
    my ($string, $pattern)=@_;

    # Translate the glob one character at a time. Doing it in a single pass
    # keeps the '?' rule from touching what the '*' rule produced, and
    # quoting the remaining characters keeps regex metacharacters in a
    # pattern literal.
    my %wildcard = ('*' => '.*', '?' => '.');
    my $regex = join("", map {
        exists $wildcard{$_} ? $wildcard{$_} : quotemeta($_)
    } split(//, $pattern));

    return ($string =~ /^$regex\z/);
}
 | |
| 
 | |
# Handler used while no section is open ($m starts out as "blank"): any
# "name: value" line seen there is silently dropped.
sub blank {
    return;
}
 | |
| 
 | |
# Name/value pairs collected by header(); printed at the end in verbose mode.
my %head;

# Line handler that records one "name: value" pair in %head.
# Arguments: the full line (unused), the name and the value.
# Returns the stored value.
sub header {
    my (undef, $field, $content) = @_;
    return $head{$field} = $content;
}
 | |
| 
 | |
# Name/value pairs of the phrase currently being read (e.g. its 'id').
my %phrase;

# Line handler that records one "name: value" pair of the current phrase in
# %phrase. Arguments: the full line (unused), the name and the value.
# Returns the stored value.
sub phrase {
    my (undef, $field, $content) = @_;
    return $phrase{$field} = $content;
}
 | |
| 
 | |
# Shared worker for the source/dest/voice line handlers.
#
#   $debug  - label of the calling handler (not used by the code)
#   $strref - reference to the scalar that receives the value
#   $full   - the complete input line (not used by the code)
#   $n      - comma separated list of target patterns from the line
#   $v      - the value that belongs to those targets
#
# Each pattern in $n is tried, in order, against every colon separated part
# of the global $target. On the first hit $v is stored through $strref and
# returned; when nothing matches the referenced scalar is left alone.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v)=@_;

    for my $candidate (split(" *, *", $n)) {
        for my $wanted (split(":", $target)) {
            next unless match($wanted, $candidate);

            $$strref = $v;
            return $v;
        }
    }
}
 | |
| 
 | |
# Source string picked for the current phrase and target.
my $src;

# Line handler for "targets: value" lines: stores the value in $src when the
# line applies to the selected target (see parsetarget).
sub source {
    return parsetarget("src", \$src, @_);
}
 | |
| 
 | |
# Translated string picked for the current phrase and target.
my $dest;

# Line handler for "targets: value" lines: stores the value in $dest when the
# line applies to the selected target (see parsetarget).
sub dest {
    return parsetarget("dest", \$dest, @_);
}
 | |
| 
 | |
# Voice string picked for the current phrase and target.
my $voice;

# Line handler for "targets: value" lines: stores the value in $voice when
# the line applies to the selected target (see parsetarget).
sub voice {
    return parsetarget("voice", \$voice, @_);
}
 | |
| 
 | |
my %idmap;   # id string => id number, in the order of the english file
my %english; # id string => text of the whole english phrase
if($english) {
    # The english file is read before the translated file. Its id order
    # gives the numbers that -b and -o look up in %idmap, and -u compares
    # every translated phrase with the english text kept in %english.

    my $idnum=0; # first regular id
    my $vidnum=0x8000; # first voice id

    # three-argument open: the file name is never parsed for a mode
    open(my $eng_fh, "<", $english) || die "can't open $english: $!";
    my @phrase;
    my $id;
    while(<$eng_fh>) {

        # get rid of DOS newlines
        $_ =~ s/\r//g;

        if($_ =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($_ =~ /^ *\<\/phrase\>/) {
            # this is the end of a phrase, add it to the english hash
            $english{$id}=join("", @phrase);
            undef @phrase;
        }
        elsif($_ ne "\n") {
            # gather everything related to this phrase
            push @phrase, $_;
        }

        if($_ =~ /^ *id: ([^ \t\n]+)/i) {
            $id=$1;
            # voice-only entries get a different range
            if($id =~ /^VOICE_/) {
                # Assign an ID number to this entry
                $idmap{$id}=$vidnum;
                $vidnum++;
            }
            else {
                # Assign an ID number to this entry
                $idmap{$id}=$idnum;
                $idnum++;
            }
        }
    }
    close($eng_fh);
}
 | |
| 
 | |
# Compare one translated phrase with its english original and print the
# (possibly annotated) translated phrase to the currently selected output.
#
#   $idstr  - id of the phrase (not used by the code)
#   $engref - reference to the english phrase lines, without newlines
#   $locref - reference to the translated phrase lines, with newlines
#
# Two things are checked: the 'desc' line and the text between <source> and
# </source>. When either differs from the english, the old text is emitted
# as '###' comments followed by the english text.
sub compare {
    my ($idstr, $engref, $locref)=@_;
    my ($eng_desc, $eng_source);
    my ($loc_desc, $loc_source);

    # Pass 1: pick the desc and the <source> body out of the english phrase
    my $in_source = 0;
    for my $eline (@$engref) {
        if($eline =~ /^ *desc: (.*)/) {
            $eng_desc = $1;
            next;
        }
        if($eline =~ / *\<source\>/i) {
            $in_source = 1;
            next;
        }
        next unless $in_source;

        # the english <source> section is complete, nothing more is needed
        last if($eline =~ / *\<\/source\>/i);

        $eng_source .= "$eline\n";
    }

    # Pass 2: walk the translated phrase. Lines are queued in @pending and
    # flushed when the end of the <source> section is reached and once more
    # at the end of the phrase.
    my @pending;
    $in_source = 0;
    for my $lline (@$locref) {
        if($lline =~ /^ *desc: (.*)/) {
            $loc_desc = $1;
            if($eng_desc ne $loc_desc) {
                # replaces the line in the caller's array as well
                $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $loc_desc\n  desc: $eng_desc\n";
            }
            push @pending, $lline;
            next;
        }
        if($lline =~ / *\<source\>/i) {
            $in_source = 1;
            push @pending, $lline;
            next;
        }
        if(!$in_source) {
            push @pending, $lline;
            next;
        }
        if($lline !~ / *\<\/source\>/i) {
            # a line inside <source>, hold it back until the section ends
            $loc_source .= "$lline";
            next;
        }

        # </source> reached: emit what was queued, then the section body
        $in_source = 0;
        print @pending;
        if($eng_source eq $loc_source) {
            print $loc_source;
        }
        else {
            print "### The <source> section differs from the english!\n",
            "### the previously used one is commented below:\n";
            print "### $_\n" for split("\n", $loc_source);
            print $eng_source;
        }

        # start over, beginning with the closing tag itself
        @pending = ($lline);
    }

    print @pending;
}
 | |
| 
 | |
my $idcount;        # counter for lang ID numbers
my $voiceid=0x8000; # counter for voice-only ID numbers
my $line = 0;       # line number in $input, used by the warnings below

#
# Now start the scanning of the selected language string
#
# The file is read line by line. Comment and empty lines are skipped. A tag
# line opens or closes a section: on opening, the tag name becomes the
# current handler name in $m and the previous one is pushed on @m; on
# closing, the previous handler is popped back. Any other "name: value" line
# is passed to the sub named by $m. The real work is done when </phrase> is
# seen.
#

# three-argument open: the file name is never parsed for a mode
open(my $lang_fh, "<", $input) ||
    die "couldn't read language file named $input: $!\n";
my @phrase; # all lines of the phrase being read, used by update mode
while(<$lang_fh>) {

    $line++;

    # get rid of DOS newlines
    $_ =~ s/\r//g;

    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line
        next;
    }

    my $ll = $_;

    # print "M: $m\n";

    push @phrase, $ll;

    # this is an XML-lookalike tag
    if (/^(<|[^\"<]+<)([^>]*)>/) {
        my $part = $2;
        # print "P: $part\n";

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored. Forget what was collected for
                    # it, so that none of it is carried over to a following
                    # phrase that lacks a line for the selected target.
                    undef $src;
                    undef $dest;
                    undef $voice;
                    undef %phrase;
                }
                elsif(!$update) {
                    # we don't do the fully detailed analysis when we "update"
                    # since we don't do it for a particular target etc

                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts, if there are no quotes we
                    # will assume an empty string was intended
                    if($dest !~ /^\"/) {
                        print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
                        $dest='""';
                    }
                    if($src !~ /^\"/) {
                        print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
                        $src='""';
                    }
                    if($voice !~ /^\"/) {
                        print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
                        $voice='""';
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid++;
                    }
                    else {
                        $idnum = $idcount++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                    }

                    undef $src;
                    undef $dest;
                    undef $voice;
                    undef %phrase;
                }

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        # clear it, so that the phrase is not reported as
                        # missing from the translation afterwards
                        $english{$idstr}="";
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;

            } # end of </phrase>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if(/^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # call the sub that carries the name of the current section
        &$m($_, $name, $val);
    }
}
close($lang_fh);
 | |
| 
 | |
# In update mode, every english phrase that was matched by the translated
# file has been cleared from %english by now. Whatever is left was missing
# from the translation and is output in english.
if($update) {
    # Walk the ids in the order they were numbered while the english file
    # was scanned (%idmap), so that the output is the same on every run
    # instead of following the hash order.
    my @ordered = sort {
        $idmap{$a} <=> $idmap{$b} || $a cmp $b
    } keys %english;

    for my $idstr (@ordered) {
        next unless $english{$idstr};

        print "###\n",
        "### This phrase below was not present in the translated file\n",
        "<phrase>\n";
        print $english{$idstr};
        print "</phrase>\n";
    }
}
 | |
| 
 | |
# -p mode: write $prefix.h (the enum with all string ids) and $prefix.c (the
# built-in strings, each terminated by \0, in id order).
if($prefix) {
    # We create a .c and .h file

    # three-argument open: the file names are never parsed for a mode
    open(HFILE, ">", "$prefix.h") ||
        die "couldn't create file $prefix.h: $!\n";
    open(CFILE, ">", "$prefix.c") ||
        die "couldn't create file $prefix.c: $!\n";

    print HFILE <<MOO
/* This file was automatically generated using genlang */
/*
 * The str() macro/functions is how to access strings that might be
 * translated. Use it like str(MACRO) and expect a string to be
 * returned!
 */
#define str(x) language_strings[x]

/* this is the array for holding the string pointers.
   It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char language_builtin[];

/* The enum below contains all available strings */
enum \{
MOO
    ;

    print CFILE <<MOO
/* This file was automaticly generated using genlang, the strings come
   from "$input" */
   
#include "$prefix.h"

unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO
;

    # Output the ID names for the enum in the header file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf HFILE ("    %s,\n", $name);
    }

# Output separation marker for last string ID and the upcoming voice IDs

    print HFILE <<MOO
    LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
    /* --- below this follows voice-only strings --- */
    VOICEONLY_DELIMITER = 0x8000,
MOO
    ;

# Output the voice-only ID names for the enum in the header file
    for my $i (0x8000 .. ($voiceid-1)) {
        my $name=$idnum[$i]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf HFILE ("    %s,\n", $name);
    }

    # Output end of enum
    print HFILE "\n};\n/* end of generated enum list */\n";

    # Output the target phrases for the source file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote

        if(!$dest) {
            # this is just to be on the safe side
            $dest = '"\0"';
        }

        printf CFILE ("    %s\n", $dest);
    }

# Output end of string chunk
    print CFILE <<MOO
;
/* end of generated string list */
MOO
;

    # buffered write errors only show up when the files are closed
    close(HFILE) || die "couldn't write file $prefix.h: $!\n";
    close(CFILE) || die "couldn't write file $prefix.c: $!\n";
} # end of the c/h file generation
 | |
elsif($binary) {
    # Creation of a binary lang file was requested
    #
    # File layout as written below: the byte 0x1a, one byte with
    # $langversion, then for every phrase two bytes of id number (high byte
    # first) followed by the string and a terminating zero byte.

    # The id numbers come from the english file (%idmap), as that is what
    # sets the id order for all language files. The english file was scanned
    # before the translated file was scanned.

    # three-argument open: the file name is never parsed for a mode
    open(OUTF, ">", $binary) or die "Can't create $binary: $!";
    binmode OUTF;
    printf OUTF ("\x1a%c", $langversion); # magic lang file header

    # loop over the target phrases
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            my $engid = $idmap{$name};

            if(!defined($engid)) {
                # Without an english entry there is no id number for this
                # phrase; writing it would store it under id 0.
                print STDERR "Warning: $name is not in the english file, skipped\n";
                next;
            }

            printf OUTF ("%c%c%s\x00", ($engid>>8), ($engid&0xff), $dest);
        }
    }

    # buffered write errors only show up when the file is closed
    close(OUTF) or die "Can't write $binary: $!";
}
 | |
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    # All id numbers handed out while reading the translated file: the
    # regular ones first, then the voice-only range.
    my @ids = (0 .. ($idcount-1), 0x8000 .. ($voiceid-1));

    # Map the id number of the english file to the id number the same
    # phrase got in the translated file. Only phrases with a non-empty
    # voice string are entered.
    my @engl;
    for my $index (@ids) {
        my $name = $idnum[$index]; # get the ID
        next unless $voice{$name};

        $engl[ $idmap{$name} ] = $index;
    }

    # Print one entry for each number in @ids, looked up through the map
    for my $index (@ids) {
        my $name = $idnum[ $engl[$index] ]; # get the ID

        print "#$index\nid: $name\nvoice: $voice{$name}\n";
    }

}
 | |
| 
 | |
| 
 | |
# With -v, finish with the number of regular ids and the collected header
# fields.
if($verbose) {
    printf("%d ID strings scanned\n", $idcount);

    print "* head *\n";
    foreach my $field (keys %head) {
        printf "$field: %s\n", $head{$field};
    }
}
 | |
| 
 |