4 # The Intltool Message Merger
6 # Copyright (C) 2000, 2002 Free Software Foundation.
7 # Copyright (C) 2000, 2001 Eazel, Inc
9 # Intltool is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License
11 # version 2 published by the Free Software Foundation.
13 # Intltool is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 # As a special exception to the GNU General Public License, if you
23 # distribute this file as part of a program that contains a
24 # configuration script generated by Autoconf, you may include it under
25 # the same distribution terms that you use for the rest of that program.
27 # Authors: Maciej Stachowiak <mjs@noisehavoc.org>
28 # Kenneth Christiansen <kenneth@gnu.org>
29 # Darin Adler <darin@bentspoon.com>
31 # Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
34 ## Release information
35 my $PROGRAM = "intltool-merge";
36 my $PACKAGE = "intltool";
43 ## Scalars used by the option stuff
47 my $XML_STYLE_ARG = 0;
48 my $KEYS_STYLE_ARG = 0;
49 my $DESKTOP_STYLE_ARG = 0;
51 my $PASS_THROUGH_ARG = 0;
59 "version" => \$VERSION_ARG,
60 "quiet|q" => \$QUIET_ARG,
61 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
62 "ba-style|b" => \$BA_STYLE_ARG,
63 "xml-style|x" => \$XML_STYLE_ARG,
64 "keys-style|k" => \$KEYS_STYLE_ARG,
65 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
66 "pass-through|p" => \$PASS_THROUGH_ARG,
67 "utf8|u" => \$UTF8_ARG,
68 "cache|c=s" => \$cache_file
75 my %po_files_by_lang = ();
76 my %translations = ();
78 # Use this instead of \w for XML files to handle more possible characters.
79 my $w = "[-A-Za-z0-9._:]";
81 # XML quoted string contents
90 } elsif ($BA_STYLE_ARG && @ARGV > 2) {
93 &ba_merge_translations;
95 } elsif ($XML_STYLE_ARG && @ARGV > 2) {
99 &xml_merge_translations;
101 } elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
105 &keys_merge_translations;
107 } elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
110 &desktop_merge_translations;
118 ## Sub for printing release information
121 print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
122 print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
123 print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
124 print "Copyright (C) 2000-2001 Eazel, Inc.\n";
125 print "This is free software; see the source for copying conditions. There is NO\n";
126 print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
130 ## Sub for printing usage information
133 print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
134 print "Generates an output file that includes translated versions of some attributes,\n";
135 print "from an untranslated source and a po directory that includes translations.\n\n";
136 print " -b, --ba-style includes translations in the bonobo-activation style\n";
137 print " -d, --desktop-style includes translations in the desktop style\n";
138 print " -k, --keys-style includes translations in the keys style\n";
139 print " -x, --xml-style includes translations in the standard xml style\n";
140 print " -u, --utf8 convert all strings to UTF-8 before merging\n";
141 print " -p, --pass-through use strings as found in .po files, without\n";
142 print " conversion (STRONGLY unrecommended with -x)\n";
143 print " -q, --quiet suppress most messages\n";
144 print " --help display this help and exit\n";
145 print " --version output version information and exit\n";
146 print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
147 print "<xml-i18n-tools-list\@gnome.org>.\n";
152 ## Sub for printing error messages
155 print "Try `${PROGRAM} --help' for more information.\n";
162 print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
173 &get_translation_database;
176 # General-purpose code for looking up translations in .po files
181 $tmp =~ s/^.*\/(.*)\.po$/$1/;
187 for my $po_file (glob "$PO_DIR/*.po") {
188 $po_files_by_lang{po_file2lang($po_file)} = $po_file;
194 my ($in_po_file) = @_;
197 open IN_PO_FILE, $in_po_file or die;
198 while (<IN_PO_FILE>) {
199 ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
200 if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
208 print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
209 $encoding = "ISO-8859-1";
214 sub utf8_sanity_check
217 if (!$PASS_THROUGH_ARG) {
218 $PASS_THROUGH_ARG="1";
223 sub get_translation_database
226 &get_cached_translation_database;
228 &create_translation_database;
232 sub get_newest_po_age
236 foreach my $file (values %po_files_by_lang) {
237 my $file_age = -M $file;
238 $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
246 print "Generating and caching the translation database\n" unless $QUIET_ARG;
248 &create_translation_database;
250 open CACHE, ">$cache_file" || die;
251 print CACHE join "\x01", %translations;
257 print "Found cached translation database\n" unless $QUIET_ARG;
260 open CACHE, "<$cache_file" || die;
266 %translations = split "\x01", $contents;
269 sub get_cached_translation_database
271 my $cache_file_age = -M $cache_file;
272 if (defined $cache_file_age) {
273 if ($cache_file_age <= &get_newest_po_age) {
277 print "Found too-old cached translation database\n" unless $QUIET_ARG;
283 sub create_translation_database
285 for my $lang (keys %po_files_by_lang) {
286 my $po_file = $po_files_by_lang{$lang};
289 my $encoding = get_po_encoding ($po_file);
290 if (lc $encoding eq "utf-8") {
291 open PO_FILE, "<$po_file";
293 my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
294 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
297 open PO_FILE, "<$po_file";
306 $nextfuzzy = 1 if /^#, fuzzy/;
307 if (/^msgid "((\\.|[^\\])*)"/ ) {
308 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
315 $msgid = unescape_po_string($1);
321 if (/^msgstr "((\\.|[^\\])*)"/) {
322 $msgstr = unescape_po_string($1);
326 if (/^"((\\.|[^\\])*)"/) {
327 $msgid .= unescape_po_string($1) if $inmsgid;
328 $msgstr .= unescape_po_string($1) if $inmsgstr;
331 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
339 sub unescape_one_sequence
343 return "\\" if $sequence eq "\\\\";
344 return "\"" if $sequence eq "\\\"";
346 # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
347 # \xXX (hex) and has a comment saying they want to handle \u and \U.
352 sub unescape_po_string
356 $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
374 my ($pre_encoded) = @_;
376 my @list_of_chars = unpack ('C*', $pre_encoded);
378 if ($PASS_THROUGH_ARG) {
379 return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
381 return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
385 sub entity_encode_int_minimalist
387 return """ if $_ == 34;
388 return "&" if $_ == 38;
389 return "'" if $_ == 39;
393 sub entity_encode_int_even_high_bit
395 if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) {
396 # the ($_ > 127) should probably be removed
397 return "&#" . $_ . ";";
403 sub entity_encoded_translation
405 my ($lang, $string) = @_;
407 my $translation = $translations{$lang, $string};
408 return $string if !$translation;
409 return entity_encode ($translation);
412 ## XML (bonobo-activation specific) merge code
414 sub ba_merge_translations
419 local $/; # slurp mode
420 open INPUT, "<$FILE" or die "can't open $FILE: $!";
425 open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
427 while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
430 my $node = $2 . "\n";
434 while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
435 push @strings, entity_decode($3);
440 for my $string (@strings) {
441 for my $lang (keys %po_files_by_lang) {
442 $langs{$lang} = 1 if $translations{$lang, $string};
446 for my $lang (sort keys %langs) {
448 s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
449 s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
454 print OUTPUT $source;
460 ## XML (non-bonobo-activation) merge code
462 sub xml_merge_translations
467 local $/; # slurp mode
468 open INPUT, "<$FILE" or die "can't open $FILE: $!";
473 open OUTPUT, ">$OUTFILE" or die;
475 # FIXME: support attribute translations
477 # Empty nodes never need translation, so unmark all of them.
478 # For example, <_foo/> is just replaced by <foo/>.
479 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
481 # Support for <_foo>blah</_foo> style translations.
482 while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
489 print OUTPUT "$spaces<$tag>$string</$tag>\n";
491 $string =~ s/\s+/ /g;
494 $string = entity_decode($string);
496 for my $lang (sort keys %po_files_by_lang) {
497 my $translation = $translations{$lang, $string};
498 next if !$translation;
499 $translation = entity_encode($translation);
500 print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
504 print OUTPUT $source;
509 sub keys_merge_translations
511 open INPUT, "<${FILE}" or die;
512 open OUTPUT, ">${OUTFILE}" or die;
515 if (s/^(\s*)_(\w+=(.*))/$1$2/) {
520 my $non_translated_line = $_;
522 for my $lang (sort keys %po_files_by_lang) {
523 my $translation = $translations{$lang, $string};
524 next if !$translation;
526 $_ = $non_translated_line;
527 s/(\w+)=.*/[$lang]$1=$translation/;
539 sub desktop_merge_translations
541 open INPUT, "<${FILE}" or die;
542 open OUTPUT, ">${OUTFILE}" or die;
545 if (s/^(\s*)_(\w+=(.*))/$1$2/) {
550 my $non_translated_line = $_;
552 for my $lang (sort keys %po_files_by_lang) {
553 my $translation = $translations{$lang, $string};
554 next if !$translation;
556 $_ = $non_translated_line;
557 s/(\w+)=.*/${1}[$lang]=$translation/;