#!/usr/local/bin/perl
# $Id: /mirror/trunk/bin/g2b.pl 39 2006-12-12T15:44:19.891338Z kcwu  $

# Version string of this script.  It is assigned as a plain package variable;
# the line sits before `use strict` further down, so no declaration is needed.
$VERSION = '0.12';

=head1 NAME

g2b.pl - Simplified to Traditional Chinese converter

=head1 SYNOPSIS

B<g2b.pl> [ B<-p> ] [ B<-u> ] [ I<inputfile> ...] > I<outputfile>

=head1 USAGE

    % g2b.pl -p < gbk.txt > big5.txt
    % g2b.pl -pu < simp.txt > trad.txt

=head1 DESCRIPTION

The B<g2b.pl> utility reads files sequentially, converts them from
Simplified to Traditional Chinese, then writes them to the standard
output.  The I<inputfile> arguments are processed in command-line order.
If I<inputfile> is a single dash (C<->) or absent, this program reads
from the standard input.

The C<-p> switch enables rudimentary phrase-oriented substitution via a
small built-in lexicon.

The C<-u> switch specifies that both the input and output streams should
be UTF-8 encoded.  If not specified, the input stream is assumed to be
in GBK, and the output will be encoded in Big5.

=head1 CAVEATS

In pure-perl implementations (pre-5.8 perl or without a C compiler),
C<-p> and C<-u> cannot be used together.

=cut

use strict;
use Getopt::Std;

# Forward declaration with an empty prototype.  MAP itself is only defined by
# the `use constant MAP` statement near the end of the file, but it is used
# below, so the parser has to know already that it takes no arguments.
sub MAP ();

# Command-line switches.  They are parsed inside BEGIN so that the
# `use constant` lines below, which also run at compile time, can read them.
my %opts;
BEGIN {
    getopts('hup', \%opts);
    # -h: show this script's POD through perldoc, then quit.
    if ($opts{h}) { system("perldoc", $0); exit }
    # Discard every warning from this point on, at compile time and at run
    # time alike.
    # NOTE(review): this also silences any warn() issued by the script itself.
    $SIG{__WARN__} = sub {};
}

# UTF8: true when -u was given (UTF-8 input and output instead of GBK/Big5).
use constant UTF8 => $opts{u};
# DICT: true when -p was given and phrase substitution is usable, i.e. when
# either UTF-8 mode is off or the interpreter is perl 5.8 or later.
use constant DICT => ($opts{p} and (!UTF8 or $] >= 5.008));

use Encode::HanConvert;

# $KEYS is a regex alternation of every lexicon phrase, each one quoted with
# quotemeta and ordered longest first so that a long phrase is tried before
# any shorter phrase it starts with.  $MAP is the lexicon hash reference.
# Both stay undef when DICT is off.
#
# A ternary is used rather than `my $x = ... if DICT;` because perlsyn
# documents a `my` declaration under a statement modifier as undefined
# behaviour.
my $KEYS = DICT
    ? join('|', map quotemeta, sort { length($b) <=> length($a) } keys %{ MAP() })
    : undef;
my $MAP = DICT ? MAP() : undef;

# Main driver: convert every file named on the command line, in order, or
# standard input when no file is named.
if (@ARGV) {
    for my $file (@ARGV) {
        # A lone dash stands for standard input, as the POD describes.
        if ($file eq '-') {
            convert(\*STDIN);
            next;
        }

        # Three-argument open treats the name literally, so leading or
        # trailing characters such as '>', '<', '|' or whitespace are never
        # interpreted as an open mode or a command.  (Requires perl 5.6+.)
        my $in;
        unless (open($in, '<', $file)) {
            # Written to STDERR directly: the __WARN__ handler installed in
            # the BEGIN block discards everything passed to warn(), so a
            # warn() here would never be seen.
            print STDERR "Can't open $file: $!\n";
            next;
        }
        convert($in);
        close $in;
    }
} else {
    convert(\*STDIN);
}

# convert($fh)
#
# Reads the open input handle $fh line by line, converts each line from
# Simplified to Traditional Chinese and prints it to STDOUT.
#
#   * On perl 5.8 or later the character conversion is done by I/O layers:
#     the input handle gets ':encoding(simp-trad)' (UTF-8 mode) or
#     ':encoding(gbk-trad)', and STDOUT gets ':utf8' or ':encoding(big5)'.
#     (The *-trad encodings are presumably registered by Encode::HanConvert;
#     verify against that module.)
#   * On older perls each line is converted by calling
#     Encode::HanConvert::simp_to_trad or Encode::HanConvert::gb_to_big5,
#     which are relied on to modify their argument in place.
#   * When DICT is on, phrases matching $KEYS are then replaced from $MAP.
#
# Returns nothing useful.
{
    # Set once STDOUT has received its output layer.  binmode() pushes a new
    # layer on each call, so repeating it for every input file would stack
    # ':encoding(big5)' several times and encode later output more than once.
    # Deliberately left without an initializer: convert() is called before
    # execution reaches this line.
    my $stdout_ready;

    sub convert {
        my ($fh) = @_;
        my $has_perlio = ($] >= 5.008);

        if ($has_perlio) {
            binmode($fh, UTF8 ? ':encoding(simp-trad)' : ':encoding(gbk-trad)');
            unless ($stdout_ready) {
                binmode(STDOUT, UTF8 ? ':utf8' : ':encoding(big5)');
                $stdout_ready = 1;
            }
        }

        # Read into a lexical rather than $_, so the caller's $_ (which may
        # be aliased to an @ARGV element) is not overwritten.
        while (defined(my $line = <$fh>)) {
            unless ($has_perlio) {
                if (UTF8) { Encode::HanConvert::simp_to_trad($line) }
                else      { Encode::HanConvert::gb_to_big5($line) }
            }
            if (DICT) { $line =~ s/($KEYS)/$MAP->{$1}/g }
            print $line;
        }
    }
}

use constant MAP => DICT && {
    map { UTF8 ? Encode::decode(big5 => $_) : $_ } reverse (
'A' => 'H',
'AӺ' => 'HӺ',
'EEk' => 'EE',
'K' => 'K`',
'XC' => 'C',
'_I' => '_I',
'H' => '`',
'' => 'f',
'' => 'ե',
'ƥ]' => ']',
'ɵҸ' => 'ɯŦҸ',
'ɾ' => 'ɧ',
'̧x' => 'ˮx',
'ӪŤH' => 'ѭ',
'ӪŦ' => 'tzA',
'Ӫű' => 'ѭ',
'ӪŲ' => 'tz',
'ګ' => 'ຸڳ',
'fWï' => 'fï',
'䴩' => '',
'' => '',
'麸ҥ' => 'հҥ',
'ı׶' => '',
'' => '',
'y' => '',
'\\' => '',
']' => 'c',
'vFL' => 'jL',
'~Ӫ' => '~hŶ',
'' => 'e',
'' => '',
'@~' => 'æާ@',
'' => 'æݤf',
'u' => 'æu',
'X' => '',
'' => '',
'' => 'ƾ',
'hy' => '𴵴',
'' => '',
'о' => 'X',
'' => '',
'@' => '@',
'B' => 'ΦB',
'CL' => 'L',
'L' => 'L',
'V' => 'Vۥ',
']' => ']l',
'j' => '`',
'^' => 'T',
'h[' => 'h̥[',
'r' => 'rŦ',
'r' => 'e',
's' => 'sL',
'Ⱦ' => 'ھ',
'жq' => 'q',
'' => 'JX',
'褸' => '',
'}' => 'a}',
'C' => 'C',
'' => '',
'A' => 'AȾ',
'@~t' => 'ާ@t',
'Bu' => 'B',
'Cy' => 'CŻy',
'Y' => '',
'l' => 'lۥ',
'Դ' => 'xԴ',
'ǦC' => 'ݤf',
'˰' => '',
'FԪo' => 'Ԫo',
'FQaԧB' => 'FSԧB',
'HA' => 'IA',
'M' => '^M',
'n' => 'n',
'g' => 'gO@',
'Ӻֽu' => 'ӽƽu',
'L' => '䥦',
'w' => 'ĳ',
'b' => 'uʱ',
'P' => '~',
'}' => '}ۥ',
'ݸO' => 'ݭO',
'`}' => '}`',
'`' => '`',
'' => '§',
'' => 'H',
'' => '',
'ѥl' => 'Ѥl',
'' => 'ֺ',
'' => 'ֺ',
'q' => 'ֺ',
'ť' => 'Ů',
'N' => 'CN',
'w' => '|',
'i' => 'iS',
'F' => 'qǪL',
'cM' => 'KM',
'n' => 'f',
'˳H^' => '˳H',
'' => 'w',
'A' => 'A',
'HѤ' => 'Ѥ',
'Mg' => 'M',
'ȥ' => 'ȱۥ',
'ۮe' => 'ݮe',
'Cϸ`' => 'tϸ`',
'I' => 'Iۥ',
'^o' => '^T',
'pɾ' => 'wɾ',
'p{' => 'X',
'p' => 'p⾹',
'}i' => '}',
'' => '^',
'' => '\\',
'˷' => '˾`',
'Ծ' => 'Ծ',
'M˳n' => 'n]',
'y' => '',
'z' => 'zۥ',
'}' => 'ݵ}',
'' => 'ݳ',
'b' => 'ѽb',
'' => '@',
'BL' => 'L',
'QC' => 'Q',
'QHH' => 'QII',
'}r' => 'hr',
'D' => 'y',
'Q' => '',
'ż' => 'Qż',
'޻K' => 'vK',
'㰩' => 'm',
'`p' => '`p',
'' => 'g',
'O' => 's',
'sf' => 'st',
'}C' => 'Ʋ',
'' => '',
'Ƶ{' => 'l{',
'ɦW' => 'XiW',
'հն' => 'ԩԶ',
'Ū' => 'uŪ',
'|' => '',
'M' => '',
'`' => 'ҵ{',
'dDf' => 'dIfJ',
'|' => 'ֱ覡',
'y' => 'y',
'Ұ' => 'E',
'wz' => 'Īѩwz',
'd' => '[',
'˻' => '',
'S' => 'S',
'J' => '',
'n' => '',
'qT' => 'qTï',
'qD' => 'HD',
'su' => 'p',
't' => '\\',
'Z' => '`',
'HH' => 'II',
'KK' => '',
'¤l' => 'l',
'¶Ѷ' => 'ĽѶ',
'·' => 'F',
'ټM' => '~M',
'' => 'ݴ',
'' => '',
'' => '',
'' => '',
'z' => '',
'' => '',
'Lka' => '^ka',
'fX' => 'U',
'nX' => '`P',
'w' => 'w',
'{' => '{',
'{Ǳ' => 'L{',
'' => '',
'' => '',
'' => 'o',
'' => '',
'y' => 'Sy',
'Ҭr' => 'Ϭr',
'' => 'f',
'Ws' => 'W챵',
'lϸ' => 'lFsX',
'¨HH' => '¨II',
'ü' => 'H',
'׬y' => '`u',
'콦' => '',
'JԲ{' => 'XJԲ{',
'|' => 'e|',
'P' => 'Pۥ',
'suL' => 'su',
'tHH' => 'tII',
'' => 'B',
'ƹ' => '',
'qjQ' => 'NjQ',
'ta' => 'taȭ',
'ѪR' => 'v',
'ѽX' => 'ĶX',
'պ' => 'ql',
'T' => 'H',
'J' => '[',
'B⤸' => 'ާ@',
'B⦡' => 'F',
'O~' => 'Ʀ~',
'P@' => 'Q@',
'dI' => 'I',
'hD' => '',
'pgL' => 'EL',
'p' => '',
'q' => '',
'qǵT' => 'Ϥq',
'q' => 'p',
'q{' => 'p{',
'q' => 'q',
'ù' => 'o',
'ϥ' => 'ϼ',
'GF' => 'GH',
'PX' => 'X',
'jf' => 'f',
'tk' => 'k',
'E' => '',
'' => '詥',
'Ϥ' => 'ϽL',
'ϭy' => 'ϹD',
'ϰ' => '',
'Ϻ' => 'ϽL',
'Ϻо' => 'ϽLXʾ',
'Ϻ' => 'ϽL',
'үX' => '',
'ְL' => 'ֺL',
'' => '_',
'' => '',
'éégg' => 'XX',
'ég' => 'X',
'`' => 'j',
'h' => 'b',
'|' => '|',
'' => '{',
'' => '',
'f' => '\\f',
'۰' => '\\\\۰',
'x' => '',
'ƾھ' => 'ըѽվ',
'Ȧs' => 'ws',
'ñ' => '',
'Ҳ' => 'Ҷ',
'' => 'u',
'p' => 'up⾹',
'Y' => 'L',
'd' => 'ҪO',
'kk' => 'JJ',
'k' => 'J',
'uW@~' => 'pާ@',
'' => '',
'R' => 'ľR',
'A~' => 'A',
'Bn' => '̽',
'K' => 'K',
'HH' => 'II',
'' => '',
'p' => 'Ap',
'nq' => 'q',
'ù' => '̹',
'Ը' => 't',
'հҦa' => 'հҩ',
'հҲ' => 'հҩ',
'v' => '',
'J' => '箫',
'z' => 'zB',
'V' => '',
'ͥ' => 'ͱۥ',
'' => 'ҿ',
'鿤' => '',
'NO' => 'BNO',
'n' => 'H',
'²' => 'tܤZ',
'Ǿ' => 'ɾ',
'Ȥ' => '',
'I' => 'I',
'T' => 'n',
']]' => 'VV',
'ùLd' => 'ùd',
'ùù' => 'oo',
'ùwq' => 'ùoq',
'ùù' => 'oo',
'ùù۶' => 'oo۶',
'ѧOr' => 'Ѳ',
'߽' => 'k',
'' => '',
'r' => '',
'' => '',
'T' => '',
'a' => 'K⦡',
'' => 'rq',
'Iܹ' => 'Tܹ',
'Jg' => 'E',
'v' => 'OP',
'Ūd' => 'd\\Ū',
'B' => '',
'eӧg' => 'ڮӧg',
'Ʀ' => '',
'Y' => 'AY',
'带' => '¦',
'' => '',
'P' => 'J',
'' => 'o',
'￦X' => 'X',
'^' => '^`',
'줸' => 'r`',
'쵲' => '챵',
'KX' => 'fO',
'u' => '',
'v' => '',
'y' => '',
'z' => '',
'g' => '٬',
'g|' => 'ٹ|',
'g' => 'ٵ',
'ie' => 'xe',
'iO' => 'xO',
'iW' => 'xW',
'OW' => 'xW',
'O_' => 'x_',
'On' => 'xn',
'䭷' => 'x',
'ѯ' => '',
'ѥ]' => ']',
'ѱ' => '',
'' => '',
'' => '',
'' => 'Z',
'' => 'Z',
) }
__END__

=head1 SEE ALSO

L<b2g.pl>, L<Encode::HanConvert>

=head1 AUTHORS

Currently maintained by Kuang-che Wu E<lt>kcwu@csie.orgE<gt>.  Original author:
Audrey Tang E<lt>cpan@audreyt.orgE<gt>

=head1 COPYRIGHT

Copyright 2002, 2003, 2004 by Audrey Tang E<lt>cpan@audreyt.orgE<gt>.
Copyright 2006 by Kuang-che Wu E<lt>kcwu@csie.orgE<gt>.

This program is free software; you can redistribute it and/or 
modify it under the same terms as Perl itself.

See L<http://www.perl.com/perl/misc/Artistic.html>

=cut
