You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
195 lines
4.3 KiB
195 lines
4.3 KiB
2 months ago
|
#!/usr/bin/perl
|
||
|
#
|
||
|
# Character Set Table Generator 1.0
|
||
|
# (c) 1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
|
||
|
#
|
||
|
# This program can be freely distributed and used according to the terms
|
||
|
# of the GNU General Public License.
|
||
|
#
|
||
|
|
||
|
# Internal codes 0..255 are mapped to Unicode 0..255
|
||
|
# Internal code 256 is the replacement character (U+FFFD)
|
||
|
|
||
|
# Number of charsets collected so far; doubles as the next free index
# in @charsets.
$ncs = 0;

print "/* Generated by tabgen 1.0, please don't edit manually. */\n\n";

print STDERR "Charset list...\n";

# Read the list of charset table files from the files named on the command
# line (or from STDIN when there are none).  Every remaining line is stored
# verbatim; it is later opened as a file and echoed into the generated
# comments.
while (<>) {
	chomp;
	# Skip blank or whitespace-only lines and '#' comments.  (This used to
	# test /^\w*$/, which let whitespace-only lines through as file names
	# and silently dropped names made only of word characters.)
	next if /^\s*$/ || /^#/;
	$charsets[$ncs++] = $_;
}
|
||
|
|
||
|
print STDERR "Found $ncs charsets, counting unique codes...\n";

# Seed the internal ("x") code space:
#   %u2x maps a Unicode value to its internal code,
#   @x2u maps an internal code back to its Unicode value.
# Internal codes 0..255 are identical to Unicode 0..255.
foreach my $code (0 .. 255) {
	$u2x{$code} = $code;
	$x2u[$code] = $code;
}
$unique = 256;
# Internal code 256 is reserved for the replacement character U+FFFD.
$u2x{0xFFFD} = $unique;
$x2u[$unique++] = 0xFFFD;

# Emit one 256-entry row per charset, translating each input byte to its
# internal code.  New internal codes are allocated on first sight of a
# Unicode value.
print "static unsigned short int input_to_x[$ncs][256] = {\n";
for (my $cs = 0; $cs < $ncs; $cs++) {
	my $file = $charsets[$cs];
	print "\n/* $file */\n{\n";

	# Open strictly for reading: with two-argument open a leading '<', '>'
	# or '|' in the name would change the mode.  Two-argument open also
	# ignored surrounding whitespace in the name, so keep accepting that.
	(my $path = $file) =~ s/^\s+|\s+$//g;
	open(my $table, '<', $path) or die "Error opening $file: $!";
	while (my $line = <$table>) {
		chomp $line;
		# Skip blank lines and comments (was /^\w*$/, which did not match
		# whitespace-only lines).
		next if $line =~ /^\s*$/ || $line =~ /^#/;
		# Tab-separated: hex byte value, hex Unicode value, rest ignored.
		my ($byte, $uni) = split /\t/, $line;
		$cc[$cs][hex $byte] = $uni;
	}
	close $table;

	for (my $byte = 0; $byte < 256; $byte++) {
		# Bytes the table does not define map to U+FFFD.
		my $uni = hex(defined($cc[$cs][$byte]) ? $cc[$cs][$byte] : "FFFD");
		if (!defined $u2x{$uni}) {
			$x2u[$unique] = $uni;
			$u2x{$uni} = $unique++;
		}
		my $xcode = $u2x{$uni};
		print "$xcode,", (($byte % 16 == 15) ? "\n" : " ");
		# From here on @cc holds internal codes instead of hex strings.
		$cc[$cs][$byte] = $xcode;
		# Reverse map, internal code -> byte; the highest byte wins when
		# several bytes share one code.
		$cx[$cs]{$xcode} = $byte;
	}
	print "},\n";
}
print "};\n\n";
|
||
|
|
||
|
print STDERR "$unique unique codes...\n";

# Dump @x2u (internal code -> Unicode value) as a C array, 16 values per line.
print "static unsigned short int x_to_uni[$unique] = {\n";
foreach my $xcode (0 .. $unique - 1) {
	my $sep = ($xcode % 16 == 15) ? "\n" : " ";
	print "$x2u[$xcode],$sep";
}
# Terminate a partially filled last line.
print "\n" if $unique % 16;
print "};\n\n";
|
||
|
|
||
|
print STDERR "UNICODE table...\n";

# Flag every 256-code page (Unicode value / 256) that contains at least one
# Unicode value with an internal code.
foreach my $xcode (0 .. $unique - 1) {
	$pg[int($x2u[$xcode] / 256)] = 1;
}

# One reverse table per flagged page: Unicode value -> internal code, with
# 256 (the replacement character's internal code) for unknown values.
foreach my $page (grep { $pg[$_] } 0 .. 255) {
	print "static unsigned short int uni_to_x_$page\[256\] = {\n";
	foreach my $low (0 .. 255) {
		my $uni = 256 * $page + $low;
		my $xcode = defined($u2x{$uni}) ? $u2x{$uni} : 256;
		print "$xcode,", (($low % 16 == 15) ? "\n" : " ");
	}
	print "};\n\n";
}

print "static unsigned short int *uni_to_x[256] = {\n";
# Pages with no table of their own are pointed at page 255 below.  That only
# yields "replacement" for every lookup if page 255 holds no known value
# other than U+FFFD, so verify it.
foreach my $uni (0xFF00 .. 0xFFFF) {
	next if $uni == 0xFFFD;
	die "Invalid replacement strategy!" if defined $u2x{$uni};
}
foreach my $page (0 .. 255) {
	my $target = $pg[$page] ? $page : "255";
	my $sep = ($page % 4 == 3) ? "\n" : " ";
	print "uni_to_x_$target,$sep";
}
print "};\n\n";
|
||
|
|
||
|
print STDERR "UniData file...\n";

# Load expansions from UnicodeData.txt: for every record whose sixth
# semicolon-separated field is non-empty, %expand{code} becomes a
# space-separated list of decimal values.
open(U, "unidata/UnicodeData.txt") or die "No UnicodeData file";
while (my $record = <U>) {
	chomp $record;
	my ($code_hex, $decomp) = (split /;/, $record)[0, 5];
	next unless defined($decomp) && $decomp ne "";
	# Drop a leading "<tag>" marker, if any.
	$decomp =~ s/^<.*> *//g;
	# Convert hex -> decimal, leaving out 0020 (the space character).
	my @parts = map { hex $_ } grep { $_ ne "0020" } split(/ /, $decomp);
	$expand{hex $code_hex} = join " ", @parts;
}
close U;
|
||
|
|
||
|
print STDERR "Accent rules\n";

# Optional user overrides: each rule line is "<code> <code>", where a code
# is decimal or 0x-prefixed hex.  The file is skipped silently when it
# cannot be opened.
if (open(ACC, "misc/user_unacc")) {
	RULE: while (my $rule = <ACC>) {
		chomp $rule;
		next RULE if $rule =~ /^\s*$/ or $rule =~ /^#/;
		# Rewrite 0x... literals as decimal so one pattern fits both forms.
		$rule =~ s/0x([0-9a-zA-Z]+)/hex($1)/ge;
		my ($from, $to) = $rule =~ /^(\d+)\s+(\d+)$/
			or die "Syntax error in user accent rules";
		$expand{$from} = $to;
	}
	close ACC;
}
|
||
|
|
||
|
print STDERR "Character expansions\n";

# Optional user expansions: each rule line is "<code> <anything>", with
# 0x-prefixed hex numbers converted to decimal first.  The remainder of the
# line is stored as the expansion of <code>.  The file is skipped silently
# when it cannot be opened.
if (open(EXTRA, "misc/user_expand")) {
	RULE: while (my $rule = <EXTRA>) {
		chomp $rule;
		next RULE if $rule =~ /^\s*$/ or $rule =~ /^#/;
		$rule =~ s/0x([0-9a-zA-Z]+)/hex($1)/ge;
		my ($from, $to) = $rule =~ /^(\d+)\s+(.*)$/
			or die "Syntax error in user expansions";
		$expand{$from} = $to;
	}
	close EXTRA;
}
|
||
|
|
||
|
# For every charset and every internal code, emit what to output:
#   - a value below 256 is a single output byte,
#   - a value of 256 or more is an offset into the string table (offsets are
#     numbered from 256), used when the result is empty or longer than one
#     byte.
print "static unsigned short int x_to_output[$ncs][$unique] = {\n";

# Next free string-table offset; %string maps byte string -> offset and
# %strval maps offset -> byte string.
$pstr = 256;

# Upper bound on expansion rounds per code.  Without it, cyclic rules
# (A expands to B, B expands to A, neither present in the charset) would
# make the loop below spin forever.
my $max_expand_rounds = 64;

for (my $cs = 0; $cs < $ncs; $cs++) {
	print "\n/* $charsets[$cs] */\n{\n";
	for (my $xc = 0; $xc < $unique; $xc++) {
		# Repeatedly rewrite the space-separated list of Unicode values
		# until it stops changing: values present in this charset are kept,
		# others are replaced by their expansion, and values with neither
		# are dropped.
		my $cur = $x2u[$xc];
		my $prev;
		my $rounds = 0;
		do {
			die "Expansion of code $x2u[$xc] for $charsets[$cs] does not terminate (cyclic expansion rules?)"
				if ++$rounds > $max_expand_rounds;
			$prev = $cur;
			$cur = "";
			foreach my $uni (split(/ /, $prev)) {
				my $k;
				if (defined($k = $u2x{$uni}) && defined $cx[$cs]{$k}) {
					$cur = "$cur $uni";
				} elsif (defined($k = $expand{$uni})) {
					$cur = "$cur $k";
				}
			}
			$cur =~ s/^ //;
		} while ($prev ne $cur);

		# Translate the settled list into bytes of this charset.  Internal
		# code 256 (the replacement character) never produces output.
		my $bytes = "";
		foreach my $uni (split(/ /, $prev)) {
			my $k = $u2x{$uni};
			next unless defined $k;
			next if $k == 256;
			my $byte = $cx[$cs]{$k};
			$bytes .= pack("C", $byte) if defined $byte;
		}

		my $z;
		if (length($bytes) == 1) {
			$z = unpack("C", $bytes);
		} else {
			# Empty or multi-byte result: share one string-table entry per
			# distinct byte string.  Each entry takes one length byte plus
			# the bytes themselves.
			if (!defined($string{$bytes})) {
				$string{$bytes} = $pstr;
				$strval{$pstr} = $bytes;
				$pstr += 1 + length($bytes);
			}
			$z = $string{$bytes};
		}
		print "$z,", (($xc % 16 == 15) ? "\n" : " ");
	}
	# Terminate a partially filled last line.
	if ($unique % 16) { print "\n"; }
	print "},\n";
}
print "};\n\n";
|
||
|
|
||
|
print STDERR "And Tubular Bells...\n";

# Dump the string table: entries are laid out back to back starting at
# offset 256, each as one length byte followed by the string's bytes.
print "static unsigned char string_table[] = {\n";
for (my $pos = 256; $pos < $pstr; ) {
	my $str = $strval{$pos};
	my $len = length $str;
	# The length is stored in a single unsigned char.  A longer string would
	# produce a table whose contents disagree with the offsets already
	# handed out, so refuse to generate it.
	die "String table entry at offset $pos is $len bytes long, maximum is 255"
		if $len > 255;
	print "$len,";
	# "C*" emits every byte, so the data always matches the printed length
	# (the former "C256" template stopped after 256 bytes).
	print " $_," for unpack("C*", $str);
	print "\n";
	$pos += 1 + $len;
}
print "};\n";

print STDERR "Done.\n";
|