194 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			194 lines
		
	
	
	
		
			4.3 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
#!/usr/bin/perl
 | 
						|
#
 | 
						|
#	Character Set Table Generator 1.0
 | 
						|
#	(c) 1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 | 
						|
#
 | 
						|
#	This program can be freely distributed and used according to the terms
 | 
						|
#	of the GNU General Public License.
 | 
						|
#
 | 
						|
 | 
						|
# Internal codes 0..255 are mapped to UniCode 0..255
 | 
						|
# Internal code 256 is the replacement character (U#FFFD)
 | 
						|
 | 
						|
$ncs = 0;
 | 
						|
 | 
						|
print "/* Generated by tabgen 1.0, please don't edit manually. */\n\n";
 | 
						|
 | 
						|
print STDERR "Charset list...\n";
 | 
						|
 | 
						|
while (<>) {
 | 
						|
	chomp;
 | 
						|
	(/^\w*$/ || /^#/) && next;
 | 
						|
	$charsets[$ncs++] = $_;
 | 
						|
}
 | 
						|
 | 
						|
print STDERR "Found $ncs charsets, counting unique codes...\n";
 | 
						|
 | 
						|
for($unique=0; $unique<256; $unique++) {
 | 
						|
	$u2x{$unique} = $unique;
 | 
						|
	$x2u[$unique] = $unique;
 | 
						|
}
 | 
						|
$u2x{0xFFFD} = $unique;
 | 
						|
$x2u[$unique++] = 0xFFFD;
 | 
						|
print "static unsigned short int input_to_x[$ncs][256] = {\n";
 | 
						|
for($x=0; $x<$ncs; $x++) {
 | 
						|
	$a = $charsets[$x];
 | 
						|
	print "\n/* $a */\n{\n";
 | 
						|
	open (A, $a) || die "Error opening $a";
 | 
						|
	while (<A>) {
 | 
						|
		chomp;
 | 
						|
		(/^\w*$/ || /^#/) && next;
 | 
						|
		($i, $u, $c) = split /\t/;
 | 
						|
		$cc[$x][hex $i] = $u;
 | 
						|
	}
 | 
						|
	close A;
 | 
						|
	for($i=0; $i<256; $i++) {
 | 
						|
		$u = hex((defined $cc[$x][$i]) ? $cc[$x][$i] : "FFFD");
 | 
						|
		if (!defined $u2x{$u}) {
 | 
						|
			$x2u[$unique] = $u;
 | 
						|
			$u2x{$u} = $unique++;
 | 
						|
		}
 | 
						|
		$o = $u2x{$u};
 | 
						|
		print "$o,", ($i % 16 == 15) ? "\n" : " ";
 | 
						|
		$cc[$x][$i] = $o;
 | 
						|
		$cx[$x]{$o} = $i;
 | 
						|
	}
 | 
						|
	print "},\n";
 | 
						|
}
 | 
						|
print "};\n\n";
 | 
						|
 | 
						|
print STDERR "$unique unique codes...\n";
 | 
						|
 | 
						|
print "static unsigned short int x_to_uni[$unique] = {\n";
 | 
						|
for($i=0; $i<$unique; $i++) {
 | 
						|
	print "$x2u[$i],", ($i % 16 == 15) ? "\n" : " ";
 | 
						|
}
 | 
						|
if ($i % 16) { print "\n"; }
 | 
						|
print "};\n\n";
 | 
						|
 | 
						|
print STDERR "UNICODE table...\n";
 | 
						|
for($i=0; $i<$unique; $i++) {
 | 
						|
	$u = $x2u[$i];
 | 
						|
	$p = $u / 256;
 | 
						|
	$pg[$p] = 1;
 | 
						|
}
 | 
						|
for($i=0; $i<256; $i++) {
 | 
						|
	if ($pg[$i]) {
 | 
						|
		print "static unsigned short int uni_to_x_$i\[256\] = {\n";
 | 
						|
		for($j=0; $j<256; $j++) {
 | 
						|
			$u = 256*$i + $j;
 | 
						|
			$u = defined($u2x{$u}) ? $u2x{$u} : 256;
 | 
						|
			print "$u,", ($j % 16 == 15) ? "\n" : " ";
 | 
						|
		}
 | 
						|
		print "};\n\n";
 | 
						|
	}
 | 
						|
}
 | 
						|
print "static unsigned short int *uni_to_x[256] = {\n";
 | 
						|
for($i=hex "FF00"; $i<=hex "FFFF"; $i++) {
 | 
						|
	if (defined $u2x{$i} && $i != 0xFFFD) { die "Invalid replacement strategy!"; }
 | 
						|
}
 | 
						|
for($i=0; $i<256; $i++) {
 | 
						|
	print "uni_to_x_", $pg[$i] ? $i : "255", ",", ($i % 4 == 3) ? "\n" : " ";
 | 
						|
}
 | 
						|
print "};\n\n";
 | 
						|
 | 
						|
print STDERR "UniData file...\n";
 | 
						|
open (U, "unidata/UnicodeData.txt") || die "No UnicodeData file";
 | 
						|
while (<U>) {
 | 
						|
	chomp;
 | 
						|
	($num,$name,$_,$_,$_,$exp) = split /;/;
 | 
						|
	if ($exp ne "") {
 | 
						|
		$exp =~ s/^<.*> *//g;
 | 
						|
		$a = "";
 | 
						|
		foreach $x (split (/ /, $exp)) {
 | 
						|
			if ($x ne "0020") {
 | 
						|
				$a = $a . " " . hex $x;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		($expand{hex $num} = $a) =~ s/^ //;
 | 
						|
	}
 | 
						|
}
 | 
						|
close U;
 | 
						|
 | 
						|
print STDERR "Accent rules\n";
 | 
						|
if (open(ACC, "misc/user_unacc")) {
 | 
						|
	while (<ACC>) {
 | 
						|
		chomp;
 | 
						|
		(/^\s*$/ || /^#/) && next;
 | 
						|
		s/0x([0-9a-zA-Z]+)/hex($1)/ge;
 | 
						|
		(/^(\d+)\s+(\d+)$/) || die "Syntax error in user accent rules";
 | 
						|
		$expand{$1} = $2;
 | 
						|
	}
 | 
						|
	close ACC;
 | 
						|
}
 | 
						|
 | 
						|
print STDERR "Character expansions\n";
 | 
						|
if (open(EXTRA, "misc/user_expand")) {
 | 
						|
	while (<EXTRA>) {
 | 
						|
		chomp;
 | 
						|
		(/^\s*$/ || /^#/) && next;
 | 
						|
		s/0x([0-9a-zA-Z]+)/hex($1)/ge;
 | 
						|
		(/^(\d+)\s+(.*)$/) || die "Syntax error in user expansions";
 | 
						|
		$expand{$1} = $2;
 | 
						|
	}
 | 
						|
	close EXTRA;
 | 
						|
}
 | 
						|
 | 
						|
print "static unsigned short int x_to_output[$ncs][$unique] = {\n";
 | 
						|
$pstr = 256;
 | 
						|
for($c=0; $c<$ncs; $c++) {
 | 
						|
	print "\n/* $charsets[$c] */\n{\n";
 | 
						|
	for($i=0; $i<$unique; $i++) {
 | 
						|
		$u = $x2u[$i];
 | 
						|
		do {
 | 
						|
			$r = $u;
 | 
						|
			$u = "";
 | 
						|
			foreach $x (split (/ /, $r)) {
 | 
						|
				if (defined($k = $u2x{$x}) && defined $cx[$c]{$k}) {
 | 
						|
					$u = "$u $x";
 | 
						|
				} elsif (defined($k = $expand{$x})) {
 | 
						|
					$u = "$u $k";
 | 
						|
				}
 | 
						|
			}
 | 
						|
			$u =~ s/^ //;
 | 
						|
		} while ($r ne $u);
 | 
						|
		$u = "";
 | 
						|
		foreach $x (split (/ /, $r)) {
 | 
						|
			if (defined($k = $u2x{$x})) {
 | 
						|
				if ($k != 256 && defined ($k = $cx[$c]{$k})) {
 | 
						|
					$u = $u . pack("C", $k);
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if (length($u) == 1) {
 | 
						|
			$z = unpack("C", $u);
 | 
						|
		} else {
 | 
						|
			if (!defined($string{$u})) {
 | 
						|
				$string{$u} = $pstr;
 | 
						|
				$strval{$pstr} = $u;
 | 
						|
				$pstr += 1 + length($u);
 | 
						|
			}
 | 
						|
			$z = $string{$u};
 | 
						|
		}
 | 
						|
		print "$z,", ($i % 16 == 15) ? "\n" : " ";
 | 
						|
	}
 | 
						|
	if ($i % 16) { print "\n"; }
 | 
						|
	print "},\n";
 | 
						|
}
 | 
						|
print "};\n\n";
 | 
						|
 | 
						|
print STDERR "And Tubular Bells...\n";
 | 
						|
print "static unsigned char string_table[] = {\n";
 | 
						|
$i = 256;
 | 
						|
while ($i < $pstr) {
 | 
						|
	$w = $strval{$i};
 | 
						|
	print length $w, ",";
 | 
						|
	foreach $x (unpack("C256", $w)) {
 | 
						|
		print " $x,";
 | 
						|
	}
 | 
						|
	print "\n";
 | 
						|
	$i += 1 + length $w;
 | 
						|
}
 | 
						|
print "};\n";
 | 
						|
 | 
						|
print STDERR "Done.\n";
 |