#!/usr/bin/perl
#
#	Character Set Table Generator 1.0
#	(c) 1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
#
#	This program can be freely distributed and used according to the terms
#	of the GNU General Public License.
#

# Internal codes 0..255 are mapped to Unicode 0..255
# Internal code 256 is the replacement character (U+FFFD)

# Number of charset table files collected into @charsets.
$ncs = 0;

print "/* Generated by tabgen 1.0, please don't edit manually. */\n\n";

print STDERR "Charset list...\n";

# Read the list of charset table files, one file name per line, from the
# files named on the command line (or from standard input if there are none).
while (<>) {
	chomp;
	# Skip blank or whitespace-only lines and '#' comments.  The test has to
	# use \s rather than \w: with \w a file name consisting only of word
	# characters (e.g. "latin2") would be silently dropped, while a line of
	# spaces would be taken as a file name.
	(/^\s*$/ || /^#/) && next;
	$charsets[$ncs++] = $_;
}

print STDERR "Found $ncs charsets, counting unique codes...\n";

# Seed the internal code space: internal codes 0..255 stand for Unicode
# 0..255 and internal code 256 is the replacement character U+FFFD.
#   %u2x    Unicode value  -> internal code
#   @x2u    internal code  -> Unicode value
#   $unique next unassigned internal code
for($unique=0; $unique<256; $unique++) {
	$u2x{$unique} = $unique;
	$x2u[$unique] = $unique;
}
$u2x{0xFFFD} = $unique;
$x2u[$unique++] = 0xFFFD;

# For every charset, read its table file and emit one row of input_to_x[]
# (charset byte -> internal code), allocating new internal codes on demand.
# Along the way $cx[charset]{internal code} records the charset byte that
# produces each internal code.
print "static unsigned short int input_to_x[$ncs][256] = {\n";
for my $cs (0 .. $ncs - 1) {
	my $table = $charsets[$cs];
	print "\n/* $table */\n{\n";
	# Three-argument open so that a file name can never be interpreted as
	# an open mode or a pipe; report the system error on failure.
	open(my $fh, "<", $table) || die "Error opening $table: $!";
	while (<$fh>) {
		chomp;
		# Skip blank or whitespace-only lines and '#' comments (\s, not \w).
		(/^\s*$/ || /^#/) && next;
		# Tab-separated columns: charset byte (hex), Unicode value (hex);
		# anything after the second column is ignored.
		my ($code, $uni) = split /\t/;
		# A line without a (non-blank) Unicode column defines nothing;
		# storing it would later turn into U+0000 through hex("").
		next unless defined $uni && $uni =~ /\S/;
		$cc[$cs][hex $code] = $uni;
	}
	close $fh;
	for my $byte (0 .. 255) {
		# Bytes the table does not define map to the replacement character.
		my $u = hex((defined $cc[$cs][$byte]) ? $cc[$cs][$byte] : "FFFD");
		if (!defined $u2x{$u}) {
			# First time this Unicode value is seen: give it a new code.
			$x2u[$unique] = $u;
			$u2x{$u} = $unique++;
		}
		my $xcode = $u2x{$u};
		print "$xcode,", ($byte % 16 == 15) ? "\n" : " ";
		$cc[$cs][$byte] = $xcode;
		# If several bytes share an internal code, the highest byte wins.
		$cx[$cs]{$xcode} = $byte;
	}
	print "},\n";
}
print "};\n\n";

print STDERR "$unique unique codes...\n";

# Emit x_to_uni[]: the Unicode value of every internal code, sixteen
# entries per output line.
print "static unsigned short int x_to_uni[$unique] = {\n";
foreach my $xcode (0 .. $unique - 1) {
	my $sep = ($xcode % 16 == 15) ? "\n" : " ";
	print "$x2u[$xcode],$sep";
}
# Terminate the last line if it was only partially filled.
print "\n" if $unique % 16;
print "};\n\n";

print STDERR "UNICODE table...\n";

# Flag every 256-entry Unicode page that contains at least one used value.
foreach my $uni (@x2u[0 .. $unique - 1]) {
	$pg[int($uni / 256)] = 1;
}

# Emit one uni_to_x_<page>[] table per used page; Unicode values without an
# internal code map to 256 (the replacement character).
foreach my $page (0 .. 255) {
	next unless $pg[$page];
	print "static unsigned short int uni_to_x_$page\[256\] = {\n";
	foreach my $low (0 .. 255) {
		my $uni = 256*$page + $low;
		my $xcode = defined($u2x{$uni}) ? $u2x{$uni} : 256;
		print "$xcode,", ($low % 16 == 15) ? "\n" : " ";
	}
	print "};\n\n";
}

print "static unsigned short int *uni_to_x[256] = {\n";
# Page 255 doubles as the table for all unused pages (see below), so it may
# contain no mapped value other than U+FFFD itself.
foreach my $uni (0xFF00 .. 0xFFFF) {
	next if $uni == 0xFFFD;
	die "Invalid replacement strategy!" if defined $u2x{$uni};
}
# Page index: used pages point at their own table, unused ones at page 255.
foreach my $page (0 .. 255) {
	my $target = $pg[$page] ? $page : "255";
	my $sep = ($page % 4 == 3) ? "\n" : " ";
	print "uni_to_x_", $target, ",", $sep;
}
print "};\n\n";

print STDERR "UniData file...\n";

# Load expansions from the semicolon-separated UnicodeData file: field 0 is
# the code (hex) and field 5 its expansion as space-separated hex codes,
# optionally preceded by a <tag>.  %expand maps the decimal code to the
# space-separated decimal codes of its expansion, with 0020 entries left out.
open(U, "unidata/UnicodeData.txt") or die "No UnicodeData file";
while (my $record = <U>) {
	chomp $record;
	my ($codepoint, $decomp) = (split /;/, $record, 7)[0, 5];
	next if !defined($decomp) || $decomp eq "";
	# Drop a leading <tag> together with the spaces following it.
	$decomp =~ s/^<.*> *//g;
	my @targets = map { hex($_) } grep { $_ ne "0020" } split(/ /, $decomp);
	$expand{hex($codepoint)} = join(" ", @targets);
}
close U;

print STDERR "Accent rules\n";
# Optional user rules from misc/user_unacc; the file is silently skipped
# when it cannot be opened.  Each rule line is "<code> <replacement code>",
# both either decimal or 0x-prefixed hex, and overrides the %expand entry
# for <code>.
if (open(ACC, "misc/user_unacc")) {
	while (<ACC>) {
		chomp;
		(/^\s*$/ || /^#/) && next;
		# Convert 0x... numbers to decimal.  Only real hex digits are
		# accepted: hex() stops at the first illegal digit, so matching
		# [a-zA-Z] would silently turn a typo such as 0xZZ into 0 instead
		# of letting it reach the syntax check below.
		s/0x([0-9a-fA-F]+)/hex($1)/ge;
		(/^(\d+)\s+(\d+)$/) || die "Syntax error in user accent rules";
		$expand{$1} = $2;
	}
	close ACC;
}

print STDERR "Character expansions\n";
# Optional user rules from misc/user_expand; the file is silently skipped
# when it cannot be opened.  Each rule line is "<code> <expansion>", where
# the numbers are decimal or 0x-prefixed hex; the rest of the line after
# <code> replaces the %expand entry for <code>.
if (open(EXTRA, "misc/user_expand")) {
	while (<EXTRA>) {
		chomp;
		(/^\s*$/ || /^#/) && next;
		# Convert 0x... numbers to decimal.  Only real hex digits are
		# accepted: hex() stops at the first illegal digit, so matching
		# [a-zA-Z] would silently turn a typo such as 0xZZ into 0.
		s/0x([0-9a-fA-F]+)/hex($1)/ge;
		(/^(\d+)\s+(.*)$/) || die "Syntax error in user expansions";
		$expand{$1} = $2;
	}
	close EXTRA;
}

# Emit x_to_output[charset][internal code].  An entry below 256 is the
# single output byte; any other entry is an offset into the string table
# collected in %string / %strval (offsets start at 256).
print "static unsigned short int x_to_output[$ncs][$unique] = {\n";
$pstr = 256;
foreach my $cs (0 .. $ncs - 1) {
	print "\n/* $charsets[$cs] */\n{\n";
	foreach my $xcode (0 .. $unique - 1) {
		# Step 1: rewrite the code sequence until it stops changing.  A
		# code the charset can produce is kept, a code with an %expand
		# entry is replaced by that entry, anything else is dropped.
		my $seq = $x2u[$xcode];
		my $prev;
		do {
			$prev = $seq;
			my @kept;
			foreach my $uni (split / /, $prev) {
				my $mapped = $u2x{$uni};
				if (defined $mapped && defined $cx[$cs]{$mapped}) {
					push @kept, $uni;
				} elsif (defined $expand{$uni}) {
					push @kept, $expand{$uni};
				}
			}
			$seq = join " ", @kept;
		} while ($prev ne $seq);

		# Step 2: turn the stable sequence into charset bytes, leaving out
		# the replacement character (internal code 256).
		my $bytes = "";
		foreach my $uni (split / /, $prev) {
			my $mapped = $u2x{$uni};
			next if !defined($mapped) || $mapped == 256;
			my $byte = $cx[$cs]{$mapped};
			$bytes .= pack("C", $byte) if defined $byte;
		}

		# Step 3: a single byte is stored directly; every other result
		# (including the empty one) goes through the string table, where
		# each distinct string gets an offset on first use and occupies
		# one slot more than its length.
		my $entry;
		if (length($bytes) == 1) {
			$entry = unpack("C", $bytes);
		} else {
			unless (defined $string{$bytes}) {
				$string{$bytes} = $pstr;
				$strval{$pstr} = $bytes;
				$pstr += 1 + length($bytes);
			}
			$entry = $string{$bytes};
		}
		print "$entry,", ($xcode % 16 == 15) ? "\n" : " ";
	}
	# Terminate the last line if it was only partially filled.
	print "\n" if $unique % 16;
	print "},\n";
}
print "};\n\n";

print STDERR "And Tubular Bells...\n";

# Emit string_table[]: the strings collected in %strval, walked in offset
# order from 256 up to $pstr.  Each string is written on its own line as
# its length followed by its bytes.
print "static unsigned char string_table[] = {\n";
for (my $offset = 256; $offset < $pstr; ) {
	my $str = $strval{$offset};
	my $len = length $str;
	my @cells = map { " $_," } unpack("C256", $str);
	print "$len,", @cells, "\n";
	# Each entry occupies its length slot plus its bytes.
	$offset += 1 + $len;
}
print "};\n";

print STDERR "Done.\n";