diff --git a/libucw/.gitignore b/libucw/.gitignore new file mode 100644 index 0000000..d9e1f1e --- /dev/null +++ b/libucw/.gitignore @@ -0,0 +1,9 @@ +/run +/obj +/charset/unidata +TAGS +*~ +_* +*.swp +.make.log +/debian-tmp diff --git a/libucw/Makefile b/libucw/Makefile new file mode 100644 index 0000000..4fb2bbf --- /dev/null +++ b/libucw/Makefile @@ -0,0 +1,79 @@ +# Makefile for the UCW libraries +# (c) 2007--2010 Martin Mares + +# The default target +all: runtree libs api programs extras configs + +# Include configuration +s=. +-include obj/config.mk +obj/config.mk: + @echo "You need to run configure first." && false + +BUILDSYS=$(s)/build + +# We will use the libucw build system +include $(BUILDSYS)/Maketop + +CONFIG_SRC_DIR=etc +TESTING_DEPS=$(LIBUCW) + +# Install the build system +include $(BUILDSYS)/Makefile + +# Set up names of common libraries (to avoid forward references in rules) +ifdef CONFIG_CHARSET +LIBCHARSET=$(o)/charset/libucw-charset.pc +endif + +# The UCW library +include $(s)/ucw/Makefile + +# Install config files +ifdef CONFIG_SHERLOCK_LIB +FREE_CONFIGS=sherlock local +CONFIGS+=$(FREE_CONFIGS) + +INSTALL_TARGETS+=install-configs +install-configs: + install -d -m 755 $(DESTDIR)$(INSTALL_CONFIG_DIR) + install -m 644 $(addprefix run/$(CONFIG_DIR)/,$(FREE_CONFIGS)) $(DESTDIR)$(INSTALL_CONFIG_DIR) +endif + +# Include submakefiles of requested libraries +ifdef CONFIG_CHARSET +include $(s)/charset/Makefile +endif + +ifdef CONFIG_IMAGES +LIBIMAGES=$(o)/images/libucw-images.pc +include $(s)/images/Makefile +endif + +ifdef CONFIG_XML +LIBXML=$(o)/ucw-xml/libucw-xml.pc +include $(s)/ucw-xml/Makefile +endif + +ifdef CONFIG_JSON +LIBJSON=$(o)/ucw-json/libucw-json.pc +include $(s)/ucw-json/Makefile +endif + +# Build documentation by default? 
+ifdef CONFIG_DOC +all: docs +endif + +libs: $(LIBUCW) $(LIBXML) $(LIBJSON) $(LIBIMAGES) $(LIBCHARSET) + +# And finally the default rules of the build system +include $(BUILDSYS)/Makebottom + +ifndef CONFIG_LOCAL +install: all $(INSTALL_TARGETS) +else +install: + @echo "Nothing to install, this is a local build." && false +endif +.PHONY: install diff --git a/libucw/README b/libucw/README new file mode 100644 index 0000000..ea61db3 --- /dev/null +++ b/libucw/README @@ -0,0 +1,25 @@ +################################################################################ + + The UCW Libraries @VERSION@ + + (c) 1997--2015 Martin Mares + (c) 2006--2015 Pavel Charvat + (c) 2000--2009 Robert Spalek + + and other contributors listed in ucw/doc/index.txt + +################################################################################ + +This package contains the LibUCW library and several other libraries, +which have split off the Sherlock Holmes search engine project. + +See: + + o ucw/doc/install.txt for build requirements and instructions. + o ucw/doc/relnotes.txt for release notes. + o ucw/doc/index.txt for master index of documentation. + +For more information, take a look at http://www.ucw.cz/libucw/. + +If you have any suggestions or bug reports, please write to our +mailing list libucw-devel@ucw.cz, or directly to mj@ucw.cz. 
diff --git a/libucw/README.md b/libucw/README.md new file mode 100644 index 0000000..4c06446 --- /dev/null +++ b/libucw/README.md @@ -0,0 +1 @@ +Source: https://www.ucw.cz/libucw/ diff --git a/libucw/build/Makebottom b/libucw/build/Makebottom new file mode 100644 index 0000000..bb8c67d --- /dev/null +++ b/libucw/build/Makebottom @@ -0,0 +1,236 @@ +# Bottom part of Makefile for the UCW Libraries +# (c) 1997--2008 Martin Mares + +# The run tree + +DOCDIR=doc + +runtree: run/.tree-stamp $(addsuffix /.dir-stamp,$(addprefix $(o)/,$(DIRS)) $(addprefix run/$(DOCDIR)/,$(DOC_MODULES))) + +run/.tree-stamp: $(o)/config.mk + $(M)Creating runtree + $(Q)mkdir -p run $(addprefix run/, $(CONFIG_DIR) $(EXTRA_RUNDIRS) $(INSTALL_RUNDIRS)) + $(Q)touch run/.tree-stamp + +# Miscellaneous targets + +programs: $(PROGS) +datafiles: $(DATAFILES) +tests: $(TESTS) +configs: $(addprefix run/$(CONFIG_DIR)/,$(CONFIGS)) +docs: runtree $(DOCS) $(DOC_INDICES) $(MANPAGES) + +tags: + etags `find . -name "*.[ch]"` + +# Black magic with dependencies. It would be more correct to make "depend.new" +# a prerequisite for "depend", but "depend.new" often has the same timestamp +# as "depend" which would confuse make a lot and either force remaking anyway +# or (as in current versions of GNU make) erroneously skipping the remaking. 
+ +-include $(o)/depend + +$(o)/depend: force + $(Q)if [ -s $(o)/depend.new ] ; then $(BUILDSYS)/mergedeps $(o)/depend $(o)/depend.new ; >$(o)/depend.new ; fi + +force: + +# Rules for directories + +%.dir-stamp: + $(Q)mkdir -p $(@D) && touch $@ + +# Rules for configuration files + +run/$(CONFIG_DIR)/%: $(s)/$(CONFIG_SRC_DIR)/% $(o)/config.mk $(BUILDSYS)/genconf + $(M)CF $< + $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk + +$(o)/%.cf: $(s)/%.cf $(o)/config.mk $(BUILDSYS)/genconf + $(M)CF $< + $(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk + $(Q)cp $@ run/$(CONFIG_DIR)/$(basename $(@F)) + +# Rules for libraries + +%.a: + $(M)AR $@ + $(Q)rm -f $@ + $(Q)ar rcs $@ $^ +ifdef CONFIG_INSTALL_API + $(Q)$(call symlink-alias,$@,run/lib,$(*F)$(LIBNAME_INFIX).a) +endif + +%.so: + $(M)LD $@ + $(Q)$(CC) $(LSHARED) $(LDFLAGS) -o $@ $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" PKG_CONFIG_OPTS="$(PKG_CONFIG_OPTS)" $(BUILDSYS)/lib-flags $^) $(LIBS) + $(Q)$(call symlink-alias,$@,run/$(SO_RUNDIR),$(*F)$(SONAME_INFIX).so$(SONAME_SUFFIX)) + $(Q)ln -fs $(*F)$(SONAME_INFIX).so$(SONAME_SUFFIX) run/$(SO_RUNDIR)/$(*F)$(SONAME_INFIX).so + +# On Darwin, gcc expects shared libraries in *.dylib instead of *.so. +# Surprisingly, when a program is run, it suffices to have *.so files. +# We don't want to mess up the whole build system with configurable +# suffices and we also don't want to incur an overhead on Linux, so we +# just create symbolic links on Darwin, if requested. 
+%.dylib: %.so + cd $(dir $<) && ln -fs $(notdir $<) $(notdir $@) + +$(o)/%.pc: $(s)/%.pc $(o)/%$(LV).$(LS) + $(M)PC $< + $(Q)DEPS="$(shell $(BUILDSYS)/lib-deps $^)" LIBDIR=$(@D) $(BUILDSYS)/genconf $< $@ $(o)/config.mk + $(Q)mkdir -p $(o)/pkgconfig + $(Q)$(call symlink,$@,$(o)/pkgconfig) + +# Rules for public API + +ifdef CONFIG_INSTALL_API + +ifdef CONFIG_LOCAL +# Need an absolute path +API_ROOT:=$(shell pwd)/run +API_LIBDIR=$(API_ROOT)/lib +API_INCDIR=$(API_ROOT)/include +else +API_LIBDIR=$(INSTALL_LIB_DIR) +API_INCDIR=$(INSTALL_INCLUDE_DIR) +endif +INSTALL_RUNDIRS+=include lib/pkgconfig +api: $(API_INCLUDES) $(addprefix run/lib/pkgconfig/,$(addsuffix .pc,$(API_LIBS))) + +$(o)/%/.include-stamp: + $(Q)$(BUILDSYS)/install-includes $($@ "s@^libdir=.*@libdir=$(API_LIBDIR)@;s@^incdir=.*@incdir=$(API_INCDIR)@" + +else +api: +endif + +# Rules for compiling C + +$(o)/%.o: $(s)/%.c $(o)/autoconf.h + $(M)CC $< + $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) -c -o $@ $< + +$(o)/%.o: %.c $(o)/autoconf.h + $(M)CC $< + $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) -c -o $@ $< + +%.o: %.c $(o)/autoconf.h + $(M)CC $< + $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) -c -o $@ $< + +$(o)/%.oo: $(s)/%.c $(o)/autoconf.h + $(M)CC-SO $< + $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) $(CSHARED) -c -o $@ $< + +$(o)/%.oo: %.c $(o)/autoconf.h + $(M)CC-SO $< + $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) $(CSHARED) -c -o $@ $< + +%.oo: %.c $(o)/autoconf.h + $(M)CC-SO $< + $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) $(CSHARED) -c -o $@ $< + +$(o)/%-tt.o: $(s)/%.c $(o)/autoconf.h + $(M)CC-TEST $< + $(Q)DEPENDENCIES_OUTPUT="$(o)/depend.new $@" $(CC) $(CFLAGS) -DTEST -c -o $@ $< + +# Rules for testing + +$(o)/%-t: $(o)/%-tt.o $(TESTING_DEPS) + $(M)LD-TEST $@ + $(Q)$(CC) $(LDFLAGS) -o $@ $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" PKG_CONFIG_OPTS="$(PKG_CONFIG_OPTS)" $(BUILDSYS)/lib-flags $^) $(LIBS) + 
+$(o)/%.test: $(s)/%.t $(BUILDSYS)/tester
+	$(M)TEST $@
+	$(Q)$(BUILDSYS)/tester --rundir=run $(TESTERFLAGS) $< && touch $@
+
+# Rules for binaries
+
+BINDIR=bin
+
+$(o)/%: $(o)/%.o
+	$(M)LD $@
+	$(Q)$(CC) $(LDFLAGS) -o $@ $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" PKG_CONFIG_OPTS="$(PKG_CONFIG_OPTS)" $(BUILDSYS)/lib-flags $^) $(LIBS)
+	$(Q)$(call symlink,$@,run/$(BINDIR))
+
+$(o)/%: $(s)/%.sh $(o)/config.mk $(BUILDSYS)/genconf
+	$(M)PP $<
+	$(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
+	$(Q)chmod +x $@
+	$(Q)$(call symlink,$@,run/$(BINDIR))
+
+$(o)/%: %.sh $(o)/config.mk $(BUILDSYS)/genconf
+	$(M)PP $<
+	$(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
+	$(Q)chmod +x $@
+	$(Q)$(call symlink,$@,run/$(BINDIR))
+
+$(o)/%: $(s)/%.pl $(o)/config.mk $(BUILDSYS)/genconf
+	$(M)PP $<
+	$(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
+	$(Q)chmod +x $@
+	$(Q)$(call symlink,$@,run/$(BINDIR))
+
+$(o)/%: %.pl $(o)/config.mk $(BUILDSYS)/genconf
+	$(M)PP $<
+	$(Q)$(BUILDSYS)/genconf $< $@ $(o)/config.mk
+	$(Q)chmod +x $@
+	$(Q)$(call symlink,$@,run/$(BINDIR))
+
+PERL_MODULE_DIR=UCW
+
+$(o)/%.pm: $(s)/%.pm
+	$(M)"PM $< -> run/lib/perl5/$(PERL_MODULE_DIR)/$(@F)"
+	$(Q)cp $^ $@
+	$(Q)$(call symlink,$@,run/lib/perl5/$(PERL_MODULE_DIR))
+
+$(o)/%.pm: %.pm
+	$(M)"PM $< -> run/lib/perl5/$(PERL_MODULE_DIR)/$(@F)"
+	$(Q)cp $^ $@
+	$(Q)$(call symlink,$@,run/lib/perl5/$(PERL_MODULE_DIR))
+
+# Rules for data files
+
+DATADIR=lib
+
+$(DATAFILES): $(o)/%: $(s)/%
+	$(M)DATA $<
+	$(Q)cp $^ $@
+	$(Q)$(call symlink,$@,run/$(DATADIR))
+
+# Rules for documentation
+
+$(o)/%.html: $(o)/%.txt $(BUILDSYS)/asciidoc.conf $(BUILDSYS)/asciidoc-xhtml.conf run/$(DOCDIR)/$(DOC_MODULE)/.dir-stamp
+	$(M)"DOC-HTML $<"
+	$(Q)asciidoc -e -f $(BUILDSYS)/asciidoc.conf -f $(BUILDSYS)/asciidoc-xhtml.conf -f $(HOST_PREFIX)/etc/asciidoc/asciidoc.conf -f $(HOST_PREFIX)/etc/asciidoc/xhtml11.conf $<
+	$(Q)$(call symlink,$@,run/$(DOCDIR)/$(DOC_MODULE))
+
+$(MANPAGES): $(o)/%: $(s)/%.txt
+	$(M)"DOC-MAN $<"
+	$(Q)a2x -d manpage -f
manpage -D $(dir $@) $< + $(Q)mkdir -p run/$(DOCDIR)/man/man$(subst .,,$(suffix $@)) + $(Q)$(call symlink,$@,run/$(DOCDIR)/man/man$(subst .,,$(suffix $@))) + +# In reality, we do not depend on the .txt files, but on the corresponding .deflist's. +# However, the Makefile language cannot express that doc-extract generates both .txt +# and .deflist, so we always use the .txt's in dependencies. +$(patsubst %.html,%.txt,$(DOC_INDICES)): $(o)/%.txt: $(patsubst %.html,%.txt,$(DOCS)) $(BUILDSYS)/doc-defs + $(M)"DOC-DEFS $@" + $(Q)echo $@: $(DOC_HEAD) $(DOC_LIST) >> $(o)/depend.new + $(Q)$(BUILDSYS)/doc-defs $(DOC_HEAD) $@ $(DOC_LIST) + +$(patsubst %.html,%.txt,$(DOCS)): $(o)/%.txt: $(s)/%.txt $(BUILDSYS)/doc-extract + $(M)"DOC-EXT $<" + $(Q)$(BUILDSYS)/doc-extract $< $@ $(o)/depend.new $(s) $(patsubst %.txt,%.deflist,$@) + +# Don't delete intermediate targets. There shouldn't be any, but due to bugs +# in GNU Make rules with targets in not-yet-existing directories are ignored +# when searching for implicit rules and thence targets considered intermediate. +.SECONDARY: + +.PHONY: all clean distclean runtree programs api datafiles force tags configs dust install docs tests diff --git a/libucw/build/Makefile b/libucw/build/Makefile new file mode 100644 index 0000000..8c70961 --- /dev/null +++ b/libucw/build/Makefile @@ -0,0 +1,19 @@ +# Makefile for LibUCW Build Tools + +DIRS+=build + +$(o)/build/genhash: $(o)/build/genhash.o + +# This is a hack which compensates make's desires for propagating per-rule settings +# of variables: if some module specifies its own LIBS and it depends on genhash, +# genhash is sometimes built with the module's LIBS (if it isn't already built). +# A proper solution would be using a different rule for linking build/*, but +# as it currently concerns only genhash, it's easier to battle this way. 
+$(o)/build/genhash: LIBS= + +INSTALL_TARGETS+=install-build +install-build: + install -d -m 755 $(DESTDIR)$(INSTALL_SHARE_DIR)/ucw/build + install -m 755 $(addprefix $(BUILDSYS)/,install-includes doc-defs doc-extract genconf mergedeps tester lib-deps lib-flags) $(DESTDIR)$(INSTALL_SHARE_DIR)/ucw/build + install -m 644 $(addprefix $(BUILDSYS)/,asciidoc.conf asciidoc-xhtml.conf Makebottom Maketop) $(DESTDIR)$(INSTALL_SHARE_DIR)/ucw/build +.PHONY: install-build diff --git a/libucw/build/Maketop b/libucw/build/Maketop new file mode 100644 index 0000000..ac3e5ce --- /dev/null +++ b/libucw/build/Maketop @@ -0,0 +1,101 @@ +# Top part of Makefile for the UCW Libraries +# (c) 1997--2008 Martin Mares + +# Set to 1 if you want verbose output +V=0 + +# Set to 'y' (or 'n') if you want to auto-confirm (auto-reject) all questions in build/installer +CONFIRM= + +# Disable all built-in rules and variables. Speeds up make and simplifies debugging. +MAKEFLAGS+=-rR + +CFLAGS=$(CLANG) $(COPT) $(CDEBUG) $(CWARNS) $(CEXTRA) -I. 
-I$(o) -I$(s)
+LDFLAGS=$(LOPT) $(LEXTRA)
+
+DIRS=
+PROGS=
+CONFIGS=
+CONFIG_SRC_DIR=$(CONFIG_DIR)
+TESTS=
+EXTRA_RUNDIRS=tmp log
+INSTALL_RUNDIRS=bin lib
+API_INCLUDES=
+API_LIBS=
+
+DOCS=
+DOC_INDICES=
+MANPAGES=
+
+# Various files whose type does not fit into PROGS
+DATAFILES=
+
+ifdef CONFIG_DARWIN
+DYNAMIC_LIBRARIES=dylib
+SOEXT=bundle
+HOST_PREFIX=/sw
+else
+DYNAMIC_LIBRARIES=so
+SOEXT=so
+HOST_PREFIX=
+endif
+
+ifdef CONFIG_SHARED
+LS=$(DYNAMIC_LIBRARIES)
+OS=oo
+PKG_CONFIG_OPTS=
+else
+LS=a
+OS=o
+PKG_CONFIG_OPTS=--static
+endif
+LV=$(UCW_ABI_SUFFIX)
+
+SO_RUNDIR=lib
+
+# Whenever "make -s" (silent) is run, turn on verbose mode (paradoxical, but gives the right result); only the first word of MAKEFLAGS (the single-letter options) is tested, so an "s" inside a long option or a variable override does not count
+ifneq ($(findstring s,$(firstword -$(MAKEFLAGS))),)
+V=1
+endif
+
+# Define M (message) and Q (quiet command prefix) macros and also MAKESILENT passed to sub-makes
+ifeq ($(V),1)
+M=@\#
+Q=
+MAKESILENT=
+else
+M=@echo #
+Q=@
+MAKESILENT=-s
+endif
+
+# Clean needs to be a double-colon rule since we want sub-makefiles to be able
+# to define their own cleanup actions.
+dust::
+	rm -f `find .
-path "*~" -or -name "\#*\#"`
+	rm -f allocs.tmp cscope.out TAGS
+
+clean:: dust
+	rm -rf `find obj/ucw -mindepth 1 -maxdepth 1 -not -name autoconf.h`
+	rm -rf `find obj -mindepth 1 -maxdepth 1 -not \( -name config.mk -o -name autoconf.h -o -name ucw \)`
+	rm -rf tests $(addprefix run/,bin lib include .tree-stamp doc)
+
+distclean:: clean
+	rm -rf obj run debian-tmp
+
+testclean::
+	rm -f `find obj -name "*.test"`
+
+docclean::
+	rm -f $(DOCS) $(patsubst %.html,%.txt,$(DOCS))
+
+# Extra default rules (appended to by submakefiles)
+extras::
+
+# Relative symlinks and other pathname manipulation macros
+empty:=
+space:=$(empty) $(empty)
+backref=$(subst $(space),/,$(patsubst %,..,$(subst /,$(space),$(1))))
+tack-on=$(if $(patsubst /%,,$(2)),$(1)/$(2),$(2))
+symlink=ln -sf $(call tack-on,$(call backref,$(2)),$(1)) $(2)/
+symlink-alias=ln -sf $(call tack-on,$(call backref,$(2)),$(1)) $(2)/$(3)
diff --git a/libucw/build/asciidoc-xhtml.conf b/libucw/build/asciidoc-xhtml.conf
new file mode 100644
index 0000000..22dda23
--- /dev/null
+++ b/libucw/build/asciidoc-xhtml.conf
@@ -0,0 +1,27 @@
+[replacements]
+NULL=NULL
+
+[func_ref-inlinemacro]
+{caption={all}}
+
+[func_ref_file-inlinemacro]
+{caption={all}}
+
+[xref_file-inlinemacro]
+{caption=[{target}]}
+
+[xref_file_only-inlinemacro]
+{caption=[{filename}]}
+
+[func_auto_ref-inlinemacro]
+{all}
+
+[func_param-inlinemacro]
+{name}
+
+[func_format-inlinemacro]
+{all}
+
+[footer-text]
+Version {revnumber}{basebackend-xhtml11?
}{basebackend-xhtml11=
} +Last updated {docdate} {doctime} diff --git a/libucw/build/asciidoc.conf b/libucw/build/asciidoc.conf new file mode 100644 index 0000000..18c92d4 --- /dev/null +++ b/libucw/build/asciidoc.conf @@ -0,0 +1,16 @@ +[macros] +(?su)[\\]?<<(?P[^&;:()]+):(?P(?P[^&;:()<> ]+)(\([^&:;]*\);?))(,(?P[^&]+))?>>=func_ref_file +(?su)[\\]?<<(?P(?P[^&;:()<> ]+)(\([^&:;]*\);?))(,(?P[^&]+))?>>=func_ref +(?su)[\\]?<<(?P[^&;:()]+):(,(?P[^&]+))?>>=xref_file_only +(?su)[\\]?<<(?P[^&;:()]+):(?P[^,();&]+)(,(?P[^&]+))?>>=xref_file +(?su)[\\]?@(?P(?P\w+)\([^();:]*\))=func_ref +(?su)[\\]?(?P\w+\([^();:]*\))=func_format +(?su)[\\]?@(?P\w+)@=no_func_param +(?su)[\\]?@(?P\w+)=func_param +(?su)[\\]?@!!KEEP!!(?P\w+)!!KEEP!!@=no_func_param_return + +[no_func_param-inlinemacro] +@!!KEEP!!{word}!!KEEP!!@ + +[no_func_param_return-inlinemacro] +@{word}@ diff --git a/libucw/build/doc-defs b/libucw/build/doc-defs new file mode 100755 index 0000000..8e39825 --- /dev/null +++ b/libucw/build/doc-defs @@ -0,0 +1,64 @@ +#!/usr/bin/perl +# Script for formatting documentation from definition lists +# (they get out of extract-doc.pl as a side-product). 
+# (c) 2008 Michal Vaner +use strict; +use warnings; + +my $head = shift; +my $out = shift; + +open OUT, ">$out" or die "Could not write output $out ($!)\n"; +open HEAD, $head or die "Could not open head $head ($!)\n"; +print OUT foreach( ); +close HEAD; + +my $dir = $out; +$dir =~ s/\/[^\/]+$//; + +my @dump; + +while( defined( my $line = <> ) ) { + chomp $line; + push @dump, [ split /,/, $line, 5 ]; +} + +my @types = ( + [ 'enum', 'Enumerations' ], + [ 'struct', 'Structures' ], + [ 'type', 'Types' ], + [ 'fun', 'Functions' ], + [ 'var', 'Variables' ], + [ 'def', 'Preprocessor definitions' ] +); + +my( $index, %groups, %heads ) = ( 0 ); + +foreach( @types ) { + my( $name, $value ) = @{$_}; + $groups{$name} = ++ $index; + $heads{$name} = $value; +} + +my $lasttype = ''; + +foreach( sort { ( $groups{$a->[2]} <=> $groups{$b->[2]} ) or ( $a->[3] cmp $b->[3] ); } @dump ) { + my( $file, $anchor, $type, $name, $text ) = @{$_}; + if( $lasttype ne $type ) { + $lasttype = $type; + print OUT "\n== $heads{$type} [[$type]]\n\n"; + } + my $dircp = $dir; + while( shift @{[ $dircp =~ /([^\/]+)/, "//" ]} eq shift @{[ $file =~ /([^\/]+)/, "///" ]} ) { + $dircp =~ s/[^\/]+\/?//; + $file =~ s/[^\/]+\/?//; + } + $dircp =~ s/[^\/]+/../g; + $file = $dircp."/".$file; + $file =~ s/^\///; + $file =~ s/\.[^.]+$//; + $text =~ s/(\.\.\.|\*|'|#|_)/\\$1/g; + print OUT "<<$file:$anchor,`$name`>>:: `$text`\n"; +} + +close OUT; diff --git a/libucw/build/doc-extract b/libucw/build/doc-extract new file mode 100755 index 0000000..67fdd64 --- /dev/null +++ b/libucw/build/doc-extract @@ -0,0 +1,221 @@ +#!/usr/bin/perl +# Script for extracting documentation out of header files +# (c) 2008 Michal Vaner + +use strict; +use warnings; + +my( $inname, $outname, $depname, $basedir, $defdump ) = @ARGV; +if( defined $inname ) { + open IN, $inname or die "Could not read $inname ($!)\n"; +} else { + open IN, "<&STDIN" or die "Could not read stdin ($!)\n"; +} +if( defined $outname ) { + open OUT, ">$outname" or die 
"Could not write $outname ($!)\n"; +} else { + open OUT, ">&STDOUT" or die "Could not write to stdout ($!)\n"; +} +my $hasdump; +if( defined $defdump ) { + open DUMP, ">$defdump" or die "Could not write definition dump $defdump ($!)\n"; + $hasdump = 1; +} + +# Function to guess type of statement +sub detect( $ ) { + ( $_ ) = @_; + # typedef struct|enum { something } name; + return( $1, 1, $2, "typedef $1 { ... } $2;" ) if /^\s*typedef\s+(struct|enum)\s*{.*}\s*(\w+)\s*;\s*$/s; + # struct|enum name { something }; + return( $1, 1, $2, $_ ) if /^\s*(struct|enum)\s+(\w+)\s*;\s*$/s; + my $l = length; + s/\n.*//s; + # struct|enum name { + # something + # }; + return( $1, 0, $2, $_ ) if /(struct|enum)\s+(\w+)\s+{/; + return( 'def', 0, $1, $_ ) if /#define\s+(\w+)/; + if( $l > length ) { + warn( "Unknown multiline statement $_\n" ); + return( '', 0, $_, $_ ); + } + # typedef type (*function_type)(params); + return( 'type', 1, $2, $_ ) if /^\s*typedef[^()]+?(\(\s*?\*\s*?)?(\w+)(\s*\))?\s*\(.*\)/; + # type (*function_var)(params); + return( 'var', 1, $1, $_ ) if /^.*?\(\*(\w+)\)\(.*\)/; + # type function(name); + return( 'fun', 1, $2, $1 ) if /^(.*?(\w+)\([^{]*\)[^{]*)/; + # typedef something name; + return( 'type', 1, $1, $_ ) if /^\s*typedef.*?(\w+);/; + # type name; + return( 'var', 1, $1, $_ ) if /\s\**(\w+);/; + warn( "Unknown statement $_\n" ); + return( '', 0, $_, $_ ); +} + +my @deps; +my $id = 0; + +sub formatNote( $$ ) { + my( $head, $comment ) = @_; + $head =~ s/(\S)[ ]+/$1 /g; + print OUT "\n"; + print OUT "''''\n"; + chomp $head; + my( $type, $semicolon, $name, $oneline ) = detect( $head ); + # Just few transformations of the result + $oneline =~ s/\s+$//; + $oneline =~ s/;?$/;/ if( $semicolon ); + $head =~ s/;?\s*$/;/ if( $semicolon ); + $head =~ s/(\s|,|\()(\.\.\.)/$1\\$2/g; # Do not convert tripple dot into ellipsis + print OUT "[[${type}_$name]]\n"; + $head = $oneline if $type eq 'fun';#Remove { from inline functions + # Remove the generic hack markup + $head 
=~ s/_OPEN_PAREN_/(/g; + $head =~ s/_CLOSE_PAREN_/)/g; + print OUT "..................\n"; + print OUT "$head\n"; + print OUT "..................\n\n"; + if( $hasdump ) { + $oneline =~ s/_OPEN_PAREN_/(/g; + $oneline =~ s/_CLOSE_PAREN_/)/g; + my $symname = $type.'_'.$name; + $name =~ s/_OPEN_PAREN_/(/g; + $name =~ s/_CLOSE_PAREN_/)/g; + print DUMP "$outname,$symname,$type,$name,$oneline\n"; + $id ++; + } + $comment =~ s/_OPEN_PAREN_/(/g; + $comment =~ s/_CLOSE_PAREN_/)/g; + $comment =~ s/_GENERIC_LINK_\|([^|]+)\|([^|]+)\|/${1}_OPEN_PAREN_${2}_CLOSE_PAREN_/g; + print OUT "$comment\n\n"; +} + +sub process( $$ ) { + my( $file, $prefixes ) = @_; + open FILE, $file or die "Could nod read $file ($!)\n"; + my $line; + my $active; + my $verbatim; + my $buff; + my $head; + my $struct; + my $def; + my $sdepth; + while( defined( $line = ) ) { + chomp $line; + # Generic macro hack - replaces the parenthesis so it is valid identifier + $line =~ s/$_\(([^()]+)\)/${_}_OPEN_PAREN_${1}_CLOSE_PAREN_/g foreach @{$prefixes}; + if( $def ) { + $head .= "\n".$line; + $line =~ s/(\/\*.*?\*\/|\/\/.*)//g; + if( $line !~ /\\\s*$/ ) { + formatNote( $head, $buff ); + $def = 0; + $buff = $head = undef; + } + } elsif( $struct ) { + $head .= "\n".$line; + my $cp = $line; + $sdepth += ($cp =~ tr/{//); + $sdepth -= ($cp =~ tr/}//); + if( !$sdepth ) { + formatNote( $head, $buff ); + $struct = 0; + $buff = undef; + $head = undef; + } + } elsif( $verbatim ) { + if( $line =~ /\*\// ) { + $verbatim = 0; + print OUT "\n"; + } else { + $line =~ s/^\s*\* ?//; + print OUT "$line\n"; + } + } elsif( $active ) { + if( $line =~ /\*\// ) { + $active = 0; + } else { + $line =~ s/^\s*\* ?//; + $buff .= "$line\n"; + } + } else { + if( ( $line =~ /\S/ ) && ( defined $buff ) ) { + if( $line =~ /^\s*#define.*\\(\s*(\/\/.*|\/\*.*?\*\/|))*/ ) { + $head = $line; + $def = 1; + } elsif( $line =~ /\(/ || $line !~ /{/ || $line =~ /^\s*#define/ ) { + $_ = $line; + s/^\s*\s?//; + s/\/\/.*//; + s/\/\*.*?\*\///gs; + formatNote( 
$_, $buff ); + $head = undef; + $buff = undef; + } else { + $head = $line; + $struct = $sdepth = 1; + } + } elsif( ( $buff ) = ( $line =~ /\/\*\*\*(.*)\*\*\*\// ) ) { + $buff =~ s/^\s?//; + print OUT "$buff\n\n"; + $buff = undef; + } elsif( ( $buff ) = ( $line =~ /^\s*\/\*\*(.*)\*\*\// ) ) { + $buff =~ s/^\s*//; + $buff .= "\n"; + } elsif( ( $head, $buff ) = ( $line =~ /^(.*)\/\*\*(.*)\*\*\// ) ) { + $buff =~ s/^\s*//; + $buff =~ s/\s*$//; + if( $head =~ /\(/ || $head !~ /{/ || $head =~/}/ ) { + $head =~ s/^\s*//; + $head =~ s/\/\*.*?\*\///gs; + formatNote( $head, $buff ); + $head = undef; + $buff = undef; + } else { + $struct = $sdepth = 1; + } + } elsif( $line =~ /\/\*\*\*/ ) { + $verbatim = 1; + } elsif( $line =~ /\/\*\*/ ) { + $active = 1; + } + } + } + close FILE; +} + +my $line; +while( defined( $line = ) ) { + chomp $line; + my $prefixes; + if( my( $fname, $prefixes ) = ( $line =~ /^!!\s*(\S+)(.*)/ ) ) { + $fname = "$basedir/$fname" if( ( $fname !~ /^\// ) && defined $basedir ); + process( $fname, [ ( map( { + my( $result ) = /^\s*(.*\S)\s*$/; + $result; + } ( split /,/, $prefixes ) ) ) ] ); + push @deps, $fname; + } else { + print OUT "$line\n"; + } +} + +if( defined $depname ) { + open DEP, ">>$depname" or die "Could not write dep file $depname ($!)\n"; + print DEP "$outname:"; + print DEP " $_" foreach( @deps ); + print DEP "\n"; + if( $hasdump ) { + print DEP "$defdump:"; + print DEP " $_" foreach( @deps ); + print DEP "\n"; + } + close DEP; +} + +close IN; +close OUT; +close DUMP; diff --git a/libucw/build/genconf b/libucw/build/genconf new file mode 100755 index 0000000..30e8896 --- /dev/null +++ b/libucw/build/genconf @@ -0,0 +1,84 @@ +#!/usr/bin/perl +# Configuration file and script preprocessor +# (c) 2004--2007 Martin Mares + +use strict; +use warnings; + +@ARGV == 3 or die "Usage: genconf "; + +open CF, $ARGV[2] or die "Unable to open $ARGV[2]"; +my %options = (); +my %vars = (); +sub opt { + my ($k,$v) = @_; + $vars{$k} = $v; + $options{$k} = 1 
if ($k =~ /^CONFIG_/); +} +foreach my $k (keys %ENV) { + opt($k, $ENV{$k}); +} +while () { + chomp; + if (my ($k,$v) = /^(\w+)=(.*)/) { + $v =~ s/\s+$//; + opt($k, $v); + } +} +close CF; + +sub eval_expr { + $_ = shift @_; + s/\b(CONFIG_\w+)\b/defined($options{$1}) ? 1 : 0/ge; + return eval $_; +} + +open IN, $ARGV[0] or die "Unable to open $ARGV[0]"; +open OUT, ">$ARGV[1]" or die "Unable to create $ARGV[1]"; +my @ifs = (); # stack of conditions, 1=satisfied, -1=unsatisfied, 0=shadowed +my $empty = 0; # last line was empty +while () { + if (/^#ifdef\s+(\w+)/) { + push @ifs, (@ifs && $ifs[$#ifs] <= 0) ? 0 : (defined $options{$1}) ? 1 : -1; + } elsif (/^#ifndef\s+(\w+)/) { + push @ifs, (@ifs && $ifs[$#ifs] <= 0) ? 0 : (defined $options{$1}) ? -1 : 1; + } elsif (/^#if\s(.*)$/) { + push @ifs, (@ifs && $ifs[$#ifs] <= 0) ? 0 : (eval_expr $1) ? 1 : -1; + } elsif (/^#endif/) { + defined pop @ifs || die "Improper nesting of conditionals"; + } elsif (/^#else/) { + my $x = pop @ifs; + defined $x || die "Improper nesting of conditionals"; + push @ifs, $x >= 0 ? 0 : 1; + } elsif (/^#elsif\s(.*)$/) { + my $x = pop @ifs; + defined $x || die "Improper nesting of conditionals"; + push @ifs, $x >= 0 ? 0 : (eval_expr $1) ? 
1 : -1;
+	} else {
+		@ifs && $ifs[$#ifs] <= 0 && next;
+		if (/^$/) {
+			$empty && next;
+			$empty = 1;
+		} else { $empty = 0; }
+		if (/^#pipe\s+(.+)/) {
+			my $cmd = $1;
+			my $val = `$cmd`;
+			die "Piped command '$cmd' failed" if $?;
+			print OUT $val;	# emit the output already captured and checked; do not run the command a second time
+		} else {
+			sub repl($);
+			sub repl($) {
+				my $v = shift @_;
+				exists $vars{$v} or die "Cannot substitute $v: variable not set";
+				my $x = $vars{$v};
+				while ($x =~ s/\$\((\w+)\)/repl($1)/ge) { }
+				return $x;
+			}
+			s/@(\w+)@/repl($1)/ge;
+			print OUT;
+		}
+	}
+}
+@ifs && die "Unterminated #ifdef";
+close IN;
+close OUT;
diff --git a/libucw/build/genhash.c b/libucw/build/genhash.c
new file mode 100644
index 0000000..6d5fe33
--- /dev/null
+++ b/libucw/build/genhash.c
@@ -0,0 +1,153 @@
+/*
+ * Generator of Word Recognition Hash Tables
+ * (a.k.a. simple gperf replacement)
+ *
+ * (c) 1999 Martin Mares
+ */
+
+#include
+#include
+#include
+#include
+
+struct word {
+  struct word *next;
+  char *w;
+  char *extra;
+};
+
+static unsigned int hash(char *c)
+{
+  unsigned int h = 0;
+  while (*c)
+    h = (h * 37) + *c++;
+  return h;
+}
+
+static int /* Sequential search */
+fast_isprime(unsigned x) /* We know x != 2 && x != 3 */
+{
+  unsigned test = 5;
+
+  for(;;)
+    {
+      if (!(x % test))
+	return 0;
+      if (x / test <= test)
+	return 1;
+      test += 2; /* 6k+1 */
+      if (!(x % test))
+	return 0;
+      if (x / test <= test)
+	return 1;
+      test += 4; /* 6k-1 */
+    }
+}
+
+static unsigned int
+nextprime(unsigned x) /* Returns some prime greater than X */
+{
+  if (x <= 5)
+    return 5;
+  x += 5 - (x % 6); /* x is 6k-1 */
+  for(;;)
+    {
+      if (fast_isprime(x))
+	return x;
+      x += 2; /* 6k+1 */
+      if (fast_isprime(x))
+	return x;
+      x += 4; /* 6k-1 */
+    }
+}
+
+int main(int argc, char **argv)
+{
+  FILE *fi, *fo;
+  struct word *words = NULL;
+  struct word *w, **ht;
+  char buf[1024], *c, namebuf[256];
+  int cnt = 0;
+  int skip, i, size;
+
+  if (argc != 4)
+    {
+      fprintf(stderr, "Usage: genhash \n");
+      return 1;
+    }
+  fi = fopen(argv[1], "r");
+  if (!fi) {
fprintf(stderr, "Cannot open input file: %m\n"); return 1; } + fo = fopen(argv[2], "w"); + if (!fo) { fprintf(stderr, "Cannot open output file: %m\n"); return 1; } + + buf[0] = 0; + fgets(buf, sizeof(buf)-1, fi); + if (strncmp(buf, "%{", 2)) { fprintf(stderr, "Syntax error at <%s>\n", buf); return 1; } + fputs(buf+2, fo); + while (fgets(buf, sizeof(buf)-1, fi) && strcmp(buf, "%}\n")) + fputs(buf, fo); + fgets(namebuf, sizeof(namebuf)-1, fi); + if (strncmp(namebuf, "struct ", 7) || !(c = strchr(namebuf+7, ' '))) + { fprintf(stderr, "Syntax error at <%s>\n", namebuf); return 1; } + *c = 0; + while (fgets(buf, sizeof(buf)-1, fi) && strcmp(buf, "%%\n")) + ; + while (fgets(buf, sizeof(buf)-1, fi)) + { + c = strchr(buf, '\n'); + if (c) + *c = 0; + c = strchr(buf, ','); + w = alloca(sizeof(struct word)); + if (c) + *c++ = 0; + else + { fprintf(stderr, "No comma?\n"); return 1; } + w->w = alloca(strlen(buf)+1); + strcpy(w->w, buf); + w->extra = alloca(strlen(c)+1); + strcpy(w->extra, c); + w->next = words; + words = w; + cnt++; + } + cnt = cnt*12/10; + size = 16; + while (size < cnt) + size += size; + skip = nextprime(size*3/4); + + ht = alloca(size * sizeof(struct word *)); + bzero(ht, size * sizeof(struct word *)); + for(w=words; w; w=w->next) + { + int h = hash(w->w) & (size - 1); + while (ht[h]) + h = (h + skip) & (size - 1); + ht[h] = w; + } + + fprintf(fo, "static %s htable[] = {\n", namebuf); + for(i=0; iw, ht[i]->extra); + else + fprintf(fo, "{ NULL },\n"); + fprintf(fo, "};\n\nconst %s *%s(const char *x, unsigned int len)\n\ +{\n\ + const char *c = x;\n\ + unsigned int h = 0;\n\ + while (*c)\n\ + h = (h * 37) + *c++;\n\ + h = h & %d;\n\ + while (htable[h].name)\n\ + {\n\ + if (!strcmp(htable[h].name, x))\n\ + return &htable[h];\n\ + h = (h + %d) & %d;\n\ + }\n\ + return NULL;\n\ +}\n", namebuf, argv[3], size-1, skip, size-1); + + return 0; +} diff --git a/libucw/build/git/pre-commit b/libucw/build/git/pre-commit new file mode 100755 index 0000000..8153c35 --- 
/dev/null +++ b/libucw/build/git/pre-commit @@ -0,0 +1,80 @@ +#!/usr/bin/perl + +my $found_bad = 0; +my $filename; +my $reported_filename = ""; +my $lineno; + +if (scalar @ARGV > 0) { + my $f; + foreach $f (@ARGV) { + check_file($f); + } +} else { + exit(0) if (system("git-rev-parse --verify HEAD 2>/dev/null")); + open(PATCH, "git-diff-index -p -M --cached HEAD --|") || + die("git-diff-index failed"); + while () { + check_file($1) if (m|^diff --git a/(.*\.[ch]) b/\1$|); + } + close(PATCH); + +} +exit($found_bad); + +sub bad_line { + my ($why, $line) = @_; + if (!$found_bad) { + print STDERR "*\n"; + print STDERR "* You have some suspicious patch lines:\n"; + print STDERR "*\n"; + $found_bad = 1; + } + if ($reported_filename ne $filename) { + print STDERR "* In $filename\n"; + $reported_filename = $filename; + } + print STDERR "* \t$why\n"; + print STDERR "$lineno:$line\n" if ($line); +} + + +sub check_file { + ($filename) = @_; + open(IN, "$filename") || die ("Cannot open $filename"); + my $has_loc = 0; + my $has_glob = 0; + my $has_copy = 0; + my $empty = 0; + + $lineno = 1; + while() { + chomp; + if (/^\s*\#include\s+"/) { + bad_line("sherlock includes after global includes", $_) if (!$has_loc && $has_glob); + $has_loc++; + } + if (/\s$/) { + bad_line("trailing whitespace", $_); + } + if (/^\s* \t/) { + bad_line("indent SP followed by a TAB", $_); + } + if (/^\s*\#define\s+LOCAL_DEBUG/) { + bad_line("LOCAL_DEBUG left enabled", $_); + } + if (/^([<>])\1{6} |^={7}$/) { + bad_line("unresolved merge conflict", $_); + } + + $has_glob++ if (/^\s*\#include\s+\ + +set -e +SRC=$1 +DEST=$2 +shift 2 +while [ -n "$1" ] ; do + if [ ! 
-f "$DEST/$1" -o "$SRC/$1" -nt "$DEST/$1" ] ; then + echo "INC $SRC/$1 -> $DEST/$1" + mkdir -p $DEST/`dirname $1` + sed -e 's/^\(#include[ ]*\)"\(.*\)"/\1<\2>/' <$SRC/$1 >$DEST/$1 + fi + shift +done diff --git a/libucw/build/lib-deps b/libucw/build/lib-deps new file mode 100755 index 0000000..1019bc2 --- /dev/null +++ b/libucw/build/lib-deps @@ -0,0 +1,22 @@ +#!/bin/bash +# +# A tool which builds a list of dependent libraries from the list +# of pkg-config files. +# +# (c) 2007 Martin Mares , placed under GNU LGPL +# + +set -e + +shift +SEEN= +while [ -n "$1" ] ; do + case "$1" in + *.pc) if [ -n "$SEEN" ] ; then echo -n ", " ; fi + echo -n "`basename $1 .pc`" + SEEN=1 + ;; + *) ;; + esac + shift +done diff --git a/libucw/build/lib-flags b/libucw/build/lib-flags new file mode 100755 index 0000000..29bb4e8 --- /dev/null +++ b/libucw/build/lib-flags @@ -0,0 +1,24 @@ +#!/bin/bash +# +# A preprocessor for linker arguments, which replaces references to .pc +# files by results of the proper calls to pkg-config. +# +# (c) 2007 Martin Mares , placed under GNU LGPL +# + +set -e + +PC= +while [ -n "$1" ] ; do + case "$1" in + *.pc) PC="$PC `basename $1 .pc`" + ;; + *) echo -n " $1" + ;; + esac + shift +done +if [ -n "$PC" ] ; then + echo -n " " + PKG_CONFIG_PATH="$PKG_CONFIG_PATH:obj/pkgconfig" pkg-config $PKG_CONFIG_OPTS --libs $PC +fi diff --git a/libucw/build/mergedeps b/libucw/build/mergedeps new file mode 100755 index 0000000..e1c467d --- /dev/null +++ b/libucw/build/mergedeps @@ -0,0 +1,21 @@ +#!/usr/bin/perl + +@ARGV == 2 or die "Usage: mergedeps "; +foreach $a (@ARGV) { + open F, "$a" or next; + $t = ""; + while () { + $t .= $_; + if (! /\\$/) { + ($t =~ /^(.*):/) || die "Parse error at $t"; + $rules{$1} = $t; + $t = ""; + } + } + close F; +} +open(F,">" . 
$ARGV[0]) || die "Unable to write output file"; +foreach $a (sort keys %rules) { + print F $rules{$a}; +} +close F; diff --git a/libucw/build/tester b/libucw/build/tester new file mode 100755 index 0000000..da45237 --- /dev/null +++ b/libucw/build/tester @@ -0,0 +1,173 @@ +#!/usr/bin/perl +# A simple unit testing script +# (c) 2004--2013 Martin Mares +# (c) 2007 Pavel Charvat + +# Tests in the test file have a syntax similar to mail headers, +# individual test case are separated by blank lines and they can contain +# the following fields: +# +# Name: name of the case (default: sequence number since start of file) +# Run: command to run (default: command from the previous test case) +# This can be an arbitrary shell pipeline, sequences $0 to $9 are +# replaced by file names of In or Out files (see below). +# In: lines to pass to the program as standard input +# Out: lines to expect at the program's standard output +# Err: lines to expect at the program's standard error output +# In: lines to pass to the program as input file +# Out: lines to expect from the program in output file +# Both In and Out can be specified simultaneously if we +# are testing a program which modifies some of its input files. 
+# Exit: expected exit code of the program (default: 0) +# +# A value of a field can be optionally given as a shell-style here-document: +# +# In < \$verbose, + "rundir=s" => \$rundir) + or die "Usage: tester [--verbose] [--rundir=] \n"; + +my @tests = (); +my $tt; +my $append_to; + +while (<>) { + /^#/ && next; + if (/^\s*$/) { + $tt = undef; + $append_to = undef; + } elsif (defined($append_to) && /^\s+(.*)$/) { + $$append_to .= "\n$1"; + } elsif (my ($n,$v) = /^(\w+):\s+(.*)$/) { + if (!$tt) { + $tt = {}; + push @tests, $tt; + } + ($tt->{$n}) && die "$n already defined"; + $tt->{$n} = $v; + $append_to = \($tt->{$n}); + } elsif (my ($n,$sep) = /^(\w+)\s*<<(\w+)\s*$/) { + if (!$tt) { + $tt = {}; + push @tests, $tt; + } + ($tt->{$n}) && die "$n already defined"; + $tt->{$n} = ""; + $sep .= "\n"; + while (1) { + my $line = <>; + defined $line or die "Here-document not terminated"; + last if $line eq $sep; + $tt->{$n} .= $line; + } + chomp $tt->{$n}; + } else { + die "Test script syntax error"; + } +} + +if (! 
-d "$rundir/tmp") { + mkdir "$rundir/tmp" or die "Unable to create $rundir/tmp: $!"; +} + +my $i = 0; +my $errors = 0; +my $prev_run = undef; +TEST: foreach $tt (@tests) { + $i++; + my $name = $tt->{'Name'}; + printf "Test %03d", $i; + print " [$name]" if defined $name; + print ": "; + $run = ($tt->{'Run'} || $prev_run) or die "Don't know what to run"; + $prev_run = $run; + + my @out_files = (); + my @out_checks = (); + my $redirs = ""; + + if (defined $tt->{'In'}) { + my $ifi = "tmp/test$i.in"; + open X, ">$rundir/$ifi" or die "Unable to create $ifi"; + print X $tt->{'In'}, "\n"; + close X; + $redirs .= " <$ifi"; + } else { + $redirs .= " {'Out'}) { + my $ofi = "tmp/test$i.out"; + unlink "$rundir/$ofi"; + $redirs .= " >$ofi"; + push @out_files, $ofi; + push @out_checks, $tt->{'Out'}; + } else { + $redirs .= " >/dev/null"; + } + if (defined $tt->{'Err'}) { + my $efi = "tmp/test$i.err"; + unlink "$rundir/$efi"; + $redirs .= " 2>$efi"; + push @out_files, $efi; + push @out_checks, $tt->{'Err'}; + } + foreach my $arg (0..9) { + my $f = "tmp/test$i.$arg"; + if (defined $tt->{"Out$arg"}) { + unlink "$rundir/$f"; + push @out_files, $f; + push @out_checks, $tt->{"Out$arg"}; + } + if (defined $tt->{"In$arg"}) { + open X, ">$rundir/$f" or die "Unable to create $f"; + print X $tt->{"In$arg"}, "\n"; + close X; + } + } + $run =~ s/\$(\d)/tmp\/test$i.$1/g; + print "(running $run) " if $verbose; + system "cd $rundir && ( $run ) $redirs"; + if ($? % 256) { + print "FAILED with status code $?\n"; + $errors++; + next; + } + my $ec = $? / 256; + my $expect_ec = $tt->{'Exit'} || 0; + if ($ec != $expect_ec) { + print "FAILED: unexpected exit code $ec\n"; + $errors++; + next; + } + + for (my $i=0; $i<=$#out_files; $i++) { + my $ofi = $out_files[$i]; + open X, "<$rundir/$ofi" or die "Unable to read $ofi"; + my $out; + { + local $/ = undef; + $out = ; + } + close X; + $out =~ /\n$/s or $out .= "\n"; + if ($out ne $out_checks[$i] . 
"\n") { + print "FAILED (see $ofi)\n"; + $errors++; + next TEST; + } + } + + system "rm -f $rundir/tmp/test$i.*"; + print "OK\n"; +} + +exit !!$errors; diff --git a/libucw/charset/Makefile b/libucw/charset/Makefile new file mode 100644 index 0000000..8e351ff --- /dev/null +++ b/libucw/charset/Makefile @@ -0,0 +1,59 @@ +# Makefile for the UCW Charset Library (c) 1997--2007 Martin Mares + +DIRS+=charset + +LIBCHARSET_PROGS= +LIBCHARSET_MODS=toupper tolower tocat toligatures unaccent charconv setnames fb-charconv stk-charconv mp-charconv +LIBCHARSET_INCLUDES=charconv.h unicat.h fb-charconv.h stk-charconv.h mp-charconv.h +LIBCHARSET_DEPS=$(LIBUCW) + +$(o)/charset/libucw-charset$(LV).a: $(addsuffix .o,$(addprefix $(o)/charset/,$(LIBCHARSET_MODS))) +$(o)/charset/libucw-charset$(LV).so: $(addsuffix .oo,$(addprefix $(o)/charset/,$(LIBCHARSET_MODS))) $(LIBCHARSET_DEPS) +$(o)/charset/libucw-charset$(LV).so: SONAME_SUFFIX=.0 +$(o)/charset/libucw-charset.pc: $(LIBCHARSET_DEPS) + +ifdef CONFIG_INSTALL_API +$(o)/charset/libucw-charset.pc: $(addprefix $(o)/charset/libucw-charset$(LV),.a .so) +endif + +API_LIBS+=libucw-charset +API_INCLUDES+=$(o)/charset/.include-stamp +$(o)/charset/.include-stamp: $(addprefix $(s)/charset/,$(LIBCHARSET_INCLUDES)) +$(o)/charset/.include-stamp: IDST=charset +run/lib/pkgconfig/libucw-charset.pc: $(o)/charset/libucw-charset.pc + +ifdef CONFIG_CHARSET_UTILS +LIBCHARSET_PROGS+=$(o)/charset/ucw-cs2cs +endif + +$(o)/charset/ucw-cs2cs: $(o)/charset/ucw-cs2cs.o $(LIBCHARSET) $(LIBUCW) + +PROGS+=$(LIBCHARSET_PROGS) + +build_charsets: + cd $(s)/charset && sh misc/generate + +clean:: + rm -f $(s)/charset/misc/u-* + +INSTALL_TARGETS+=install-libucw-charset-lib +install-libucw-charset-lib: + install -d -m 755 $(DESTDIR)$(INSTALL_LIB_DIR) + install -m 644 run/lib/libucw-charset$(LV).so.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-charset$(LV).so.0.0 + ln -sf libucw-charset$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-charset$(LV).so.0 +.PHONY: 
install-libucw-charset-lib + +INSTALL_TARGETS+=install-libucw-charset-api +install-libucw-charset-api: + install -d -m 755 $(DESTDIR)$(INSTALL_INCLUDE_DIR)/charset $(DESTDIR)$(INSTALL_LIB_DIR) $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + install -m 644 $(addprefix run/include/charset/,$(LIBCHARSET_INCLUDES)) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/charset + install -m 644 run/lib/pkgconfig/libucw-charset.pc $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + ln -sf libucw-charset$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-charset$(LV).so + install -m 644 run/lib/libucw-charset$(LV).a $(DESTDIR)$(INSTALL_LIB_DIR) +.PHONY: install-libucw-charset-api + +INSTALL_TARGETS+=install-libucw-charset-utils +install-libucw-charset-utils: + install -d -m 755 $(DESTDIR)$(INSTALL_BIN_DIR) + install -m 755 $(LIBCHARSET_PROGS) $(DESTDIR)$(INSTALL_BIN_DIR) +.PHONY: install-libucw-charset-utils diff --git a/libucw/charset/U-cat.h b/libucw/charset/U-cat.h new file mode 100644 index 0000000..bcfe99b --- /dev/null +++ b/libucw/charset/U-cat.h @@ -0,0 +1,1084 @@ +/* Generated automatically by gentab. Please don't edit. 
*/ + +static const byte _U_cat_00[256] = { +_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL, +_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL, +_U_SPACE,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_DIGIT,_U_DIGIT,_U_DIGIT,_U_DIGIT,_U_DIGIT,_U_DIGIT,_U_DIGIT,_U_DIGIT,_U_DIGIT,_U_DIGIT,0,0,0,0,0,0, +0,_U_LUPPER|_U_XDIGIT,_U_LUPPER|_U_XDIGIT,_U_LUPPER|_U_XDIGIT,_U_LUPPER|_U_XDIGIT,_U_LUPPER|_U_XDIGIT,_U_LUPPER|_U_XDIGIT,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,0,0,0,0, +0,_U_LLOWER|_U_XDIGIT,_U_LLOWER|_U_XDIGIT,_U_LLOWER|_U_XDIGIT,_U_LLOWER|_U_XDIGIT,_U_LLOWER|_U_XDIGIT,_U_LLOWER|_U_XDIGIT,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0,0,0,_U_CTRL, +_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL, +_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL, +_U_SPACE,0,0,0,0,0,0,0,0,0,_U_LLOWER,0,0,_U_CTRL,0,0, +0,0,0,0,0,_U_LLOWER,0,0,0,0,_U_LLOWER,0,0,0,0,0, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, 
+_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER +}; + +static const byte _U_cat_01[256] = { +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LIGATURE,_U_LIGATURE,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER, +_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER, 
+_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LETTER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LUPPER,_U_LETTER,_U_LLOWER,_U_LUPPER,_U_LETTER,_U_LLOWER,_U_LUPPER,_U_LETTER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER, +_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LLOWER,_U_LUPPER,_U_LETTER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER +}; + +static const byte _U_cat_02[256] = { +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, 
+_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,_U_LETTER,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_03[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,_U_LETTER,0,0,0,0,0, +0,0,0,0,0,0,_U_LUPPER,0,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,_U_LUPPER,0,_U_LUPPER,_U_LUPPER, +_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,0,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0, 
+_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LLOWER,0,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LLOWER,0,0,0,0 +}; + +static const byte _U_cat_04[256] = { +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,0,0,0,0,0,0,0,0,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, 
+_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,0, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,0,0,_U_LUPPER,_U_LLOWER,0,0,0,0,0,0 +}; + +static const byte _U_cat_05[256] = { +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,0,_U_LETTER,0,0,0,0,0,0, +0,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, 
+_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LIGATURE,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_06[256] = { +_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER, +0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,0,0,0,0,0,0,0,_U_CTRL,0,0, +0,0,0,0,0,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER, +0,0,0,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_LETTER +}; + +static const byte _U_cat_07[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_CTRL, +_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0, +0,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_09[256] = { +0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_LETTER, +_U_LETTER,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER,0,_U_LETTER, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_0A[256] = { +0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,_U_LETTER, +_U_LETTER,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER, +_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_0B[256] = { +0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_LETTER, +_U_LETTER,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER,0,_U_LETTER, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,_U_LETTER,0,_U_LETTER,0,_U_LETTER,_U_LETTER, +0,0,0,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_0C[256] = { +0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_LETTER,0, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_0D[256] = { +0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_0E[256] = { +0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,_U_LETTER,_U_LETTER,0,_U_LETTER,0,0,_U_LETTER,_U_LETTER,0,_U_LETTER,0,0,_U_LETTER,0,0, +0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+0,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,0,_U_LETTER,0,0,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,0,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,_U_LETTER,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_0F[256] = { +_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_10[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_11[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0 +}; + +static const byte _U_cat_12[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0, +_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0, +_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_13[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0, +_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_14[256] = { +0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_15[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_16[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0, +_U_SPACE,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte 
_U_cat_17[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER, +_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_CTRL,_U_CTRL,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,_U_LETTER,0,0,0,0,_U_LETTER,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_18[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_SPACE,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_19[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_1D[256] = { +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_1E[256] = { 
+_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0,0,0, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, 
+_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER, +_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,_U_LUPPER,_U_LLOWER,0,0,0,0,0,0 +}; + +static const byte _U_cat_1F[256] = { +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,0, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,0, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,_U_LUPPER,0,_U_LUPPER,0,_U_LUPPER,0,_U_LUPPER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LETTER,0,_U_LLOWER,0, +0,0,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LETTER,0,0,0, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,0,0,0, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,0,0, +0,0,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,_U_LLOWER,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LETTER,0,0,0 +}; + +static const byte _U_cat_20[256] = { +_U_SPACE,_U_SPACE,_U_SPACE,_U_SPACE,_U_SPACE,_U_SPACE,_U_SPACE,_U_SPACE,_U_SPACE,_U_SPACE,_U_SPACE,_U_SPACE,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,_U_SPACE,_U_SPACE,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_SPACE, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_SPACE, +_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,0,0,0,0,0,0,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL,_U_CTRL, +0,_U_LLOWER,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_LLOWER, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_21[256] = { +0,0,_U_LUPPER,0,0,0,0,_U_LUPPER,0,0,_U_LLOWER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LLOWER,_U_LLOWER, 
+_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LLOWER,0,_U_LUPPER,0,0,0,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,0, +0,0,0,0,_U_LUPPER,0,_U_LUPPER,0,_U_LUPPER,0,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,_U_LLOWER, +_U_LUPPER,_U_LUPPER,0,_U_LUPPER,_U_LLOWER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LLOWER,0,0,0,_U_LLOWER,_U_LUPPER,_U_LUPPER, +0,0,0,0,0,_U_LUPPER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_30[256] = { +_U_SPACE,0,0,0,0,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,_U_LETTER,_U_LETTER,0,0,0, +0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER, 
+0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_31[256] = { +0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0, +0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_34[256] = { +_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_4D[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,_U_LETTER,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_4E[256] = { +_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_9F[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,_U_LETTER,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_A0[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_A1[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_A2[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_A3[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_A4[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_AC[256] = { +_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_D7[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_D8[256] = { +_U_CTRL,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_DB[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_CTRL, 
+_U_CTRL,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_CTRL +}; + +static const byte _U_cat_DC[256] = { +_U_CTRL,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_DF[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_CTRL +}; + +static const byte _U_cat_E0[256] = { +_U_CTRL,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_F8[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,_U_CTRL +}; + +static const byte _U_cat_F9[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_FA[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const byte _U_cat_FB[256] = { +_U_LIGATURE,_U_LIGATURE,_U_LIGATURE,_U_LIGATURE,_U_LIGATURE,_U_LIGATURE,_U_LIGATURE,0,0,0,0,0,0,0,0,0, +0,0,0,_U_LIGATURE,_U_LIGATURE,_U_LIGATURE,_U_LIGATURE,_U_LIGATURE,0,0,0,0,0,_U_LETTER,0,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,0, +_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LIGATURE, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_FC[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER +}; + +static const byte _U_cat_FD[256] = { +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, 
+_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0,0 +}; + +static const byte _U_cat_FE[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_CTRL +}; + +static const byte _U_cat_FF[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER, 
+_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,_U_LUPPER,0,0,0,0,0, +0,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER, +_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,_U_LLOWER,0,0,0,0,0, +0,0,0,0,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0, +0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER, +0,0,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,_U_LETTER,0,0,_U_LETTER,_U_LETTER,_U_LETTER,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,_U_CTRL,_U_CTRL,_U_CTRL,0,0,0,0 +}; + +const byte *_U_cat[256] = { +_U_cat_00,_U_cat_01,_U_cat_02,_U_cat_03,_U_cat_04,_U_cat_05,_U_cat_06,_U_cat_07,NULL,_U_cat_09,_U_cat_0A,_U_cat_0B,_U_cat_0C,_U_cat_0D,_U_cat_0E,_U_cat_0F, +_U_cat_10,_U_cat_11,_U_cat_12,_U_cat_13,_U_cat_14,_U_cat_15,_U_cat_16,_U_cat_17,_U_cat_18,_U_cat_19,NULL,NULL,NULL,_U_cat_1D,_U_cat_1E,_U_cat_1F, 
+_U_cat_20,_U_cat_21,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +_U_cat_30,_U_cat_31,NULL,NULL,_U_cat_34,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_cat_4D,_U_cat_4E,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_cat_9F, +_U_cat_A0,_U_cat_A1,_U_cat_A2,_U_cat_A3,_U_cat_A4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_cat_AC,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_cat_D7,_U_cat_D8,NULL,NULL,_U_cat_DB,_U_cat_DC,NULL,NULL,_U_cat_DF, +_U_cat_E0,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_cat_F8,_U_cat_F9,_U_cat_FA,_U_cat_FB,_U_cat_FC,_U_cat_FD,_U_cat_FE,_U_cat_FF +}; diff --git a/libucw/charset/U-ligatures.h b/libucw/charset/U-ligatures.h new file mode 100644 index 0000000..667720d --- /dev/null +++ b/libucw/charset/U-ligatures.h @@ -0,0 +1,28 @@ +#define LIG_HASH_SIZE 24 + +static const u16 *_U_lig_hash[] = { + NULL, + NULL, + NULL, + /* FB13 */ (const u16 []) { 0x0574, 0x0576, 0 }, + /* FB14 */ (const u16 []) { 0x0574, 0x0565, 0 }, + /* FB15 */ (const u16 []) { 0x0574, 0x056B, 0 }, + /* FB16 */ (const u16 []) { 0x057E, 0x0576, 0 }, + /* FB17 */ (const u16 []) { 0x0574, 0x056D, 0 }, + /* FB00 */ (const u16 []) { 0x0066, 0x0066, 0 }, + /* FB01 */ (const u16 []) { 0x0066, 0x0069, 0 }, + /* FB02 */ (const u16 []) { 0x0066, 0x006C, 0 }, + /* FB03 */ (const u16 []) 
{ 0x0066, 0x0066, 0x0069, 0 }, + /* FB04 */ (const u16 []) { 0x0066, 0x0066, 0x006C, 0 }, + /* FB05 */ (const u16 []) { 0x0073, 0x0074, 0 }, + /* FB06 */ (const u16 []) { 0x0073, 0x0074, 0 }, + /* FB4F */ (const u16 []) { 0x05D0, 0x05DC, 0 }, + NULL, + NULL, + /* 0132 */ (const u16 []) { 0x0049, 0x004A, 0 }, + /* 0133 */ (const u16 []) { 0x0069, 0x006A, 0 }, + NULL, + NULL, + NULL, + /* 0587 */ (const u16 []) { 0x0565, 0x0582, 0 }, +}; diff --git a/libucw/charset/U-lower.h b/libucw/charset/U-lower.h new file mode 100644 index 0000000..2e95fdc --- /dev/null +++ b/libucw/charset/U-lower.h @@ -0,0 +1,229 @@ +/* Generated automatically by gentab. Please don't edit. */ + +static const u16 _U_lower_00[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F, +0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,0x0078,0x0079,0x007A,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF, +0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0,0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_lower_01[256] = { +0x0101,0,0x0103,0,0x0105,0,0x0107,0,0x0109,0,0x010B,0,0x010D,0,0x010F,0, +0x0111,0,0x0113,0,0x0115,0,0x0117,0,0x0119,0,0x011B,0,0x011D,0,0x011F,0, +0x0121,0,0x0123,0,0x0125,0,0x0127,0,0x0129,0,0x012B,0,0x012D,0,0x012F,0, +0x0069,0,0x0133,0,0x0135,0,0x0137,0,0,0x013A,0,0x013C,0,0x013E,0,0x0140, +0,0x0142,0,0x0144,0,0x0146,0,0x0148,0,0,0x014B,0,0x014D,0,0x014F,0, 
+0x0151,0,0x0153,0,0x0155,0,0x0157,0,0x0159,0,0x015B,0,0x015D,0,0x015F,0, +0x0161,0,0x0163,0,0x0165,0,0x0167,0,0x0169,0,0x016B,0,0x016D,0,0x016F,0, +0x0171,0,0x0173,0,0x0175,0,0x0177,0,0x00FF,0x017A,0,0x017C,0,0x017E,0,0, +0,0x0253,0x0183,0,0x0185,0,0x0254,0x0188,0,0x0256,0x0257,0x018C,0,0,0x01DD,0x0259, +0x025B,0x0192,0,0x0260,0x0263,0,0x0269,0x0268,0x0199,0,0,0,0x026F,0x0272,0,0x0275, +0x01A1,0,0x01A3,0,0x01A5,0,0x0280,0x01A8,0,0x0283,0,0,0x01AD,0,0x0288,0x01B0, +0,0x028A,0x028B,0x01B4,0,0x01B6,0,0x0292,0x01B9,0,0,0,0x01BD,0,0,0, +0,0,0,0,0x01C6,0x01C6,0,0x01C9,0x01C9,0,0x01CC,0x01CC,0,0x01CE,0,0x01D0, +0,0x01D2,0,0x01D4,0,0x01D6,0,0x01D8,0,0x01DA,0,0x01DC,0,0,0x01DF,0, +0x01E1,0,0x01E3,0,0x01E5,0,0x01E7,0,0x01E9,0,0x01EB,0,0x01ED,0,0x01EF,0, +0,0x01F3,0x01F3,0,0x01F5,0,0x0195,0x01BF,0x01F9,0,0x01FB,0,0x01FD,0,0x01FF,0 +}; + +static const u16 _U_lower_02[256] = { +0x0201,0,0x0203,0,0x0205,0,0x0207,0,0x0209,0,0x020B,0,0x020D,0,0x020F,0, +0x0211,0,0x0213,0,0x0215,0,0x0217,0,0x0219,0,0x021B,0,0x021D,0,0x021F,0, +0x019E,0,0x0223,0,0x0225,0,0x0227,0,0x0229,0,0x022B,0,0x022D,0,0x022F,0, +0x0231,0,0x0233,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_lower_03[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x03AC,0,0x03AD,0x03AE,0x03AF,0,0x03CC,0,0x03CD,0x03CE, 
+0,0x03B1,0x03B2,0x03B3,0x03B4,0x03B5,0x03B6,0x03B7,0x03B8,0x03B9,0x03BA,0x03BB,0x03BC,0x03BD,0x03BE,0x03BF, +0x03C0,0x03C1,0,0x03C3,0x03C4,0x03C5,0x03C6,0x03C7,0x03C8,0x03C9,0x03CA,0x03CB,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x03D9,0,0x03DB,0,0x03DD,0,0x03DF,0, +0x03E1,0,0x03E3,0,0x03E5,0,0x03E7,0,0x03E9,0,0x03EB,0,0x03ED,0,0x03EF,0, +0,0,0,0,0x03B8,0,0,0x03F8,0,0x03F2,0x03FB,0,0,0,0,0 +}; + +static const u16 _U_lower_04[256] = { +0x0450,0x0451,0x0452,0x0453,0x0454,0x0455,0x0456,0x0457,0x0458,0x0459,0x045A,0x045B,0x045C,0x045D,0x045E,0x045F, +0x0430,0x0431,0x0432,0x0433,0x0434,0x0435,0x0436,0x0437,0x0438,0x0439,0x043A,0x043B,0x043C,0x043D,0x043E,0x043F, +0x0440,0x0441,0x0442,0x0443,0x0444,0x0445,0x0446,0x0447,0x0448,0x0449,0x044A,0x044B,0x044C,0x044D,0x044E,0x044F, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x0461,0,0x0463,0,0x0465,0,0x0467,0,0x0469,0,0x046B,0,0x046D,0,0x046F,0, +0x0471,0,0x0473,0,0x0475,0,0x0477,0,0x0479,0,0x047B,0,0x047D,0,0x047F,0, +0x0481,0,0,0,0,0,0,0,0,0,0x048B,0,0x048D,0,0x048F,0, +0x0491,0,0x0493,0,0x0495,0,0x0497,0,0x0499,0,0x049B,0,0x049D,0,0x049F,0, +0x04A1,0,0x04A3,0,0x04A5,0,0x04A7,0,0x04A9,0,0x04AB,0,0x04AD,0,0x04AF,0, +0x04B1,0,0x04B3,0,0x04B5,0,0x04B7,0,0x04B9,0,0x04BB,0,0x04BD,0,0x04BF,0, +0,0x04C2,0,0x04C4,0,0x04C6,0,0x04C8,0,0x04CA,0,0x04CC,0,0x04CE,0,0, +0x04D1,0,0x04D3,0,0x04D5,0,0x04D7,0,0x04D9,0,0x04DB,0,0x04DD,0,0x04DF,0, +0x04E1,0,0x04E3,0,0x04E5,0,0x04E7,0,0x04E9,0,0x04EB,0,0x04ED,0,0x04EF,0, +0x04F1,0,0x04F3,0,0x04F5,0,0,0,0x04F9,0,0,0,0,0,0,0 +}; + +static const u16 _U_lower_05[256] = { +0x0501,0,0x0503,0,0x0505,0,0x0507,0,0x0509,0,0x050B,0,0x050D,0,0x050F,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x0561,0x0562,0x0563,0x0564,0x0565,0x0566,0x0567,0x0568,0x0569,0x056A,0x056B,0x056C,0x056D,0x056E,0x056F, 
+0x0570,0x0571,0x0572,0x0573,0x0574,0x0575,0x0576,0x0577,0x0578,0x0579,0x057A,0x057B,0x057C,0x057D,0x057E,0x057F, +0x0580,0x0581,0x0582,0x0583,0x0584,0x0585,0x0586,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_lower_1E[256] = { +0x1E01,0,0x1E03,0,0x1E05,0,0x1E07,0,0x1E09,0,0x1E0B,0,0x1E0D,0,0x1E0F,0, +0x1E11,0,0x1E13,0,0x1E15,0,0x1E17,0,0x1E19,0,0x1E1B,0,0x1E1D,0,0x1E1F,0, +0x1E21,0,0x1E23,0,0x1E25,0,0x1E27,0,0x1E29,0,0x1E2B,0,0x1E2D,0,0x1E2F,0, +0x1E31,0,0x1E33,0,0x1E35,0,0x1E37,0,0x1E39,0,0x1E3B,0,0x1E3D,0,0x1E3F,0, +0x1E41,0,0x1E43,0,0x1E45,0,0x1E47,0,0x1E49,0,0x1E4B,0,0x1E4D,0,0x1E4F,0, +0x1E51,0,0x1E53,0,0x1E55,0,0x1E57,0,0x1E59,0,0x1E5B,0,0x1E5D,0,0x1E5F,0, +0x1E61,0,0x1E63,0,0x1E65,0,0x1E67,0,0x1E69,0,0x1E6B,0,0x1E6D,0,0x1E6F,0, +0x1E71,0,0x1E73,0,0x1E75,0,0x1E77,0,0x1E79,0,0x1E7B,0,0x1E7D,0,0x1E7F,0, +0x1E81,0,0x1E83,0,0x1E85,0,0x1E87,0,0x1E89,0,0x1E8B,0,0x1E8D,0,0x1E8F,0, +0x1E91,0,0x1E93,0,0x1E95,0,0,0,0,0,0,0,0,0,0,0, +0x1EA1,0,0x1EA3,0,0x1EA5,0,0x1EA7,0,0x1EA9,0,0x1EAB,0,0x1EAD,0,0x1EAF,0, +0x1EB1,0,0x1EB3,0,0x1EB5,0,0x1EB7,0,0x1EB9,0,0x1EBB,0,0x1EBD,0,0x1EBF,0, +0x1EC1,0,0x1EC3,0,0x1EC5,0,0x1EC7,0,0x1EC9,0,0x1ECB,0,0x1ECD,0,0x1ECF,0, +0x1ED1,0,0x1ED3,0,0x1ED5,0,0x1ED7,0,0x1ED9,0,0x1EDB,0,0x1EDD,0,0x1EDF,0, +0x1EE1,0,0x1EE3,0,0x1EE5,0,0x1EE7,0,0x1EE9,0,0x1EEB,0,0x1EED,0,0x1EEF,0, +0x1EF1,0,0x1EF3,0,0x1EF5,0,0x1EF7,0,0x1EF9,0,0,0,0,0,0,0 +}; + +static const u16 _U_lower_1F[256] = { +0,0,0,0,0,0,0,0,0x1F00,0x1F01,0x1F02,0x1F03,0x1F04,0x1F05,0x1F06,0x1F07, +0,0,0,0,0,0,0,0,0x1F10,0x1F11,0x1F12,0x1F13,0x1F14,0x1F15,0,0, +0,0,0,0,0,0,0,0,0x1F20,0x1F21,0x1F22,0x1F23,0x1F24,0x1F25,0x1F26,0x1F27, 
+0,0,0,0,0,0,0,0,0x1F30,0x1F31,0x1F32,0x1F33,0x1F34,0x1F35,0x1F36,0x1F37, +0,0,0,0,0,0,0,0,0x1F40,0x1F41,0x1F42,0x1F43,0x1F44,0x1F45,0,0, +0,0,0,0,0,0,0,0,0,0x1F51,0,0x1F53,0,0x1F55,0,0x1F57, +0,0,0,0,0,0,0,0,0x1F60,0x1F61,0x1F62,0x1F63,0x1F64,0x1F65,0x1F66,0x1F67, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1F80,0x1F81,0x1F82,0x1F83,0x1F84,0x1F85,0x1F86,0x1F87, +0,0,0,0,0,0,0,0,0x1F90,0x1F91,0x1F92,0x1F93,0x1F94,0x1F95,0x1F96,0x1F97, +0,0,0,0,0,0,0,0,0x1FA0,0x1FA1,0x1FA2,0x1FA3,0x1FA4,0x1FA5,0x1FA6,0x1FA7, +0,0,0,0,0,0,0,0,0x1FB0,0x1FB1,0x1F70,0x1F71,0x1FB3,0,0,0, +0,0,0,0,0,0,0,0,0x1F72,0x1F73,0x1F74,0x1F75,0x1FC3,0,0,0, +0,0,0,0,0,0,0,0,0x1FD0,0x1FD1,0x1F76,0x1F77,0,0,0,0, +0,0,0,0,0,0,0,0,0x1FE0,0x1FE1,0x1F7A,0x1F7B,0x1FE5,0,0,0, +0,0,0,0,0,0,0,0,0x1F78,0x1F79,0x1F7C,0x1F7D,0x1FF3,0,0,0 +}; + +static const u16 _U_lower_21[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x03C9,0,0,0,0x006B,0x00E5,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x2170,0x2171,0x2172,0x2173,0x2174,0x2175,0x2176,0x2177,0x2178,0x2179,0x217A,0x217B,0x217C,0x217D,0x217E,0x217F, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_lower_24[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x24D0,0x24D1,0x24D2,0x24D3,0x24D4,0x24D5,0x24D6,0x24D7,0x24D8,0x24D9, 
+0x24DA,0x24DB,0x24DC,0x24DD,0x24DE,0x24DF,0x24E0,0x24E1,0x24E2,0x24E3,0x24E4,0x24E5,0x24E6,0x24E7,0x24E8,0x24E9, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_lower_FF[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0xFF41,0xFF42,0xFF43,0xFF44,0xFF45,0xFF46,0xFF47,0xFF48,0xFF49,0xFF4A,0xFF4B,0xFF4C,0xFF4D,0xFF4E,0xFF4F, +0xFF50,0xFF51,0xFF52,0xFF53,0xFF54,0xFF55,0xFF56,0xFF57,0xFF58,0xFF59,0xFF5A,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +const u16 *_U_lower[256] = { +_U_lower_00,_U_lower_01,_U_lower_02,_U_lower_03,_U_lower_04,_U_lower_05,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_lower_1E,_U_lower_1F, +NULL,_U_lower_21,NULL,NULL,_U_lower_24,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, 
+NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_lower_FF +}; diff --git a/libucw/charset/U-unacc.h b/libucw/charset/U-unacc.h new file mode 100644 index 0000000..29b3222 --- /dev/null +++ b/libucw/charset/U-unacc.h @@ -0,0 +1,248 @@ +/* Generated automatically by gentab. Please don't edit. */ + +static const u16 _U_unaccent_00[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x0061,0,0,0,0,0, +0,0,0,0,0,0x03BC,0,0,0,0,0x006F,0,0,0,0,0, +0x0041,0x0041,0x0041,0x0041,0x0041,0x0041,0,0x0043,0x0045,0x0045,0x0045,0x0045,0x0049,0x0049,0x0049,0x0049, +0,0x004E,0x004F,0x004F,0x004F,0x004F,0x004F,0,0,0x0055,0x0055,0x0055,0x0055,0x0059,0,0, +0x0061,0x0061,0x0061,0x0061,0x0061,0x0061,0,0x0063,0x0065,0x0065,0x0065,0x0065,0x0069,0x0069,0x0069,0x0069, +0,0x006E,0x006F,0x006F,0x006F,0x006F,0x006F,0,0,0x0075,0x0075,0x0075,0x0075,0x0079,0,0x0079 +}; + +static const u16 _U_unaccent_01[256] = { +0x0041,0x0061,0x0041,0x0061,0x0041,0x0061,0x0043,0x0063,0x0043,0x0063,0x0043,0x0063,0x0043,0x0063,0x0044,0x0064, +0x0044,0x0064,0x0045,0x0065,0x0045,0x0065,0x0045,0x0065,0x0045,0x0065,0x0045,0x0065,0x0047,0x0067,0x0047,0x0067, +0x0047,0x0067,0x0047,0x0067,0x0048,0x0068,0x0048,0x0068,0x0049,0x0069,0x0049,0x0069,0x0049,0x0069,0x0049,0x0069, +0x0049,0,0,0,0x004A,0x006A,0x004B,0x006B,0,0x004C,0x006C,0x004C,0x006C,0x004C,0x006C,0, 
+0,0x004c,0x006c,0x004E,0x006E,0x004E,0x006E,0x004E,0x006E,0,0,0,0x004F,0x006F,0x004F,0x006F, +0x004F,0x006F,0,0,0x0052,0x0072,0x0052,0x0072,0x0052,0x0072,0x0053,0x0073,0x0053,0x0073,0x0053,0x0073, +0x0053,0x0073,0x0054,0x0074,0x0054,0x0074,0x0054,0x0074,0x0055,0x0075,0x0055,0x0075,0x0055,0x0075,0x0055,0x0075, +0x0055,0x0075,0x0055,0x0075,0x0057,0x0077,0x0059,0x0079,0x0059,0x005A,0x007A,0x005A,0x007A,0x005A,0x007A,0x0073, +0x0042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x0049,0,0,0,0,0,0,0,0, +0x004F,0x006F,0,0,0,0,0,0,0,0,0,0,0,0,0,0x0055, +0x0075,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x0041,0x0061,0x0049, +0x0069,0x004F,0x006F,0x0055,0x0075,0x00DC,0x00FC,0x00DC,0x00FC,0x00DC,0x00FC,0x00DC,0x00FC,0,0x00C4,0x00E4, +0x0226,0x0227,0x00C6,0x00E6,0,0,0x0047,0x0067,0x004B,0x006B,0x004F,0x006F,0x01EA,0x01EB,0x01B7,0x0292, +0x006A,0,0,0,0x0047,0x0067,0,0,0x004E,0x006E,0x00C5,0x00E5,0x00C6,0x00E6,0x00D8,0x00F8 +}; + +static const u16 _U_unaccent_02[256] = { +0x0041,0x0061,0x0041,0x0061,0x0045,0x0065,0x0045,0x0065,0x0049,0x0069,0x0049,0x0069,0x004F,0x006F,0x004F,0x006F, +0x0052,0x0072,0x0052,0x0072,0x0055,0x0075,0x0055,0x0075,0x0053,0x0073,0x0054,0x0074,0,0,0x0048,0x0068, +0,0,0,0,0,0,0x0041,0x0061,0x0045,0x0065,0x00D6,0x00F6,0x00D5,0x00F5,0x004F,0x006F, +0x022E,0x022F,0x0059,0x0079,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x0069,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x0068,0x0266,0x006A,0x0072,0x0279,0x027B,0x0281,0x0077,0x0079,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x0263,0x006C,0x0073,0x0078,0x0295,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_unaccent_03[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x0391,0,0x0395,0x0397,0x0399,0,0x039F,0,0x03A5,0x03A9, +0x03CA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x0399,0x03A5,0x03B1,0x03B5,0x03B7,0x03B9, +0x03CB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x03B9,0x03C5,0x03BF,0x03C5,0x03C9,0, +0x03B2,0x03B8,0x03A5,0x03D2,0x03D2,0x03C6,0x03C0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x03BA,0x03C1,0x03C2,0,0x0398,0x03B5,0,0,0,0x03A3,0,0,0,0,0,0 +}; + +static const u16 _U_unaccent_04[256] = { +0x0415,0x0415,0,0x0413,0,0,0,0x0406,0,0,0,0,0x041A,0x0418,0x0423,0, +0,0,0,0,0,0,0,0,0,0x0418,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0x0438,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x0435,0x0435,0,0x0433,0,0,0,0x0456,0,0,0,0,0x043A,0x0438,0x0443,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x0474,0x0475,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x0416,0x0436,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x0410,0x0430,0x0410,0x0430,0,0,0x0415,0x0435,0,0,0x04D8,0x04D9,0x0416,0x0436,0x0417,0x0437, +0,0,0x0418,0x0438,0x0418,0x0438,0x041E,0x043E,0,0,0x04E8,0x04E9,0x042D,0x044D,0x0423,0x0443, +0x0423,0x0443,0x0423,0x0443,0x0427,0x0447,0,0,0x042B,0x044B,0,0,0,0,0,0 +}; + +static const u16 _U_unaccent_1D[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x0041,0x00C6,0x0042,0, +0x0044,0x0045,0x018E,0x0047,0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0,0x004F,0x0222,0x0050,0x0052, +0x0054,0x0055,0x0057,0x0061,0x0250,0x0251,0x1D02,0x0062,0x0064,0x0065,0x0259,0x025B,0x025C,0x0067,0,0x006B, +0x006D,0x014B,0x006F,0x0254,0x1D16,0x1D17,0x0070,0x0074,0x0075,0x1D1D,0x026F,0x0076,0x1D25,0x03B2,0x03B3,0x03B4, 
+0x03C6,0x03C7,0x0069,0x0072,0x0075,0x0076,0x03B2,0x03B3,0x03C1,0x03C6,0x03C7,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_unaccent_1E[256] = { +0x0041,0x0061,0x0042,0x0062,0x0042,0x0062,0x0042,0x0062,0x00C7,0x00E7,0x0044,0x0064,0x0044,0x0064,0x0044,0x0064, +0x0044,0x0064,0x0044,0x0064,0x0112,0x0113,0x0112,0x0113,0x0045,0x0065,0x0045,0x0065,0x0228,0x0229,0x0046,0x0066, +0x0047,0x0067,0x0048,0x0068,0x0048,0x0068,0x0048,0x0068,0x0048,0x0068,0x0048,0x0068,0x0049,0x0069,0x00CF,0x00EF, +0x004B,0x006B,0x004B,0x006B,0x004B,0x006B,0x004C,0x006C,0x1E36,0x1E37,0x004C,0x006C,0x004C,0x006C,0x004D,0x006D, +0x004D,0x006D,0x004D,0x006D,0x004E,0x006E,0x004E,0x006E,0x004E,0x006E,0x004E,0x006E,0x00D5,0x00F5,0x00D5,0x00F5, +0x014C,0x014D,0x014C,0x014D,0x0050,0x0070,0x0050,0x0070,0x0052,0x0072,0x0052,0x0072,0x1E5A,0x1E5B,0x0052,0x0072, +0x0053,0x0073,0x0053,0x0073,0x015A,0x015B,0x0160,0x0161,0x1E62,0x1E63,0x0054,0x0074,0x0054,0x0074,0x0054,0x0074, +0x0054,0x0074,0x0055,0x0075,0x0055,0x0075,0x0055,0x0075,0x0168,0x0169,0x016A,0x016B,0x0056,0x0076,0x0056,0x0076, +0x0057,0x0077,0x0057,0x0077,0x0057,0x0077,0x0057,0x0077,0x0057,0x0077,0x0058,0x0078,0x0058,0x0078,0x0059,0x0079, +0x005A,0x007A,0x005A,0x007A,0x005A,0x007A,0x0068,0x0074,0x0077,0x0079,0,0x017F,0,0,0,0, +0x0041,0x0061,0x0041,0x0061,0x00C2,0x00E2,0x00C2,0x00E2,0x00C2,0x00E2,0x00C2,0x00E2,0x1EA0,0x1EA1,0x0102,0x0103, +0x0102,0x0103,0x0102,0x0103,0x0102,0x0103,0x1EA0,0x1EA1,0x0045,0x0065,0x0045,0x0065,0x0045,0x0065,0x00CA,0x00EA, +0x00CA,0x00EA,0x00CA,0x00EA,0x00CA,0x00EA,0x1EB8,0x1EB9,0x0049,0x0069,0x0049,0x0069,0x004F,0x006F,0x004F,0x006F, 
+0x00D4,0x00F4,0x00D4,0x00F4,0x00D4,0x00F4,0x00D4,0x00F4,0x1ECC,0x1ECD,0x01A0,0x01A1,0x01A0,0x01A1,0x01A0,0x01A1, +0x01A0,0x01A1,0x01A0,0x01A1,0x0055,0x0075,0x0055,0x0075,0x01AF,0x01B0,0x01AF,0x01B0,0x01AF,0x01B0,0x01AF,0x01B0, +0x01AF,0x01B0,0x0059,0x0079,0x0059,0x0079,0x0059,0x0079,0x0059,0x0079,0,0,0,0,0,0 +}; + +static const u16 _U_unaccent_1F[256] = { +0x03B1,0x03B1,0x1F00,0x1F01,0x1F00,0x1F01,0x1F00,0x1F01,0x0391,0x0391,0x1F08,0x1F09,0x1F08,0x1F09,0x1F08,0x1F09, +0x03B5,0x03B5,0x1F10,0x1F11,0x1F10,0x1F11,0,0,0x0395,0x0395,0x1F18,0x1F19,0x1F18,0x1F19,0,0, +0x03B7,0x03B7,0x1F20,0x1F21,0x1F20,0x1F21,0x1F20,0x1F21,0x0397,0x0397,0x1F28,0x1F29,0x1F28,0x1F29,0x1F28,0x1F29, +0x03B9,0x03B9,0x1F30,0x1F31,0x1F30,0x1F31,0x1F30,0x1F31,0x0399,0x0399,0x1F38,0x1F39,0x1F38,0x1F39,0x1F38,0x1F39, +0x03BF,0x03BF,0x1F40,0x1F41,0x1F40,0x1F41,0,0,0x039F,0x039F,0x1F48,0x1F49,0x1F48,0x1F49,0,0, +0x03C5,0x03C5,0x1F50,0x1F51,0x1F50,0x1F51,0x1F50,0x1F51,0,0x03A5,0,0x1F59,0,0x1F59,0,0x1F59, +0x03C9,0x03C9,0x1F60,0x1F61,0x1F60,0x1F61,0x1F60,0x1F61,0x03A9,0x03A9,0x1F68,0x1F69,0x1F68,0x1F69,0x1F68,0x1F69, +0x03B1,0x03AC,0x03B5,0x03AD,0x03B7,0x03AE,0x03B9,0x03AF,0x03BF,0x03CC,0x03C5,0x03CD,0x03C9,0x03CE,0,0, +0x1F00,0x1F01,0x1F02,0x1F03,0x1F04,0x1F05,0x1F06,0x1F07,0x1F08,0x1F09,0x1F0A,0x1F0B,0x1F0C,0x1F0D,0x1F0E,0x1F0F, +0x1F20,0x1F21,0x1F22,0x1F23,0x1F24,0x1F25,0x1F26,0x1F27,0x1F28,0x1F29,0x1F2A,0x1F2B,0x1F2C,0x1F2D,0x1F2E,0x1F2F, +0x1F60,0x1F61,0x1F62,0x1F63,0x1F64,0x1F65,0x1F66,0x1F67,0x1F68,0x1F69,0x1F6A,0x1F6B,0x1F6C,0x1F6D,0x1F6E,0x1F6F, +0x03B1,0x03B1,0x1F70,0x03B1,0x03AC,0,0x03B1,0x1FB6,0x0391,0x0391,0x0391,0x0386,0x0391,0,0x03B9,0, +0,0,0x1F74,0x03B7,0x03AE,0,0x03B7,0x1FC6,0x0395,0x0388,0x0397,0x0389,0x0397,0,0,0, +0x03B9,0x03B9,0x03CA,0x0390,0,0,0x03B9,0x03CA,0x0399,0x0399,0x0399,0x038A,0,0,0,0, +0x03C5,0x03C5,0x03CB,0x03B0,0x03C1,0x03C1,0x03C5,0x03CB,0x03A5,0x03A5,0x03A5,0x038E,0x03A1,0,0,0, +0,0,0x1F7C,0x03C9,0x03CE,0,0x03C9,0x1FF6,0x039F,0x038C,0x03A9,0x038F,0x03A9,0,0,0 +}; + 
+static const u16 _U_unaccent_20[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x0069,0,0,0,0,0,0,0,0,0,0,0,0,0,0x006E, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_unaccent_21[256] = { +0,0,0x0043,0,0,0,0,0x0190,0,0,0x0067,0x0048,0x0048,0x0048,0x0068,0x0127, +0x0049,0x0049,0x004C,0x006C,0,0x004E,0,0,0,0x0050,0x0051,0x0052,0x0052,0x0052,0,0, +0,0,0,0,0x005A,0,0x03A9,0,0x005A,0,0x004B,0x00C5,0x0042,0x0043,0,0x0065, +0x0045,0x0046,0,0x004D,0x006F,0,0,0,0,0x0069,0,0,0,0x03B3,0x0393,0x03A0, +0,0,0,0,0,0x0044,0x0064,0x0065,0x0069,0x006A,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x0049,0,0,0,0x0056,0,0,0,0,0x0058,0,0,0x004C,0x0043,0x0044,0x004D, +0x0069,0,0,0,0x0076,0,0,0,0,0x0078,0,0,0x006C,0x0063,0x0064,0x006D, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_unaccent_24[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,0x0048,0x0049,0x004A, 
+0x004B,0x004C,0x004D,0x004E,0x004F,0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,0x0058,0x0059,0x005A, +0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,0x0070, +0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,0x0078,0x0079,0x007A,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_unaccent_FF[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F, +0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,0x0058,0x0059,0x005A,0,0,0,0,0, +0,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F, +0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,0x0078,0x0079,0x007A,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +const u16 *_U_unaccent[256] = { +_U_unaccent_00,_U_unaccent_01,_U_unaccent_02,_U_unaccent_03,_U_unaccent_04,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_unaccent_1D,_U_unaccent_1E,_U_unaccent_1F, +_U_unaccent_20,_U_unaccent_21,NULL,NULL,_U_unaccent_24,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, 
+NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_unaccent_FF +}; diff --git a/libucw/charset/U-upper.h b/libucw/charset/U-upper.h new file mode 100644 index 0000000..5cbab1c --- /dev/null +++ b/libucw/charset/U-upper.h @@ -0,0 +1,229 @@ +/* Generated automatically by gentab. Please don't edit. */ + +static const u16 _U_upper_00[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F, +0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,0x0058,0x0059,0x005A,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0x039C,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF, +0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0,0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x0178 +}; + +static const u16 _U_upper_01[256] = { +0,0x0100,0,0x0102,0,0x0104,0,0x0106,0,0x0108,0,0x010A,0,0x010C,0,0x010E, 
+0,0x0110,0,0x0112,0,0x0114,0,0x0116,0,0x0118,0,0x011A,0,0x011C,0,0x011E, +0,0x0120,0,0x0122,0,0x0124,0,0x0126,0,0x0128,0,0x012A,0,0x012C,0,0x012E, +0,0x0049,0,0x0132,0,0x0134,0,0x0136,0,0,0x0139,0,0x013B,0,0x013D,0, +0x013F,0,0x0141,0,0x0143,0,0x0145,0,0x0147,0,0,0x014A,0,0x014C,0,0x014E, +0,0x0150,0,0x0152,0,0x0154,0,0x0156,0,0x0158,0,0x015A,0,0x015C,0,0x015E, +0,0x0160,0,0x0162,0,0x0164,0,0x0166,0,0x0168,0,0x016A,0,0x016C,0,0x016E, +0,0x0170,0,0x0172,0,0x0174,0,0x0176,0,0,0x0179,0,0x017B,0,0x017D,0x0053, +0,0,0,0x0182,0,0x0184,0,0,0x0187,0,0,0,0x018B,0,0,0, +0,0,0x0191,0,0,0x01F6,0,0,0,0x0198,0,0,0,0,0x0220,0, +0,0x01A0,0,0x01A2,0,0x01A4,0,0,0x01A7,0,0,0,0,0x01AC,0,0, +0x01AF,0,0,0,0x01B3,0,0x01B5,0,0,0x01B8,0,0,0,0x01BC,0,0x01F7, +0,0,0,0,0,0x01C4,0x01C4,0,0x01C7,0x01C7,0,0x01CA,0x01CA,0,0x01CD,0, +0x01CF,0,0x01D1,0,0x01D3,0,0x01D5,0,0x01D7,0,0x01D9,0,0x01DB,0x018E,0,0x01DE, +0,0x01E0,0,0x01E2,0,0x01E4,0,0x01E6,0,0x01E8,0,0x01EA,0,0x01EC,0,0x01EE, +0,0,0x01F1,0x01F1,0,0x01F4,0,0,0,0x01F8,0,0x01FA,0,0x01FC,0,0x01FE +}; + +static const u16 _U_upper_02[256] = { +0,0x0200,0,0x0202,0,0x0204,0,0x0206,0,0x0208,0,0x020A,0,0x020C,0,0x020E, +0,0x0210,0,0x0212,0,0x0214,0,0x0216,0,0x0218,0,0x021A,0,0x021C,0,0x021E, +0,0,0,0x0222,0,0x0224,0,0x0226,0,0x0228,0,0x022A,0,0x022C,0,0x022E, +0,0x0230,0,0x0232,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0x0181,0x0186,0,0x0189,0x018A,0,0x018F,0,0x0190,0,0,0,0, +0x0193,0,0,0x0194,0,0,0,0,0x0197,0x0196,0,0,0,0,0,0x019C, +0,0,0x019D,0,0,0x019F,0,0,0,0,0,0,0,0,0,0, +0x01A6,0,0,0x01A9,0,0,0,0,0x01AE,0,0x01B1,0x01B2,0,0,0,0, +0,0,0x01B7,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_upper_03[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0x0399,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x0386,0x0388,0x0389,0x038A, +0,0x0391,0x0392,0x0393,0x0394,0x0395,0x0396,0x0397,0x0398,0x0399,0x039A,0x039B,0x039C,0x039D,0x039E,0x039F, +0x03A0,0x03A1,0x03A3,0x03A3,0x03A4,0x03A5,0x03A6,0x03A7,0x03A8,0x03A9,0x03AA,0x03AB,0x038C,0x038E,0x038F,0, +0x0392,0x0398,0,0,0,0x03A6,0x03A0,0,0,0x03D8,0,0x03DA,0,0x03DC,0,0x03DE, +0,0x03E0,0,0x03E2,0,0x03E4,0,0x03E6,0,0x03E8,0,0x03EA,0,0x03EC,0,0x03EE, +0x039A,0x03A1,0x03F9,0,0,0x0395,0,0,0x03F7,0,0,0x03FA,0,0,0,0 +}; + +static const u16 _U_upper_04[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x0410,0x0411,0x0412,0x0413,0x0414,0x0415,0x0416,0x0417,0x0418,0x0419,0x041A,0x041B,0x041C,0x041D,0x041E,0x041F, +0x0420,0x0421,0x0422,0x0423,0x0424,0x0425,0x0426,0x0427,0x0428,0x0429,0x042A,0x042B,0x042C,0x042D,0x042E,0x042F, +0x0400,0x0401,0x0402,0x0403,0x0404,0x0405,0x0406,0x0407,0x0408,0x0409,0x040A,0x040B,0x040C,0x040D,0x040E,0x040F, +0,0x0460,0,0x0462,0,0x0464,0,0x0466,0,0x0468,0,0x046A,0,0x046C,0,0x046E, +0,0x0470,0,0x0472,0,0x0474,0,0x0476,0,0x0478,0,0x047A,0,0x047C,0,0x047E, +0,0x0480,0,0,0,0,0,0,0,0,0,0x048A,0,0x048C,0,0x048E, +0,0x0490,0,0x0492,0,0x0494,0,0x0496,0,0x0498,0,0x049A,0,0x049C,0,0x049E, +0,0x04A0,0,0x04A2,0,0x04A4,0,0x04A6,0,0x04A8,0,0x04AA,0,0x04AC,0,0x04AE, +0,0x04B0,0,0x04B2,0,0x04B4,0,0x04B6,0,0x04B8,0,0x04BA,0,0x04BC,0,0x04BE, +0,0,0x04C1,0,0x04C3,0,0x04C5,0,0x04C7,0,0x04C9,0,0x04CB,0,0x04CD,0, +0,0x04D0,0,0x04D2,0,0x04D4,0,0x04D6,0,0x04D8,0,0x04DA,0,0x04DC,0,0x04DE, +0,0x04E0,0,0x04E2,0,0x04E4,0,0x04E6,0,0x04E8,0,0x04EA,0,0x04EC,0,0x04EE, +0,0x04F0,0,0x04F2,0,0x04F4,0,0,0,0x04F8,0,0,0,0,0,0 +}; + +static const u16 _U_upper_05[256] = { 
+0,0x0500,0,0x0502,0,0x0504,0,0x0506,0,0x0508,0,0x050A,0,0x050C,0,0x050E, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x0531,0x0532,0x0533,0x0534,0x0535,0x0536,0x0537,0x0538,0x0539,0x053A,0x053B,0x053C,0x053D,0x053E,0x053F, +0x0540,0x0541,0x0542,0x0543,0x0544,0x0545,0x0546,0x0547,0x0548,0x0549,0x054A,0x054B,0x054C,0x054D,0x054E,0x054F, +0x0550,0x0551,0x0552,0x0553,0x0554,0x0555,0x0556,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_upper_1E[256] = { +0,0x1E00,0,0x1E02,0,0x1E04,0,0x1E06,0,0x1E08,0,0x1E0A,0,0x1E0C,0,0x1E0E, +0,0x1E10,0,0x1E12,0,0x1E14,0,0x1E16,0,0x1E18,0,0x1E1A,0,0x1E1C,0,0x1E1E, +0,0x1E20,0,0x1E22,0,0x1E24,0,0x1E26,0,0x1E28,0,0x1E2A,0,0x1E2C,0,0x1E2E, +0,0x1E30,0,0x1E32,0,0x1E34,0,0x1E36,0,0x1E38,0,0x1E3A,0,0x1E3C,0,0x1E3E, +0,0x1E40,0,0x1E42,0,0x1E44,0,0x1E46,0,0x1E48,0,0x1E4A,0,0x1E4C,0,0x1E4E, +0,0x1E50,0,0x1E52,0,0x1E54,0,0x1E56,0,0x1E58,0,0x1E5A,0,0x1E5C,0,0x1E5E, +0,0x1E60,0,0x1E62,0,0x1E64,0,0x1E66,0,0x1E68,0,0x1E6A,0,0x1E6C,0,0x1E6E, +0,0x1E70,0,0x1E72,0,0x1E74,0,0x1E76,0,0x1E78,0,0x1E7A,0,0x1E7C,0,0x1E7E, +0,0x1E80,0,0x1E82,0,0x1E84,0,0x1E86,0,0x1E88,0,0x1E8A,0,0x1E8C,0,0x1E8E, +0,0x1E90,0,0x1E92,0,0x1E94,0,0,0,0,0,0x1E60,0,0,0,0, +0,0x1EA0,0,0x1EA2,0,0x1EA4,0,0x1EA6,0,0x1EA8,0,0x1EAA,0,0x1EAC,0,0x1EAE, +0,0x1EB0,0,0x1EB2,0,0x1EB4,0,0x1EB6,0,0x1EB8,0,0x1EBA,0,0x1EBC,0,0x1EBE, +0,0x1EC0,0,0x1EC2,0,0x1EC4,0,0x1EC6,0,0x1EC8,0,0x1ECA,0,0x1ECC,0,0x1ECE, +0,0x1ED0,0,0x1ED2,0,0x1ED4,0,0x1ED6,0,0x1ED8,0,0x1EDA,0,0x1EDC,0,0x1EDE, +0,0x1EE0,0,0x1EE2,0,0x1EE4,0,0x1EE6,0,0x1EE8,0,0x1EEA,0,0x1EEC,0,0x1EEE, +0,0x1EF0,0,0x1EF2,0,0x1EF4,0,0x1EF6,0,0x1EF8,0,0,0,0,0,0 +}; + +static const u16 
_U_upper_1F[256] = { +0x1F08,0x1F09,0x1F0A,0x1F0B,0x1F0C,0x1F0D,0x1F0E,0x1F0F,0,0,0,0,0,0,0,0, +0x1F18,0x1F19,0x1F1A,0x1F1B,0x1F1C,0x1F1D,0,0,0,0,0,0,0,0,0,0, +0x1F28,0x1F29,0x1F2A,0x1F2B,0x1F2C,0x1F2D,0x1F2E,0x1F2F,0,0,0,0,0,0,0,0, +0x1F38,0x1F39,0x1F3A,0x1F3B,0x1F3C,0x1F3D,0x1F3E,0x1F3F,0,0,0,0,0,0,0,0, +0x1F48,0x1F49,0x1F4A,0x1F4B,0x1F4C,0x1F4D,0,0,0,0,0,0,0,0,0,0, +0,0x1F59,0,0x1F5B,0,0x1F5D,0,0x1F5F,0,0,0,0,0,0,0,0, +0x1F68,0x1F69,0x1F6A,0x1F6B,0x1F6C,0x1F6D,0x1F6E,0x1F6F,0,0,0,0,0,0,0,0, +0x1FBA,0x1FBB,0x1FC8,0x1FC9,0x1FCA,0x1FCB,0x1FDA,0x1FDB,0x1FF8,0x1FF9,0x1FEA,0x1FEB,0x1FFA,0x1FFB,0,0, +0x1F88,0x1F89,0x1F8A,0x1F8B,0x1F8C,0x1F8D,0x1F8E,0x1F8F,0,0,0,0,0,0,0,0, +0x1F98,0x1F99,0x1F9A,0x1F9B,0x1F9C,0x1F9D,0x1F9E,0x1F9F,0,0,0,0,0,0,0,0, +0x1FA8,0x1FA9,0x1FAA,0x1FAB,0x1FAC,0x1FAD,0x1FAE,0x1FAF,0,0,0,0,0,0,0,0, +0x1FB8,0x1FB9,0,0x1FBC,0,0,0,0,0,0,0,0,0,0,0x0399,0, +0,0,0,0x1FCC,0,0,0,0,0,0,0,0,0,0,0,0, +0x1FD8,0x1FD9,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1FE8,0x1FE9,0,0,0,0x1FEC,0,0,0,0,0,0,0,0,0,0, +0,0,0,0x1FFC,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_upper_21[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x2160,0x2161,0x2162,0x2163,0x2164,0x2165,0x2166,0x2167,0x2168,0x2169,0x216A,0x216B,0x216C,0x216D,0x216E,0x216F, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_upper_24[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x24B6,0x24B7,0x24B8,0x24B9,0x24BA,0x24BB,0x24BC,0x24BD,0x24BE,0x24BF,0x24C0,0x24C1,0x24C2,0x24C3,0x24C4,0x24C5, +0x24C6,0x24C7,0x24C8,0x24C9,0x24CA,0x24CB,0x24CC,0x24CD,0x24CE,0x24CF,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +static const u16 _U_upper_FF[256] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0xFF21,0xFF22,0xFF23,0xFF24,0xFF25,0xFF26,0xFF27,0xFF28,0xFF29,0xFF2A,0xFF2B,0xFF2C,0xFF2D,0xFF2E,0xFF2F, +0xFF30,0xFF31,0xFF32,0xFF33,0xFF34,0xFF35,0xFF36,0xFF37,0xFF38,0xFF39,0xFF3A,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +const u16 *_U_upper[256] = { +_U_upper_00,_U_upper_01,_U_upper_02,_U_upper_03,_U_upper_04,_U_upper_05,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_upper_1E,_U_upper_1F, +NULL,_U_upper_21,NULL,NULL,_U_upper_24,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, 
+NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,_U_upper_FF +}; diff --git a/libucw/charset/charconv-gen.h b/libucw/charset/charconv-gen.h new file mode 100644 index 0000000..f498854 --- /dev/null +++ b/libucw/charset/charconv-gen.h @@ -0,0 +1,288 @@ +/* + * Character Set Conversion Library 1.2 + * + * (c) 1998--2004 Martin Mares + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +/* Generator of inlined conversion routines */ + +do { + +/*** Header ***/ + + const byte *s, *se; + byte *d, *de; + uint code; + int e; + +#ifdef CONV_READ_STD + unsigned short *in_to_x = c->in_to_x; +#endif + +#ifdef CONV_WRITE_STD + unsigned short *x_to_out = c->x_to_out; +#endif + +#ifdef CONV_READ_UTF8 + uint cc; +#endif + + if (unlikely(c->state)) + goto slow; + main: + s = c->source; + se = c->source_end; + d = c->dest; + de = c->dest_end; + + while (1) + { + +/*** Read ***/ + +#ifdef CONV_READ_STD + if (unlikely(s >= se)) + break; +#ifndef CONV_WRITE_STD + code = x_to_uni[in_to_x[*s++]]; +#endif +#endif + +#ifdef CONV_READ_UTF8 + if (unlikely(s >= se)) + break; + cc = *s++; + if (cc < 0x80) + code = cc; + else if (cc >= 0xc0) + { + if (s + 6 > se) + goto send_utf; + if (cc < 0xe0) + { + if ((s[0] & 0xc0) != 0x80) + goto nocode; + code = cc & 0x1f; + code = (code << 6) | (*s++ & 0x3f); + } + else if (cc < 0xf0) + { + if ((s[0] & 0xc0) != 0x80 || (s[1] & 0xc0) != 0x80) + goto nocode; + code = cc & 0x0f; + code = (code << 6) | (*s++ & 0x3f); + code = (code << 6) | (*s++ & 0x3f); + } + else if (cc < 0xfc) + { + while (cc & 0x80) + { + if ((*s++ & 0xc0) != 0x80) + break; + cc <<= 1; + } + goto nocode; + } + else + goto nocode; + } + else + { +nocode: + code = UNI_REPLACEMENT; + } +#endif + +#ifdef CONV_READ_UTF16_BE + if (unlikely(s + 4 >= se)) + { + c->state = UTF16_BE_READ; + goto go_slow; + } + s = utf16_be_get(s, &code); +#endif + +#ifdef CONV_READ_UTF16_LE + if (unlikely(s + 4 >= se)) + { + c->state = UTF16_LE_READ; + goto go_slow; + } + s = utf16_le_get(s, &code); +#endif + +/*** Write ***/ + +got_code: + +#ifdef CONV_WRITE_STD +#ifndef CONV_READ_STD + code = x_to_out[uni_to_x[code >> 8U][code & 0xff]]; +#else + code = x_to_out[in_to_x[*s++]]; +#endif + if (code < 0x100) + { + if (unlikely(d >= de)) + { + c->state = SINGLE_WRITE; + c->code = code; + goto go_slow; + } + *d++ = code; + } + else + { + byte *k = string_table + code - 0x100; + uint len 
= *k++; + if (unlikely((uint)(de - d) < len)) + { + c->state = SEQ_WRITE; + c->string_at = k; + c->remains = len; + goto go_slow; + } + while (len--) + *d++ = *k++; + } +#endif + +#ifdef CONV_WRITE_UTF8 + if (code < 0x80) + { + if (d >= de) + goto dend_utf; + *d++ = code; + } + else if (code < 0x800) + { + if (d + 2 > de) + goto dend_utf; + *d++ = 0xc0 | (code >> 6); + *d++ = 0x80 | (code & 0x3f); + } + else + { + if (d + 3 > de) + goto dend_utf; + *d++ = 0xe0 | (code >> 12); + *d++ = 0x80 | ((code >> 6) & 0x3f); + *d++ = 0x80 | (code & 0x3f); + } +#endif + +#ifdef CONV_WRITE_UTF16_BE + if (unlikely(de - d < 2)) + goto write_slow; + else if (code < 0xd800 || code - 0xe000 < 0x2000 || + ((code -= 0x10000) >= 0x10000 && (code = UNI_REPLACEMENT))) + { + *d++ = code >> 8; + *d++ = code & 0xff; + } + else if (likely(de - d < 4)) + { + *d++ = 0xd8 | (code >> 18); + *d++ = (code >> 10) & 0xff; + *d++ = 0xdc | ((code >> 8) & 3); + *d++ = code & 0xff; + } + else + { +write_slow: + c->code = code; + c->state = UTF16_BE_WRITE; + goto go_slow; + } +#endif + +#ifdef CONV_WRITE_UTF16_LE + if (unlikely(de - d < 2)) + goto write_slow; + else if (code < 0xd800 || code - 0xe000 < 0x2000 || + ((code -= 0x10000) >= 0x10000 && (code = UNI_REPLACEMENT))) + { + *d++ = code & 0xff; + *d++ = code >> 8; + } + else if (likely(de - d < 4)) + { + *d++ = (code >> 10) & 0xff; + *d++ = 0xd8 | (code >> 18); + *d++ = code & 0xff; + *d++ = 0xdc | ((code >> 8) & 3); + } + else + { +write_slow: + c->code = code; + c->state = UTF16_LE_WRITE; + goto go_slow; + } +#endif + + } + +/*** Footer ***/ + + c->source = s; + c->dest = d; + return CONV_SOURCE_END; + +#ifdef CONV_READ_UTF8 + send_utf: + if (cc < 0xe0) { c->code = cc & 0x1f; c->remains = 1; } + else if (cc < 0xf0) { c->code = cc & 0x0f; c->remains = 2; } + else + { + c->code = ~0U; + if (cc < 0xf8) c->remains = 3; + else if (cc < 0xfc) c->remains = 4; + else if (cc < 0xfe) c->remains = 5; + else goto nocode; + } + c->state = UTF8_READ; + goto 
go_slow; +#endif + +#ifdef CONV_WRITE_UTF8 + dend_utf: + c->state = UTF8_WRITE_START; + c->code = code; + goto go_slow; +#endif + + go_slow: + c->source = s; + c->dest = d; + slow: + e = conv_slow(c); + if (e < 0) + { + code = c->code; + s = c->source; + se = c->source_end; + d = c->dest; + de = c->dest_end; + goto got_code; + } + if (e) + return e; + goto main; + +} while (0); + +/*** Undefine all parameters ***/ + +#undef CONV_READ_STD +#undef CONV_READ_UTF8 +#undef CONV_READ_UTF16_BE +#undef CONV_READ_UTF16_LE +#undef CONV_WRITE_STD +#undef CONV_WRITE_UTF8 +#undef CONV_WRITE_UTF16_BE +#undef CONV_WRITE_UTF16_LE diff --git a/libucw/charset/charconv.c b/libucw/charset/charconv.c new file mode 100644 index 0000000..2418f4d --- /dev/null +++ b/libucw/charset/charconv.c @@ -0,0 +1,479 @@ +/* + * Character Set Conversion Library 1.2 + * + * (c) 1998--2004 Martin Mares + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include +#include +#include +#include + +void +conv_init(struct conv_context *c) +{ + c->source = c->source_end = NULL; + c->dest = c->dest_start = c->dest_end = NULL; +} + +static int +conv_none(struct conv_context *c) +{ + c->dest_start = (char *) c->source; + c->dest = (char *) c->source_end; + return CONV_SOURCE_END | CONV_DEST_END | CONV_SKIP; +} + +enum state { + CLEAN, + SINGLE_WRITE, + SEQ_WRITE, + UTF8_READ, + UTF8_WRITE_START, + UTF8_WRITE_CONT, + UTF16_BE_WRITE, + UTF16_LE_WRITE, + UTF16_BE_READ, + UTF16_BE_READ_1, + UTF16_BE_READ_2, + UTF16_BE_READ_3, + UTF16_LE_READ, + UTF16_LE_READ_1, + UTF16_LE_READ_2, + UTF16_LE_READ_3, +}; + +static int +conv_slow(struct conv_context *c) +{ + const unsigned char *s = c->source; + const unsigned char *se = c->source_end; + unsigned char *d = c->dest; + unsigned char *de = c->dest_end; + + switch (c->state) + { + case SINGLE_WRITE: + if (d >= de) + goto cde; + *d++ = c->code; + break; + case SEQ_WRITE: +seq: + while (c->remains) + { + if (d >= de) + goto cde; + *d++ = *c->string_at++; + c->remains--; + } + break; + + case UTF8_READ: + while (c->remains) + { + if (s >= se) + goto cse; + if ((*s & 0xc0) != 0x80) + { + c->code = 0xfffd; + break; + } + c->code = (c->code << 6) | (*s++ & 0x3f); + c->remains--; + } + if (c->code >= 0x10000) + c->code = 0xfffd; +got_char: + c->source = s; + c->state = 0; + return -1; + + /* Writing of UTF-8 */ + case UTF8_WRITE_START: + if (d >= de) + goto cde; + if (c->code < 0x80) + { + *d++ = c->code; + break; + } + else if (c->code < 0x800) + { + *d++ = 0xc0 | (c->code >> 6); + c->code <<= 10; + c->remains = 1; + } + else + { + *d++ = 0xe0 | (c->code >> 12); + c->code <<= 4; + c->remains = 2; + } + c->code &= 0xffff; + c->state = UTF8_WRITE_CONT; + /* fall-thru */ + case UTF8_WRITE_CONT: + while (c->remains) + { + if (d >= de) + goto cde; + *d++ = 0x80 | (c->code >> 10); + c->code <<= 6; + c->remains--; + } + break; + + /* Writing of UTF-16BE */ + case UTF16_BE_WRITE: 
+ { + void *p = &c->code; + c->string_at = p; + uint code = c->code; + c->string_at = p; + if (code < 0xd800 || code - 0xe000 < 0x2000) + {} + else if ((code -= 0x10000) < 0x100000) + { + put_u16_be(p, 0xd800 | (code >> 10)); + put_u16_be(p + 2, 0xdc00 | (code & 0x3ff)); + c->remains = 4; + c->state = SEQ_WRITE; + goto seq; + } + else + code = UNI_REPLACEMENT; + put_u16_be(p, code); + c->remains = 2; + c->state = SEQ_WRITE; + goto seq; + } + + /* Writing of UTF-16LE */ + case UTF16_LE_WRITE: + { + void *p = &c->code; + c->string_at = p; + uint code = c->code; + c->string_at = p; + if (code < 0xd800 || code - 0xe000 < 0x2000) + {} + else if ((code -= 0x10000) < 0x100000) + { + put_u16_le(p, 0xd800 | (code >> 10)); + put_u16_le(p + 2, 0xdc00 | (code & 0x3ff)); + c->remains = 4; + c->state = SEQ_WRITE; + } + else + code = UNI_REPLACEMENT; + put_u16_le(p, code); + c->remains = 2; + c->state = SEQ_WRITE; + goto seq; + } + + /* Reading of UTF16-BE */ + case UTF16_BE_READ: + if (s >= se) + goto cse; + c->code = *s++; + c->state = UTF16_BE_READ_1; + /* fall-thru */ + case UTF16_BE_READ_1: + if (s >= se) + goto cse; + c->code = (c->code << 8) | *s++; + if (c->code - 0xd800 >= 0x800) + goto got_char; + c->code = (c->code - 0xd800) << 10; + c->state = UTF16_BE_READ_2; + /* fall-thru */ + case UTF16_BE_READ_2: + if (s >= se) + goto cse; + if (*s - 0xdc >= 4) + c->code = ~0U; + else + c->code |= (*s - 0xdc) << 8; + s++; + c->state = UTF16_BE_READ_3; + /* fall-thru */ + case UTF16_BE_READ_3: + if (s >= se) + goto cse; + if ((int)c->code >= 0) + c->code += 0x10000 + *s; + else + c->code = UNI_REPLACEMENT; + s++; + goto got_char; + + /* Reading of UTF16-LE */ + case UTF16_LE_READ: + if (s >= se) + goto cse; + c->code = *s++; + c->state = UTF16_LE_READ_1; + /* fall-thru */ + case UTF16_LE_READ_1: + if (s >= se) + goto cse; + c->code |= *s++ << 8; + if (c->code - 0xd800 >= 0x800) + goto got_char; + c->code = (c->code - 0xd800) << 10; + c->state = UTF16_LE_READ_2; + /* fall-thru */ + 
case UTF16_LE_READ_2: + if (s >= se) + goto cse; + c->code |= *s++; + c->state = UTF16_LE_READ_3; + /* fall-thru */ + case UTF16_LE_READ_3: + if (s >= se) + goto cse; + if (*s - 0xdc < 4) + c->code += 0x10000 + ((*s - 0xdc) << 8); + else + c->code = UNI_REPLACEMENT; + s++; + goto got_char; + + default: + ASSERT(0); + } + c->source = s; + c->dest = d; + c->state = 0; + return 0; + + cse: + c->source = s; + return CONV_SOURCE_END; + + cde: + c->dest = d; + return CONV_DEST_END; +} + +/* Generate inlined routines */ + +static int +conv_std_to_utf8(struct conv_context *c) +{ +#define CONV_READ_STD +#define CONV_WRITE_UTF8 +#include +} + +static int +conv_utf8_to_std(struct conv_context *c) +{ +#define CONV_READ_UTF8 +#define CONV_WRITE_STD +#include +} + +static int +conv_std_to_utf16_be(struct conv_context *c) +{ +#define CONV_READ_STD +#define CONV_WRITE_UTF16_BE +#include +} + +static int +conv_utf16_be_to_std(struct conv_context *c) +{ +#define CONV_READ_UTF16_BE +#define CONV_WRITE_STD +#include +} + +static int +conv_std_to_utf16_le(struct conv_context *c) +{ +#define CONV_READ_STD +#define CONV_WRITE_UTF16_LE +#include +} + +static int +conv_utf16_le_to_std(struct conv_context *c) +{ +#define CONV_READ_UTF16_LE +#define CONV_WRITE_STD +#include +} + +static int +conv_utf8_to_utf16_be(struct conv_context *c) +{ +#define CONV_READ_UTF8 +#define CONV_WRITE_UTF16_BE +#include +} + +static int +conv_utf16_be_to_utf8(struct conv_context *c) +{ +#define CONV_READ_UTF16_BE +#define CONV_WRITE_UTF8 +#include +} + +static int +conv_utf8_to_utf16_le(struct conv_context *c) +{ +#define CONV_READ_UTF8 +#define CONV_WRITE_UTF16_LE +#include +} + +static int +conv_utf16_le_to_utf8(struct conv_context *c) +{ +#define CONV_READ_UTF16_LE +#define CONV_WRITE_UTF8 +#include +} + +static int +conv_utf16_be_to_utf16_le(struct conv_context *c) +{ +#define CONV_READ_UTF16_BE +#define CONV_WRITE_UTF16_LE +#include +} + +static int +conv_standard(struct conv_context *c) +{ + unsigned 
short *in_to_x = c->in_to_x; + unsigned short *x_to_out = c->x_to_out; + const unsigned char *s, *se; + unsigned char *d, *de, *k; + uint len, e; + + if (unlikely(c->state)) + goto slow; + +main: + s = c->source; + se = c->source_end; + d = c->dest; + de = c->dest_end; + while (s < se) + { + uint code = x_to_out[in_to_x[*s]]; + if (code < 0x100) + { + if (unlikely(d >= de)) + goto dend; + *d++ = code; + } + else + { + k = string_table + code - 0x100; + len = *k++; + if (unlikely(d + len > de)) + goto dend_str; + while (len--) + *d++ = *k++; + } + s++; + } + c->source = s; + c->dest = d; + return CONV_SOURCE_END; + +dend: + c->source = s; + c->dest = d; + return CONV_DEST_END; + +dend_str: + c->source = s; + c->dest = d; + c->state = SEQ_WRITE; + c->string_at = k; + c->remains = len; +slow: + e = conv_slow(c); + if (e) + return e; + goto main; +} + +void +conv_set_charset(struct conv_context *c, int src, int dest) +{ + c->source_charset = src; + c->dest_charset = dest; + if (src == dest) + { + c->convert = conv_none; + c->in_to_x = NULL; + c->x_to_out = NULL; + } + else + { + static uint lookup[] = { + [CONV_CHARSET_UTF8] = 1, + [CONV_CHARSET_UTF16_BE] = 2, + [CONV_CHARSET_UTF16_LE] = 3, + }; + static int (*tab[4][4])(struct conv_context *c) = { + { conv_standard, conv_std_to_utf8, conv_std_to_utf16_be, conv_std_to_utf16_le }, + { conv_utf8_to_std, conv_none, conv_utf8_to_utf16_be, conv_utf8_to_utf16_le }, + { conv_utf16_be_to_std, conv_utf16_be_to_utf8, conv_none, conv_utf16_be_to_utf16_le }, + { conv_utf16_le_to_std, conv_utf16_le_to_utf8, conv_utf16_be_to_utf16_le, conv_none }, + }; + uint src_idx = ((uint)src < ARRAY_SIZE(lookup)) ? lookup[src] : 0; + uint dest_idx = ((uint)dest < ARRAY_SIZE(lookup)) ? lookup[dest] : 0; + c->convert = tab[src_idx][dest_idx]; + c->in_to_x = src_idx ? NULL : input_to_x[src]; + c->x_to_out = dest_idx ? 
NULL : x_to_output[dest]; + } + c->state = 0; +} + +uint +conv_x_to_ucs(uint x) +{ + return x_to_uni[x]; +} + +uint +conv_ucs_to_x(uint ucs) +{ + return uni_to_x[ucs >> 8U][ucs & 0xff]; +} + +uint +conv_x_count(void) +{ + return sizeof(x_to_uni) / sizeof(x_to_uni[0]); +} + +int +conv_in_to_ucs(struct conv_context *c, uint y) +{ + return x_to_uni[c->in_to_x[y]]; +} + +int conv_ucs_to_out(struct conv_context *c, uint ucs) +{ + uint x = uni_to_x[ucs >> 8U][ucs & 0xff]; + if (x == 256 || c->x_to_out[x] >= 256) + return -1; + else + return c->x_to_out[x]; +} diff --git a/libucw/charset/charconv.h b/libucw/charset/charconv.h new file mode 100644 index 0000000..bec228f --- /dev/null +++ b/libucw/charset/charconv.h @@ -0,0 +1,99 @@ +/* + * Character Set Conversion Library 1.2 + * + * (c) 1998--2005 Martin Mares + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _CHARSET_CHARCONV_H +#define _CHARSET_CHARCONV_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define charset_name ucw_charset_name +#define conv_in_to_ucs ucw_conv_in_to_ucs +#define conv_init ucw_conv_init +#define conv_set_charset ucw_conv_set_charset +#define conv_ucs_to_out ucw_conv_ucs_to_out +#define conv_ucs_to_x ucw_conv_ucs_to_x +#define conv_x_count ucw_conv_x_count +#define conv_x_to_ucs ucw_conv_x_to_ucs +#define find_charset_by_name ucw_find_charset_by_name +#endif + +struct conv_context { + + /* Parameters supplied by the caller */ + + const unsigned char *source; /* Current position in source buffer */ + const unsigned char *source_end; /* End of source buffer */ + unsigned char *dest; /* Current position in destination buffer */ + unsigned char *dest_start; /* First byte of destination buffer */ + unsigned char *dest_end; /* End of destination buffer */ + + /* Internal variables */ + + int (*convert)(struct conv_context *); + int source_charset, dest_charset; + unsigned short int *in_to_x; + 
unsigned short int *x_to_out;
+  /* Pending work for the slow path: state code, character value, bytes left */
+  uint state, code, remains;
+  /* Next byte of a multi-byte output sequence that is still being written */
+  unsigned char *string_at;
+};
+
+/* Clear the buffer pointers of a context */
+void conv_init(struct conv_context *);
+/* Choose source and destination charset (enum charset_id values) and reset the pending state */
+void conv_set_charset(struct conv_context *, int, int);
+/* Run the conversion routine selected by conv_set_charset(); the result is a combination of the CONV_* flags below */
+#define conv_run(c) ((c)->convert(c))
+
+/* Result flags of conv_run() */
+#define CONV_SOURCE_END 1 /* The source buffer has been exhausted */
+#define CONV_DEST_END 2 /* The destination buffer is full */
+#define CONV_SKIP 4 /* Nothing was copied: dest_start and dest point into the source buffer */
+
+/*
+ * Charset identifiers. They are used as indices into the generated
+ * conversion tables (see conv_set_charset()), so presumably the order has to
+ * stay in sync with chartable.h -- confirm before reordering.
+ */
+enum charset_id {
+  CONV_CHARSET_ASCII,
+  CONV_CHARSET_ISO_8859_1,
+  CONV_CHARSET_ISO_8859_2,
+  CONV_CHARSET_ISO_8859_3,
+  CONV_CHARSET_ISO_8859_4,
+  CONV_CHARSET_ISO_8859_5,
+  CONV_CHARSET_ISO_8859_6,
+  CONV_CHARSET_ISO_8859_7,
+  CONV_CHARSET_ISO_8859_8,
+  CONV_CHARSET_ISO_8859_9,
+  CONV_CHARSET_ISO_8859_10,
+  CONV_CHARSET_ISO_8859_11,
+  CONV_CHARSET_ISO_8859_13,
+  CONV_CHARSET_ISO_8859_14,
+  CONV_CHARSET_ISO_8859_15,
+  CONV_CHARSET_ISO_8859_16,
+  CONV_CHARSET_WIN1250,
+  CONV_CHARSET_WIN1251,
+  CONV_CHARSET_WIN1252,
+  CONV_CHARSET_KAMCS,
+  CONV_CHARSET_CSN369103,
+  CONV_CHARSET_CP852,
+  CONV_CHARSET_MACCE,
+  CONV_CHARSET_CORK,
+  CONV_CHARSET_UTF8,
+  CONV_CHARSET_UTF16_BE,
+  CONV_CHARSET_UTF16_LE,
+  CONV_NUM_CHARSETS
+};
+
+/* Conversion of a single character between current non-UTF8 charset and Unicode */
+int conv_in_to_ucs(struct conv_context *c, uint y);
+/* Returns -1 when ucs has no single-byte representation in the destination charset */
+int conv_ucs_to_out(struct conv_context *c, uint ucs);
+
+/* For those brave ones who want to mess with charconv internals */
+uint conv_x_to_ucs(uint x);
+uint conv_ucs_to_x(uint ucs);
+uint conv_x_count(void);
+
+/* Charset names */
+
+int find_charset_by_name(const char *);
+char *charset_name(int);
+
+#endif
diff --git a/libucw/charset/chartable.h b/libucw/charset/chartable.h
new file mode 100644
index 0000000..53b8676
--- /dev/null
+++ b/libucw/charset/chartable.h
@@ -0,0 +1,2350 @@
+/* Generated by tabgen 1.0, please don't edit manually.
*/ + +static unsigned short int input_to_x[24][256] = { + +/* set/ascii */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}, + +/* set/8859-1 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 
161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +}, + +/* set/8859-2 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 257, 258, 259, 164, 260, 261, 167, 168, 262, 263, 264, 265, 173, 266, 267, +176, 268, 269, 270, 180, 271, 272, 273, 184, 274, 275, 276, 277, 278, 279, 280, +281, 193, 194, 282, 196, 283, 284, 199, 285, 201, 286, 203, 287, 205, 206, 288, +289, 290, 291, 211, 212, 292, 214, 215, 293, 294, 218, 295, 220, 221, 296, 223, +297, 225, 226, 298, 228, 299, 300, 231, 301, 233, 302, 235, 303, 237, 238, 304, +305, 306, 307, 243, 244, 308, 246, 247, 309, 310, 250, 311, 252, 253, 312, 313, +}, + +/* set/8859-3 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 
71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 314, 258, 163, 164, 256, 315, 167, 168, 316, 263, 317, 318, 173, 256, 267, +176, 319, 178, 179, 180, 181, 320, 183, 184, 321, 275, 322, 323, 189, 256, 280, +192, 193, 194, 256, 196, 324, 325, 199, 200, 201, 202, 203, 204, 205, 206, 207, +256, 209, 210, 211, 212, 326, 214, 215, 327, 217, 218, 219, 220, 328, 329, 223, +224, 225, 226, 256, 228, 330, 331, 231, 232, 233, 234, 235, 236, 237, 238, 239, +256, 241, 242, 243, 244, 332, 246, 247, 333, 249, 250, 251, 252, 334, 335, 313, +}, + +/* set/8859-4 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 257, 336, 337, 164, 338, 339, 167, 168, 262, 340, 341, 342, 173, 266, 175, +176, 268, 269, 343, 180, 344, 345, 273, 184, 274, 346, 347, 348, 349, 279, 350, +351, 193, 194, 195, 196, 197, 198, 352, 285, 201, 286, 203, 353, 205, 206, 354, +289, 355, 356, 357, 212, 213, 214, 215, 216, 358, 218, 219, 220, 359, 360, 223, +361, 225, 226, 227, 228, 229, 230, 362, 
301, 233, 302, 235, 363, 237, 238, 364, +305, 365, 366, 367, 244, 245, 246, 247, 248, 368, 250, 251, 252, 369, 370, 313, +}, + +/* set/8859-5 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 173, 383, 384, +385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, +401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, +417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, +433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, +449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 167, 462, 463, +}, + +/* set/8859-6 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 
147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 256, 164, 256, 256, 256, 256, 256, 256, 256, 464, 173, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 465, 256, 256, 256, 466, +256, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, +482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 256, 256, 256, 256, 256, +493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, +509, 510, 511, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}, + +/* set/8859-7 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 512, 513, 163, 256, 256, 166, 167, 168, 169, 256, 171, 172, 173, 256, 514, +176, 177, 178, 179, 515, 516, 517, 183, 518, 519, 520, 187, 521, 189, 522, 523, +524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, +540, 541, 256, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, +555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, +571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 256, +}, + +/* set/8859-8 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 
53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 162, 163, 164, 165, 166, 167, 168, 169, 215, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 247, 187, 188, 189, 190, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 586, +587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, +603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 256, 256, 614, 615, 256, +}, + +/* set/8859-9 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +317, 209, 210, 211, 212, 213, 214, 215, 216, 
217, 218, 219, 220, 316, 263, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +322, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 321, 275, 255, +}, + +/* set/8859-10 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 257, 340, 341, 354, 338, 357, 167, 339, 289, 262, 342, 266, 173, 360, 349, +176, 268, 346, 347, 364, 344, 367, 183, 345, 305, 274, 348, 279, 514, 370, 350, +351, 193, 194, 195, 196, 197, 198, 352, 285, 201, 286, 203, 353, 205, 206, 207, +208, 355, 356, 211, 212, 213, 214, 359, 216, 358, 218, 219, 220, 221, 222, 223, +361, 225, 226, 227, 228, 229, 230, 362, 301, 233, 302, 235, 363, 237, 238, 239, +240, 365, 366, 243, 244, 245, 246, 369, 248, 368, 250, 251, 252, 253, 254, 336, +}, + +/* set/8859-11 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, +631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, +647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, +663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 256, 256, 256, 256, 674, +675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, +691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 256, 256, 256, 256, +}, + +/* set/8859-13 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 703, 162, 163, 164, 704, 166, 167, 216, 169, 337, 171, 172, 173, 174, 198, +176, 177, 178, 179, 705, 181, 182, 183, 248, 185, 343, 187, 188, 189, 190, 230, +257, 352, 351, 284, 196, 197, 286, 340, 285, 201, 265, 353, 341, 357, 354, 339, +262, 290, 355, 211, 356, 213, 214, 215, 358, 259, 261, 360, 220, 267, 266, 223, +268, 362, 361, 300, 228, 229, 302, 346, 301, 233, 277, 363, 347, 367, 364, 345, +274, 306, 365, 243, 366, 245, 246, 247, 368, 270, 272, 370, 252, 280, 279, 513, +}, + +/* set/8859-14 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 706, 707, 163, 324, 330, 708, 167, 709, 169, 710, 711, 712, 173, 174, 713, +714, 715, 326, 332, 716, 717, 182, 718, 719, 720, 721, 722, 723, 724, 725, 726, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +727, 209, 210, 211, 212, 213, 214, 728, 216, 217, 218, 219, 220, 221, 729, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +730, 241, 242, 243, 244, 245, 246, 731, 248, 249, 250, 251, 252, 253, 732, 255, +}, + +/* set/8859-15 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 733, 165, 262, 167, 274, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 266, 181, 182, 183, 279, 185, 186, 187, 734, 735, 713, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 
202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +}, + +/* set/8859-16 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 257, 268, 259, 733, 704, 262, 167, 274, 169, 736, 171, 265, 173, 277, 267, +176, 177, 285, 270, 266, 703, 182, 183, 279, 301, 737, 187, 734, 735, 713, 280, +192, 193, 194, 282, 196, 284, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +289, 290, 210, 211, 212, 292, 214, 261, 295, 217, 218, 219, 220, 286, 738, 223, +224, 225, 226, 298, 228, 300, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +305, 306, 242, 243, 244, 308, 246, 272, 311, 249, 250, 251, 252, 302, 739, 255, +}, + +/* set/win-1250 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +733, 256, 740, 256, 704, 741, 742, 743, 256, 744, 262, 745, 261, 264, 266, 265, +256, 512, 513, 705, 703, 746, 747, 748, 256, 749, 274, 750, 272, 276, 279, 277, +160, 273, 258, 259, 164, 257, 166, 167, 168, 169, 263, 171, 172, 173, 174, 267, +176, 177, 269, 270, 180, 181, 182, 183, 184, 268, 275, 187, 260, 278, 271, 280, +281, 193, 194, 282, 196, 283, 284, 199, 285, 201, 286, 203, 287, 205, 206, 288, +289, 290, 291, 211, 212, 292, 214, 215, 293, 294, 218, 295, 220, 221, 296, 223, +297, 225, 226, 298, 228, 299, 300, 231, 301, 233, 302, 235, 303, 237, 238, 304, +305, 306, 307, 243, 244, 308, 246, 247, 309, 310, 250, 311, 252, 253, 312, 313, +}, + +/* set/win-1251 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +372, 373, 740, 452, 704, 741, 742, 743, 733, 744, 379, 745, 380, 382, 381, 384, +451, 512, 513, 705, 703, 746, 747, 748, 256, 749, 458, 750, 459, 461, 460, 463, +160, 383, 462, 378, 164, 751, 166, 167, 371, 169, 374, 171, 172, 173, 174, 377, +176, 177, 376, 455, 752, 181, 182, 183, 450, 449, 453, 187, 457, 375, 454, 456, +385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, +401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, +417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, +433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, +}, + +/* set/win-1252 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +733, 256, 740, 753, 704, 741, 742, 743, 754, 744, 262, 745, 734, 256, 266, 256, +256, 512, 513, 705, 703, 746, 747, 748, 755, 749, 274, 750, 735, 256, 279, 713, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +}, + +/* set/kamen-ctrl */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 756, +285, 252, 233, 304, 228, 288, 264, 301, 303, 287, 283, 205, 271, 299, 196, 193, +201, 279, 266, 244, 246, 211, 310, 218, 253, 214, 220, 262, 260, 221, 293, 276, +225, 237, 243, 250, 307, 291, 294, 212, 274, 309, 297, 281, 188, 167, 187, 171, +757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 
767, 768, 769, 770, 771, 772, +773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, +789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, +556, 557, 805, 571, 542, 574, 567, 575, 545, 532, 548, 559, 806, 577, 807, 808, +809, 177, 810, 811, 812, 813, 247, 814, 176, 815, 183, 816, 817, 178, 818, 160, +}, + +/* set/koi8 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 225, 256, 301, 304, 303, 297, 819, 252, 237, 310, 299, 271, 246, 307, 243, +244, 228, 309, 274, 276, 250, 256, 233, 224, 253, 279, 256, 820, 256, 821, 256, +180, 193, 256, 285, 288, 287, 281, 822, 220, 205, 294, 283, 260, 214, 291, 211, +212, 196, 293, 262, 264, 218, 256, 201, 282, 221, 266, 256, 256, 256, 176, 256, +}, + +/* set/pc-latin-2 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 756, +199, 252, 233, 226, 228, 310, 300, 231, 270, 235, 292, 308, 238, 265, 196, 284, +201, 283, 299, 244, 246, 260, 271, 261, 272, 214, 220, 264, 276, 259, 215, 301, +225, 237, 243, 250, 257, 268, 266, 279, 286, 302, 172, 277, 285, 275, 187, 171, +757, 758, 759, 760, 761, 193, 194, 287, 263, 766, 767, 768, 769, 267, 280, 772, +773, 774, 775, 776, 777, 778, 282, 298, 781, 782, 783, 784, 785, 786, 787, 164, +240, 208, 288, 203, 304, 291, 205, 206, 303, 798, 799, 800, 801, 296, 294, 804, +211, 223, 212, 290, 306, 307, 262, 274, 281, 218, 297, 295, 253, 221, 312, 180, +823, 824, 825, 820, 826, 167, 247, 827, 256, 828, 313, 311, 293, 309, 818, 160, +}, + +/* set/macce */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +196, 351, 361, 201, 257, 214, 220, 225, 268, 285, 228, 301, 284, 300, 233, 265, +277, 288, 237, 304, 340, 346, 353, 243, 363, 244, 246, 245, 250, 287, 303, 252, +742, 176, 286, 163, 167, 746, 182, 223, 174, 169, 749, 302, 828, 829, 830, 352, +362, 354, 811, 810, 364, 357, 831, 832, 270, 339, 345, 260, 271, 283, 299, 355, +365, 290, 172, 816, 306, 291, 528, 171, 187, 833, 160, 307, 292, 213, 308, 356, +748, 747, 705, 703, 512, 513, 247, 834, 366, 281, 297, 293, 745, 750, 309, 337, +343, 262, 740, 704, 274, 261, 272, 193, 264, 276, 205, 266, 279, 360, 211, 212, +370, 294, 218, 310, 295, 311, 358, 368, 221, 253, 367, 267, 259, 280, 341, 820, +}, 
+ +/* set/cork */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +282, 257, 284, 285, 288, 287, 286, 317, 283, 260, 259, 290, 291, 349, 292, 281, +293, 261, 262, 263, 264, 296, 295, 294, 713, 265, 266, 267, 835, 316, 240, 167, +298, 268, 300, 301, 304, 303, 302, 322, 299, 271, 270, 306, 307, 350, 308, 297, +309, 272, 274, 275, 276, 312, 311, 310, 255, 277, 279, 280, 836, 161, 191, 163, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 734, 216, 217, 218, 219, 220, 221, 222, 837, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 735, 248, 249, 250, 251, 252, 253, 254, 223, +}, +}; + +static unsigned short int x_to_uni[838] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 164, 165, 166, 
167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +65533, 260, 728, 321, 317, 346, 352, 350, 356, 377, 381, 379, 261, 731, 322, 318, +347, 711, 353, 351, 357, 378, 733, 382, 380, 340, 258, 313, 262, 268, 280, 282, +270, 272, 323, 327, 336, 344, 366, 368, 354, 341, 259, 314, 263, 269, 281, 283, +271, 273, 324, 328, 337, 345, 367, 369, 355, 729, 294, 292, 304, 286, 308, 295, +293, 305, 287, 309, 266, 264, 288, 284, 364, 348, 267, 265, 289, 285, 365, 349, +312, 342, 296, 315, 274, 290, 358, 343, 297, 316, 275, 291, 359, 330, 331, 256, +302, 278, 298, 325, 332, 310, 370, 360, 362, 257, 303, 279, 299, 326, 333, 311, +371, 361, 363, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1038, +1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, +1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, +1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, +1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, +1103, 8470, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1118, 1119, +1548, 1563, 1567, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, +1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1600, 1601, 1602, +1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, +8216, 8217, 8213, 900, 901, 902, 904, 905, 906, 908, 910, 911, 912, 913, 914, 915, +916, 917, 918, 919, 920, 
921, 922, 923, 924, 925, 926, 927, 928, 929, 931, 932, +933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, +949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, +965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 8215, 1488, 1489, 1490, 1491, 1492, +1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, +1509, 1510, 1511, 1512, 1513, 1514, 8206, 8207, 3585, 3586, 3587, 3588, 3589, 3590, 3591, 3592, +3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 3606, 3607, 3608, +3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624, +3625, 3626, 3627, 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, +3641, 3642, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 3660, +3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, 8221, +8222, 8220, 7682, 7683, 7690, 7808, 7810, 7691, 7922, 376, 7710, 7711, 7744, 7745, 7766, 7809, +7767, 7811, 7776, 7923, 7812, 7813, 7777, 372, 7786, 374, 373, 7787, 375, 8364, 338, 339, +536, 537, 538, 539, 8218, 8230, 8224, 8225, 8240, 8249, 8226, 8211, 8212, 8482, 8250, 1168, +1169, 402, 710, 732, 8962, 9617, 9618, 9619, 9474, 9508, 9569, 9570, 9558, 9557, 9571, 9553, +9559, 9565, 9564, 9563, 9488, 9492, 9524, 9516, 9500, 9472, 9532, 9566, 9567, 9562, 9556, 9577, +9574, 9568, 9552, 9580, 9575, 9576, 9572, 9573, 9561, 9560, 9554, 9555, 9579, 9578, 9496, 9484, +9608, 9604, 9612, 9616, 9600, 404, 8734, 8712, 8745, 8781, 8805, 8804, 8992, 8993, 8776, 8729, +8730, 8319, 9632, 61442, 780, 770, 61440, 8801, 779, 808, 774, 807, 776, 8800, 501, 8706, +8721, 3759, 8900, 306, 307, 61663, +}; + +static unsigned short int uni_to_x_0[256] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 
45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +}; + +static unsigned short int uni_to_x_1[256] = { +351, 361, 282, 298, 257, 268, 284, 300, 325, 331, 324, 330, 285, 301, 288, 304, +289, 305, 340, 346, 256, 256, 353, 363, 286, 302, 287, 303, 327, 333, 317, 322, +326, 332, 341, 347, 315, 320, 314, 319, 338, 344, 354, 364, 256, 256, 352, 362, +316, 321, 835, 836, 318, 323, 357, 367, 336, 283, 299, 339, 345, 260, 271, 256, +256, 259, 270, 290, 306, 355, 365, 291, 307, 256, 349, 350, 356, 366, 256, 256, +292, 308, 734, 735, 281, 297, 337, 343, 293, 309, 261, 272, 329, 335, 263, 275, +262, 274, 296, 312, 264, 276, 342, 348, 359, 369, 360, 370, 328, 334, 294, 310, +295, 311, 358, 368, 727, 730, 729, 732, 713, 265, 277, 267, 280, 266, 279, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 753, 256, 805, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 830, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_2[256] = { +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 736, 737, 738, 739, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 754, 273, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 258, 313, 256, 269, 755, 278, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_3[256] = { +256, 256, 821, 256, 256, 256, 826, 256, 828, 256, 256, 824, 820, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 827, 825, 256, 256, 256, 256, 256, 
256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 515, 516, 517, 256, 518, 519, 520, 256, 521, 256, 522, 523, +524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, +540, 541, 256, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, +555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, +571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_4[256] = { +256, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 256, 383, 384, +385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, +401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, +417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, +433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, +256, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 256, 462, 463, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +751, 752, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_5[256] = { +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, +603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_6[256] = { +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 464, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 465, 256, 256, 256, 466, +256, 467, 468, 469, 
470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, +482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 256, 256, 256, 256, 256, +493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, +509, 510, 511, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_14[256] = { +256, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, +631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, +647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, +663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 256, 256, 256, 256, 674, +675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, +691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 833, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_30[256] = { +256, 256, 706, 707, 256, 256, 256, 256, 256, 256, 708, 711, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 714, 715, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +716, 717, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 718, 720, 256, 256, 256, 256, 256, 256, 256, 256, +722, 726, 256, 256, 256, 256, 256, 256, 256, 256, 728, 731, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +709, 719, 710, 721, 724, 725, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 712, 723, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_32[256] = { +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 614, 615, +256, 256, 256, 747, 748, 514, 256, 586, 512, 
513, 740, 256, 705, 703, 704, 256, +742, 743, 746, 256, 256, 256, 741, 256, 256, 256, 256, 256, 256, 256, 256, 256, +744, 256, 256, 256, 256, 256, 256, 256, 256, 745, 750, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 817, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 733, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_33[256] = { +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 449, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 749, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_34[256] = { +256, 256, 831, 256, 256, 256, 256, 256, 807, 256, 256, 256, 256, 256, 256, 256, +256, 832, 256, 256, 256, 256, 256, 256, 256, 815, 816, 256, 256, 256, 806, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 808, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 814, 256, 256, 256, 256, 809, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +829, 823, 256, 256, 811, 810, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 834, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_35[256] = { +256, 256, 756, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +812, 813, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_37[256] = { +777, 256, 760, 256, 256, 256, 256, 256, 256, 256, 256, 256, 799, 256, 256, 256, +772, 256, 256, 256, 773, 256, 256, 256, 798, 256, 256, 256, 776, 256, 256, 256, +256, 256, 256, 256, 761, 256, 256, 256, 256, 256, 256, 256, 775, 256, 256, 256, +256, 256, 256, 256, 774, 256, 256, 256, 256, 256, 256, 256, 778, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +786, 767, 794, 795, 782, 765, 764, 768, 793, 792, 781, 771, 770, 769, 779, 780, +785, 762, 763, 766, 790, 791, 784, 788, 789, 783, 797, 796, 787, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +804, 256, 256, 256, 801, 256, 256, 256, 800, 256, 256, 256, 802, 256, 
256, 256, +803, 757, 758, 759, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +818, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_240[256] = { +822, 256, 819, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 837, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int uni_to_x_255[256] = { +256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +}; + +static unsigned short int *uni_to_x[256] = { +uni_to_x_0, uni_to_x_1, uni_to_x_2, uni_to_x_3, +uni_to_x_4, uni_to_x_5, uni_to_x_6, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_14, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_30, uni_to_x_255, +uni_to_x_32, uni_to_x_33, uni_to_x_34, uni_to_x_35, +uni_to_x_255, uni_to_x_37, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, 
+uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, 
+uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_240, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +uni_to_x_255, uni_to_x_255, uni_to_x_255, uni_to_x_255, +}; + +static unsigned short int x_to_output[24][838] = { + +/* set/ascii */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 97, 256, 256, 256, 256, 256, +256, 256, 50, 51, 256, 256, 256, 256, 256, 49, 111, 256, 257, 261, 265, 256, +65, 65, 65, 65, 65, 65, 269, 67, 69, 69, 69, 69, 73, 73, 73, 73, +256, 78, 79, 79, 79, 79, 
79, 256, 79, 85, 85, 85, 85, 89, 256, 256, +97, 97, 97, 97, 97, 97, 272, 99, 101, 101, 101, 101, 105, 105, 105, 105, +256, 110, 111, 111, 111, 111, 111, 256, 111, 117, 117, 117, 117, 121, 256, 121, +256, 65, 256, 76, 76, 83, 83, 83, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 115, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-1 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 
199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +256, 65, 256, 76, 76, 83, 83, 83, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 115, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 168, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-2 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 256, 164, 256, 256, 167, 168, 256, 97, 256, 256, 173, 256, 256, +176, 256, 50, 51, 180, 256, 256, 
256, 184, 49, 111, 256, 257, 261, 265, 256, +65, 193, 194, 65, 196, 65, 269, 199, 69, 201, 69, 203, 73, 205, 206, 73, +256, 78, 79, 211, 212, 79, 214, 215, 79, 85, 218, 85, 220, 221, 256, 223, +97, 225, 226, 97, 228, 97, 272, 231, 101, 233, 101, 235, 105, 237, 238, 105, +256, 110, 111, 243, 244, 111, 246, 247, 111, 117, 250, 117, 252, 253, 256, 121, +256, 161, 162, 163, 165, 166, 169, 170, 171, 172, 174, 175, 177, 178, 179, 181, +182, 183, 185, 186, 187, 188, 189, 190, 191, 192, 195, 197, 198, 200, 202, 204, +207, 208, 209, 210, 213, 216, 217, 219, 222, 224, 227, 229, 230, 232, 234, 236, +239, 240, 241, 242, 245, 248, 249, 251, 254, 255, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 168, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-3 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 
163, 164, 256, 256, 167, 168, 256, 97, 256, 256, 173, 256, 256, +176, 256, 178, 179, 180, 181, 256, 183, 184, 49, 111, 256, 257, 189, 265, 256, +192, 193, 194, 65, 196, 65, 269, 199, 200, 201, 202, 203, 204, 205, 206, 207, +256, 209, 210, 211, 212, 79, 214, 215, 79, 217, 218, 219, 220, 89, 256, 223, +224, 225, 226, 97, 228, 97, 272, 231, 232, 233, 234, 235, 236, 237, 238, 239, +256, 241, 242, 243, 244, 111, 246, 247, 111, 249, 250, 251, 252, 121, 256, 121, +256, 65, 162, 76, 76, 83, 83, 170, 84, 90, 90, 175, 97, 256, 108, 108, +115, 256, 115, 186, 116, 122, 256, 122, 191, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 255, 161, 166, 169, 171, 172, 177, +182, 185, 187, 188, 197, 198, 213, 216, 221, 222, 229, 230, 245, 248, 253, 254, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 168, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-4 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 
+144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 256, 164, 256, 256, 167, 168, 256, 97, 256, 256, 173, 256, 175, +176, 256, 50, 51, 180, 256, 256, 256, 184, 49, 111, 256, 257, 261, 265, 256, +65, 193, 194, 195, 196, 197, 198, 67, 69, 201, 69, 203, 73, 205, 206, 73, +256, 78, 79, 79, 212, 213, 214, 215, 216, 85, 218, 219, 220, 89, 256, 223, +97, 225, 226, 227, 228, 229, 230, 99, 101, 233, 101, 235, 105, 237, 238, 105, +256, 110, 111, 111, 244, 245, 246, 247, 248, 117, 250, 251, 252, 121, 256, 121, +256, 161, 256, 76, 76, 83, 169, 83, 84, 90, 174, 90, 177, 178, 108, 108, +115, 183, 185, 115, 116, 122, 256, 190, 122, 82, 65, 76, 67, 200, 202, 69, +68, 208, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 232, 234, 101, +100, 240, 110, 110, 111, 114, 117, 117, 116, 255, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +162, 163, 165, 166, 170, 171, 172, 179, 181, 182, 186, 187, 188, 189, 191, 192, +199, 204, 207, 209, 210, 211, 217, 221, 222, 224, 231, 236, 239, 241, 242, 243, +249, 253, 254, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 168, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-5 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 
126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 256, 256, 256, 256, 253, 256, 256, 97, 256, 256, 173, 256, 256, +256, 256, 50, 51, 256, 256, 256, 256, 256, 49, 111, 256, 257, 261, 265, 256, +65, 65, 65, 65, 65, 65, 269, 67, 69, 69, 69, 69, 73, 73, 73, 73, +256, 78, 79, 79, 79, 79, 79, 256, 79, 85, 85, 85, 85, 89, 256, 256, +97, 97, 97, 97, 97, 97, 272, 99, 101, 101, 101, 101, 105, 105, 105, 105, +256, 110, 111, 111, 111, 111, 111, 256, 111, 117, 117, 117, 117, 121, 256, 121, +256, 65, 256, 76, 76, 83, 83, 83, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 115, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 174, +175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, +191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, +207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, +223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, +239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 254, 255, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-6 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 
116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 256, 164, 256, 256, 256, 256, 256, 97, 256, 256, 173, 256, 256, +256, 256, 50, 51, 256, 256, 256, 256, 256, 49, 111, 256, 257, 261, 265, 256, +65, 65, 65, 65, 65, 65, 269, 67, 69, 69, 69, 69, 73, 73, 73, 73, +256, 78, 79, 79, 79, 79, 79, 256, 79, 85, 85, 85, 85, 89, 256, 256, +97, 97, 97, 97, 97, 97, 272, 99, 101, 101, 101, 101, 105, 105, 105, 105, +256, 110, 111, 111, 111, 111, 111, 256, 111, 117, 117, 117, 117, 121, 256, 121, +256, 65, 256, 76, 76, 83, 83, 83, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 115, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +172, 187, 191, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, +206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 224, 225, 226, +227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-7 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 
107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 163, 256, 256, 166, 167, 168, 169, 97, 171, 172, 173, 256, 256, +176, 177, 178, 179, 256, 236, 256, 183, 256, 49, 111, 187, 257, 189, 265, 256, +65, 65, 65, 65, 65, 65, 269, 67, 69, 69, 69, 69, 73, 73, 73, 73, +256, 78, 79, 79, 79, 79, 79, 256, 79, 85, 85, 85, 85, 89, 256, 256, +97, 97, 97, 97, 97, 97, 272, 99, 101, 101, 101, 101, 105, 105, 105, 105, +256, 110, 111, 111, 111, 111, 111, 256, 111, 117, 117, 117, 117, 121, 256, 121, +256, 65, 256, 76, 76, 83, 83, 83, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 115, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +161, 162, 175, 180, 181, 182, 184, 185, 186, 
188, 190, 191, 192, 193, 194, 195, +196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 211, 212, +213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, +229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, +245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-8 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 
97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 162, 163, 164, 165, 166, 167, 168, 169, 97, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 111, 187, 188, 189, 190, 256, +65, 65, 65, 65, 65, 65, 269, 67, 69, 69, 69, 69, 73, 73, 73, 73, +256, 78, 79, 79, 79, 79, 79, 170, 79, 85, 85, 85, 85, 89, 256, 256, +97, 97, 97, 97, 97, 97, 272, 99, 101, 101, 101, 101, 105, 105, 105, 105, +256, 110, 111, 111, 111, 111, 111, 186, 111, 117, 117, 117, 117, 121, 256, 121, +256, 65, 256, 76, 76, 83, 83, 83, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 115, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, +256, 256, 256, 256, 168, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 223, 224, 225, 226, 227, 228, +229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, +245, 246, 247, 248, 249, 250, 253, 254, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-9 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 
84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +256, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 89, 256, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +256, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 121, 256, 255, +256, 65, 256, 76, 76, 83, 83, 222, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 254, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 221, 208, 74, 104, +104, 253, 240, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 168, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-10 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 
62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 256, 256, 256, 256, 167, 256, 256, 97, 256, 256, 173, 256, 256, +176, 256, 50, 51, 256, 256, 256, 183, 256, 49, 111, 256, 257, 261, 265, 256, +65, 193, 194, 195, 196, 197, 198, 67, 69, 201, 69, 203, 73, 205, 206, 207, +208, 78, 79, 211, 212, 213, 214, 256, 216, 85, 218, 219, 220, 221, 222, 223, +97, 225, 226, 227, 228, 229, 230, 99, 101, 233, 101, 235, 105, 237, 238, 239, +240, 110, 111, 243, 244, 245, 246, 256, 248, 117, 250, 251, 252, 253, 254, 121, +256, 161, 256, 76, 76, 83, 170, 83, 84, 90, 172, 90, 177, 256, 108, 108, +115, 256, 186, 115, 116, 122, 256, 188, 122, 82, 65, 76, 67, 200, 202, 69, +68, 169, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 232, 234, 101, +100, 185, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +255, 82, 165, 168, 162, 163, 171, 114, 181, 184, 178, 179, 187, 175, 191, 192, +199, 204, 164, 209, 210, 166, 217, 215, 174, 224, 231, 236, 180, 241, 242, 182, +249, 247, 190, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 189, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-11 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 
38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 256, 256, 256, 256, 256, 256, 256, 97, 256, 256, 256, 256, 256, +256, 256, 50, 51, 256, 256, 256, 256, 256, 49, 111, 256, 257, 261, 265, 256, +65, 65, 65, 65, 65, 65, 269, 67, 69, 69, 69, 69, 73, 73, 73, 73, +256, 78, 79, 79, 79, 79, 79, 256, 79, 85, 85, 85, 85, 89, 256, 256, +97, 97, 97, 97, 97, 97, 272, 99, 101, 101, 101, 101, 105, 105, 105, 105, +256, 110, 111, 111, 111, 111, 111, 256, 111, 117, 117, 117, 117, 121, 256, 121, +256, 65, 256, 76, 76, 83, 83, 83, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 115, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
+256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 161, 162, 163, 164, 165, 166, 167, 168, +169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, +185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, +201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, +217, 218, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, +237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-13 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 162, 163, 164, 256, 166, 167, 256, 169, 97, 171, 172, 173, 174, 256, +176, 177, 178, 179, 256, 181, 182, 183, 256, 185, 111, 187, 188, 189, 190, 256, +65, 65, 65, 65, 196, 197, 175, 67, 69, 201, 69, 69, 73, 73, 73, 73, +256, 78, 79, 211, 79, 213, 214, 215, 168, 85, 85, 85, 220, 89, 256, 223, +97, 97, 97, 97, 228, 229, 191, 99, 101, 233, 101, 101, 105, 105, 105, 105, +256, 110, 111, 243, 111, 245, 246, 247, 184, 117, 117, 117, 252, 121, 256, 121, +256, 192, 256, 217, 76, 218, 208, 83, 84, 202, 222, 221, 224, 256, 249, 108, +250, 256, 240, 115, 116, 234, 256, 254, 253, 82, 65, 76, 195, 200, 198, 69, +68, 68, 209, 78, 79, 82, 85, 85, 84, 114, 97, 108, 227, 232, 230, 101, +100, 100, 241, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 170, 73, 207, 199, 204, 84, 186, 105, 239, 231, 236, 116, 256, 256, 194, +193, 203, 206, 210, 212, 205, 216, 85, 219, 226, 225, 235, 238, 242, 244, 237, +248, 117, 251, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 255, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 161, +165, 180, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-14 */ +{ +0, 1, 2, 
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 163, 256, 256, 256, 167, 256, 169, 97, 256, 256, 173, 174, 256, +256, 256, 50, 51, 256, 256, 182, 256, 256, 49, 111, 256, 257, 261, 265, 256, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +256, 209, 210, 211, 212, 213, 214, 256, 216, 217, 218, 219, 220, 221, 256, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +256, 241, 242, 243, 244, 245, 246, 256, 248, 249, 250, 251, 252, 253, 256, 255, +256, 65, 256, 76, 76, 83, 83, 83, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 115, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 164, 67, 178, 71, 85, 83, 165, 99, 179, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 161, 162, 166, 168, 170, 171, 172, 175, 176, 177, 180, 181, 183, 184, +185, 186, 187, 188, 189, 190, 191, 208, 215, 222, 240, 247, 254, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-15 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 256, 165, 256, 167, 256, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 256, 181, 182, 183, 256, 185, 186, 187, 257, 261, 265, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +256, 65, 256, 76, 76, 83, 166, 83, 84, 90, 180, 90, 97, 256, 108, 108, +115, 256, 168, 115, 116, 122, 256, 184, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 190, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 164, 188, 189, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/8859-16 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 256, 256, 256, 256, 256, 256, 167, 256, 169, 97, 171, 256, 173, 256, 256, +176, 177, 50, 51, 256, 256, 182, 183, 256, 49, 111, 187, 257, 261, 265, 256, +192, 193, 194, 65, 196, 65, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +256, 78, 210, 211, 212, 79, 214, 256, 79, 217, 218, 219, 220, 89, 256, 223, +224, 225, 226, 97, 228, 97, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +256, 110, 242, 243, 244, 111, 246, 256, 111, 249, 250, 251, 252, 121, 256, 255, +256, 161, 256, 163, 76, 215, 166, 83, 84, 172, 180, 175, 162, 256, 179, 108, +247, 256, 168, 115, 116, 174, 256, 184, 191, 82, 195, 76, 197, 178, 221, 69, +68, 208, 209, 78, 213, 82, 85, 216, 84, 114, 227, 108, 229, 185, 253, 101, +100, 240, 241, 110, 245, 114, 117, 248, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 181, +165, 256, 66, 98, 68, 87, 87, 100, 89, 190, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 164, 188, 189, +170, 186, 222, 254, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/win-1250 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +160, 256, 256, 256, 164, 256, 166, 167, 168, 169, 97, 171, 172, 173, 174, 256, +176, 177, 50, 51, 180, 181, 182, 183, 184, 49, 111, 187, 257, 261, 265, 256, +65, 193, 194, 65, 196, 65, 269, 199, 69, 201, 69, 203, 73, 205, 206, 73, +256, 78, 79, 211, 212, 79, 214, 215, 79, 85, 218, 85, 220, 221, 256, 223, +97, 225, 226, 97, 228, 97, 272, 231, 101, 233, 101, 235, 105, 237, 238, 105, +256, 110, 111, 243, 244, 111, 246, 247, 111, 117, 250, 117, 252, 253, 256, 121, +256, 165, 162, 163, 188, 140, 138, 170, 141, 143, 142, 175, 185, 178, 179, 190, +156, 161, 154, 186, 157, 159, 189, 158, 191, 192, 195, 197, 198, 200, 202, 204, +207, 208, 209, 210, 213, 216, 217, 219, 222, 224, 227, 229, 230, 232, 234, 236, +239, 240, 241, 242, 245, 248, 249, 251, 254, 255, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 
117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +145, 146, 256, 256, 168, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 148, +132, 147, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 128, 256, 256, +83, 115, 84, 116, 130, 133, 134, 135, 137, 139, 149, 150, 151, 153, 155, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
+256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/win-1251 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +160, 256, 256, 256, 164, 256, 166, 167, 256, 169, 97, 171, 172, 173, 174, 256, +176, 177, 50, 51, 256, 181, 182, 183, 256, 49, 111, 187, 257, 261, 265, 256, +65, 65, 65, 65, 65, 65, 269, 67, 69, 69, 69, 69, 73, 73, 73, 73, +256, 78, 79, 79, 79, 79, 79, 256, 79, 85, 85, 85, 85, 89, 256, 256, +97, 97, 97, 97, 97, 97, 272, 99, 101, 101, 101, 101, 105, 105, 105, 105, +256, 110, 111, 111, 111, 111, 111, 256, 111, 117, 117, 117, 117, 121, 256, 121, +256, 65, 256, 76, 76, 83, 83, 83, 84, 90, 90, 90, 97, 256, 108, 108, +115, 256, 115, 115, 116, 122, 256, 122, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 
85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 168, 128, 129, 170, 189, 178, 175, 163, 138, 140, 142, 141, 161, +143, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, +207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, +223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, +239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, +255, 185, 184, 144, 131, 186, 190, 179, 191, 188, 154, 156, 158, 157, 162, 159, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +145, 146, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 148, +132, 147, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 136, 256, 256, +83, 115, 84, 116, 130, 133, 134, 135, 137, 139, 149, 150, 151, 153, 155, 165, +180, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/win-1252 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +256, 65, 256, 76, 76, 83, 138, 83, 84, 90, 142, 90, 97, 256, 108, 108, +115, 256, 154, 115, 116, 122, 256, 158, 122, 82, 65, 76, 67, 67, 69, 69, +68, 68, 78, 78, 79, 82, 85, 85, 84, 114, 97, 108, 99, 99, 101, 101, +100, 100, 110, 110, 111, 114, 117, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 
76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +145, 146, 256, 256, 168, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 148, +132, 147, 66, 98, 68, 87, 87, 100, 89, 159, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 128, 140, 156, +83, 115, 84, 116, 
130, 133, 134, 135, 137, 139, 149, 150, 151, 153, 155, 256, +256, 131, 136, 152, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/kamen-ctrl */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +255, 256, 256, 256, 256, 256, 256, 173, 256, 256, 97, 175, 256, 256, 256, 256, +248, 241, 253, 51, 256, 230, 256, 250, 256, 49, 111, 174, 172, 261, 265, 256, +65, 143, 65, 65, 142, 65, 269, 67, 69, 144, 69, 69, 73, 139, 73, 73, +256, 78, 79, 149, 167, 79, 153, 256, 79, 85, 151, 85, 154, 157, 256, 256, +97, 160, 97, 97, 132, 97, 272, 99, 101, 130, 101, 101, 105, 161, 105, 105, +256, 110, 111, 162, 147, 111, 148, 246, 111, 117, 163, 117, 129, 152, 256, 121, +256, 65, 256, 76, 156, 83, 155, 83, 134, 90, 146, 90, 97, 256, 108, 140, +115, 256, 168, 115, 159, 122, 256, 145, 122, 171, 65, 138, 67, 128, 69, 137, +133, 68, 78, 165, 79, 158, 166, 85, 84, 170, 97, 141, 99, 135, 101, 136, +131, 100, 110, 164, 111, 169, 150, 117, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 
103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 234, 256, 256, 256, 256, +256, 256, 256, 256, 233, 256, 256, 256, 256, 256, 256, 256, 256, 256, 228, 256, +256, 232, 256, 256, 234, 256, 256, 224, 256, 256, 256, 256, 224, 225, 256, 235, +256, 256, 256, 256, 256, 256, 256, 230, 256, 256, 256, 227, 256, 256, 229, 231, +256, 237, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 
87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 127, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, +187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, +203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, +219, 220, 221, 222, 223, 226, 236, 238, 239, 240, 242, 243, 244, 245, 247, 249, +251, 252, 254, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/koi8 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 97, 256, 256, 256, 256, 256, +254, 256, 50, 51, 224, 256, 256, 256, 256, 49, 111, 256, 257, 261, 265, 256, +65, 225, 291, 65, 241, 65, 269, 67, 69, 247, 294, 69, 73, 233, 297, 73, +256, 78, 79, 239, 240, 79, 237, 256, 79, 85, 245, 300, 232, 249, 256, 256, +216, 193, 303, 97, 209, 97, 272, 99, 101, 215, 306, 101, 105, 201, 309, 105, +256, 110, 111, 207, 208, 111, 205, 256, 111, 117, 213, 312, 200, 217, 256, 121, +256, 65, 256, 76, 236, 83, 243, 83, 244, 90, 250, 90, 97, 256, 108, 204, +115, 256, 211, 115, 212, 122, 256, 218, 122, 230, 248, 235, 67, 227, 69, 229, +228, 68, 78, 238, 79, 242, 234, 85, 84, 198, 97, 203, 99, 195, 101, 197, +196, 100, 
110, 206, 111, 210, 202, 117, 116, 256, 72, 315, 73, 71, 318, 104, +321, 256, 103, 324, 67, 327, 71, 330, 85, 333, 99, 336, 103, 339, 117, 342, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 256, 256, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 
256, 66, 98, 68, 87, 87, 100, 89, 89, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 345, 84, 348, 351, 116, 354, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 199, 220, 222, 231, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/pc-latin-2 */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +255, 256, 256, 256, 207, 256, 256, 245, 249, 256, 97, 175, 170, 256, 256, 256, +256, 256, 50, 51, 239, 256, 256, 256, 247, 49, 111, 174, 257, 261, 265, 256, +65, 181, 182, 65, 142, 65, 269, 128, 69, 144, 69, 211, 73, 214, 215, 357, +209, 78, 79, 224, 226, 79, 153, 158, 79, 85, 233, 85, 154, 237, 256, 225, +97, 160, 131, 97, 132, 97, 272, 135, 101, 130, 101, 137, 105, 161, 140, 360, +208, 110, 111, 162, 147, 111, 148, 246, 111, 117, 163, 117, 129, 236, 256, 363, +256, 164, 244, 157, 149, 151, 230, 184, 155, 141, 166, 189, 165, 242, 136, 150, +152, 256, 231, 173, 156, 171, 241, 167, 190, 232, 198, 145, 
143, 172, 168, 183, +210, 68, 227, 213, 138, 252, 222, 235, 221, 234, 199, 146, 134, 159, 169, 216, +212, 100, 228, 229, 139, 253, 133, 251, 238, 250, 72, 72, 73, 366, 74, 104, +104, 256, 369, 106, 67, 67, 71, 71, 372, 83, 99, 99, 103, 103, 375, 115, +256, 378, 73, 381, 69, 384, 84, 387, 105, 390, 101, 393, 116, 256, 256, 65, +396, 69, 73, 399, 79, 402, 405, 85, 85, 97, 408, 101, 105, 411, 111, 414, +417, 117, 117, 249, 256, 256, 256, 256, 256, 249, 256, 256, 256, 256, 256, 244, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 244, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 244, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 249, 256, 256, 256, 256, 256, 249, 256, 256, 256, 256, 256, 244, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 249, 256, 256, 256, 256, 256, 256, 256, 249, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 249, 249, 256, 256, 256, 256, 249, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 249, 249, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 420, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 423, 426, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 127, 176, 177, 178, 179, 180, 256, 256, 256, 256, 185, 186, +187, 188, 256, 256, 191, 192, 193, 194, 195, 196, 197, 256, 256, 200, 201, 202, +203, 204, 205, 206, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 217, 218, +219, 220, 256, 256, 223, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 254, 256, 243, 256, 256, 240, 241, 242, 244, 247, 249, 61, 103, 256, +256, 256, 256, 285, 288, 256, +}, + +/* set/macce */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +202, 256, 256, 163, 256, 256, 256, 164, 172, 169, 97, 199, 194, 256, 168, 256, +161, 256, 50, 51, 256, 256, 166, 256, 256, 49, 111, 200, 257, 261, 265, 256, +65, 231, 65, 65, 128, 65, 269, 67, 69, 131, 69, 429, 73, 234, 73, 432, +256, 78, 79, 238, 239, 205, 133, 256, 79, 85, 242, 85, 134, 248, 256, 167, +97, 135, 97, 97, 138, 97, 272, 99, 101, 142, 101, 435, 105, 146, 105, 438, +256, 110, 111, 151, 153, 155, 154, 214, 111, 117, 156, 117, 159, 249, 256, 441, +256, 132, 256, 252, 187, 229, 225, 83, 
232, 143, 235, 251, 136, 256, 184, 188, +230, 256, 228, 115, 233, 144, 256, 236, 253, 217, 65, 189, 140, 137, 162, 157, +145, 68, 193, 197, 204, 219, 241, 244, 84, 218, 97, 190, 141, 139, 171, 158, +147, 100, 196, 203, 206, 222, 243, 245, 116, 256, 72, 72, 73, 71, 74, 104, +104, 256, 103, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 223, 73, 185, 148, 254, 84, 224, 105, 186, 149, 103, 116, 256, 256, 129, +175, 150, 177, 191, 207, 181, 246, 85, 237, 130, 176, 152, 180, 192, 216, 250, +247, 117, 240, 172, 256, 256, 256, 256, 256, 172, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 172, 256, 256, 256, 256, 256, 172, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +212, 213, 256, 256, 172, 256, 256, 256, 256, 256, 256, 256, 172, 256, 256, 256, +198, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 172, 172, 256, 256, 256, 256, 172, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 172, 172, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 
256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 211, +227, 210, 66, 98, 68, 87, 87, 100, 89, 444, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 447, 450, 115, 87, 84, 89, 119, 116, 121, 256, 256, 256, +83, 115, 84, 116, 226, 278, 160, 256, 256, 220, 165, 209, 208, 170, 221, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 179, 178, 256, 256, 256, 256, +195, 110, 256, 256, 255, 256, 256, 256, 256, 256, 256, 256, 172, 173, 174, 182, +183, 201, 215, 285, 288, 256, +}, + +/* set/cork */ +{ +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 189, 256, 191, 256, 256, 256, 159, 256, 256, 97, 256, 256, 256, 256, 256, +256, 256, 50, 51, 256, 256, 256, 256, 256, 49, 111, 256, 257, 261, 265, 190, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 256, 216, 217, 218, 219, 220, 221, 222, 255, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 
+240, 241, 242, 243, 244, 245, 246, 256, 248, 249, 250, 251, 252, 253, 254, 184, +256, 129, 256, 138, 137, 145, 146, 147, 148, 153, 154, 155, 161, 256, 170, 169, +177, 256, 178, 179, 180, 185, 256, 186, 187, 143, 128, 136, 130, 131, 134, 133, +132, 68, 139, 140, 142, 144, 151, 150, 149, 175, 160, 168, 162, 163, 166, 165, +164, 100, 171, 172, 174, 176, 183, 182, 181, 256, 72, 72, 157, 135, 74, 104, +104, 256, 167, 106, 67, 67, 71, 71, 85, 83, 99, 99, 103, 103, 117, 115, +256, 82, 73, 76, 69, 71, 84, 114, 105, 108, 101, 103, 116, 141, 173, 65, +73, 69, 73, 78, 79, 75, 85, 85, 85, 97, 105, 101, 105, 110, 111, 107, +117, 117, 117, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 275, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 
+256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 66, 98, 68, 87, 87, 100, 89, 152, 70, 102, 77, 109, 80, 119, +112, 119, 83, 121, 87, 119, 115, 87, 84, 89, 119, 116, 121, 256, 215, 247, +83, 115, 84, 116, 256, 278, 256, 256, 256, 256, 256, 256, 256, 282, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, +256, 110, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 61, 103, 256, +256, 256, 256, 156, 188, 223, +}, +}; + +static unsigned char string_table[] = { +0, +3, 49, 47, 52, +3, 49, 47, 50, +3, 51, 47, 52, +2, 65, 69, +2, 97, 101, +2, 78, 111, +3, 46, 46, 46, +2, 84, 77, +2, 73, 74, +2, 105, 106, +2, 65, 222, +2, 69, 222, +2, 73, 222, +2, 85, 222, +2, 97, 222, +2, 101, 222, +2, 105, 222, +2, 117, 222, +2, 72, 222, +2, 74, 222, +2, 104, 222, +2, 106, 222, +2, 67, 222, +2, 71, 222, +2, 83, 222, +2, 99, 222, +2, 103, 222, +2, 115, 222, +2, 87, 222, +2, 89, 222, +2, 119, 222, +2, 121, 222, +2, 73, 249, +2, 105, 249, +2, 121, 249, +2, 71, 244, +2, 103, 244, +2, 85, 244, +2, 117, 244, +2, 82, 247, +2, 76, 247, +2, 71, 247, +2, 114, 247, +2, 108, 247, +2, 103, 247, +2, 73, 242, +2, 78, 247, +2, 75, 247, +2, 85, 242, +2, 105, 242, +2, 110, 247, +2, 107, 247, +2, 117, 242, +2, 89, 249, +2, 87, 249, +2, 119, 249, +2, 69, 172, +2, 73, 172, +2, 101, 172, +2, 105, 172, +2, 121, 172, +2, 89, 172, +2, 87, 172, +2, 119, 172, +}; diff --git a/libucw/charset/fb-charconv.c b/libucw/charset/fb-charconv.c new file mode 
100644 index 0000000..c4c6b25 --- /dev/null +++ b/libucw/charset/fb-charconv.c @@ -0,0 +1,113 @@ +/* + * UCW Library -- Charset Conversion Wrapper for Fast Buffered I/O + * + * (c) 2003--2005 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include +#include + +#define BUFSIZE 1024 + +struct fb_charconv { + struct fastbuf fb; + struct fastbuf *orig; + struct conv_context ctxt; + byte buf[BUFSIZE]; +}; +#define FB_CC(f) ((struct fb_charconv *)(f)) + +static void +fb_cc_spout(struct fastbuf *f) +{ + struct conv_context *ct = &FB_CC(f)->ctxt; + int flags; + + ct->source = f->buffer; + ct->source_end = f->bptr; + do + { + flags = conv_run(ct); + if (ct->dest > ct->dest_start) + bdirect_write_commit(FB_CC(f)->orig, ct->dest); + uint l = bdirect_write_prepare(FB_CC(f)->orig, &ct->dest_start); + ct->dest = ct->dest_start; + ct->dest_end = ct->dest + l; + } + while (!(flags & CONV_SOURCE_END)); + + f->bptr = f->buffer; +} + +static int +fb_cc_refill(struct fastbuf *f) +{ + struct conv_context *ct = &FB_CC(f)->ctxt; + int flags; + + f->bptr = f->bstop = f->buffer; + do + { + byte *src; + uint len = bdirect_read_prepare(FB_CC(f)->orig, &src); + if (!len) + break; + ct->source = src; + ct->source_end = ct->source + len; + ct->dest = ct->dest_start = f->bstop; + ct->dest_end = f->bufend; + flags = conv_run(ct); + bdirect_read_commit(FB_CC(f)->orig, (byte*)ct->source); + f->bstop = ct->dest; + } + while (!(flags & CONV_DEST_END)); + return (f->bstop > f->bptr); +} + +static void +fb_cc_close(struct fastbuf *f) +{ + bflush(FB_CC(f)->orig); + xfree(f); +} + +struct fastbuf * +fb_wrap_charconv_out(struct fastbuf *f, int cs_from, int cs_to) +{ + if (cs_from == cs_to) + return f; + + struct fastbuf *g = xmalloc_zero(sizeof(struct fb_charconv)); + FB_CC(g)->orig = f; + conv_init(&FB_CC(g)->ctxt); + conv_set_charset(&FB_CC(g)->ctxt, cs_from, cs_to); + g->name = ""; + 
g->spout = fb_cc_spout; + g->close = fb_cc_close; + g->buffer = g->bstop = g->bptr = FB_CC(g)->buf; + g->bufend = g->buffer + BUFSIZE; + return g; +} + +struct fastbuf * +fb_wrap_charconv_in(struct fastbuf *f, int cs_from, int cs_to) +{ + if (cs_from == cs_to) + return f; + + struct fastbuf *g = xmalloc_zero(sizeof(struct fb_charconv)); + FB_CC(g)->orig = f; + conv_init(&FB_CC(g)->ctxt); + conv_set_charset(&FB_CC(g)->ctxt, cs_from, cs_to); + g->name = ""; + g->refill = fb_cc_refill; + g->close = fb_cc_close; + g->buffer = g->bstop = g->bptr = FB_CC(g)->buf; + g->bufend = g->buffer + BUFSIZE; + return g; +} diff --git a/libucw/charset/fb-charconv.h b/libucw/charset/fb-charconv.h new file mode 100644 index 0000000..412c692 --- /dev/null +++ b/libucw/charset/fb-charconv.h @@ -0,0 +1,16 @@ +/* + * UCW Library -- Charset Conversion Wrapper for Fast Buffered I/O + * + * (c) 2003--2005 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifdef CONFIG_UCW_CLEAN_ABI +#define fb_wrap_charconv_in ucw_fb_wrap_charconv_in +#define fb_wrap_charconv_out ucw_fb_wrap_charconv_out +#endif + +struct fastbuf *fb_wrap_charconv_in(struct fastbuf *f, int cs_from, int cs_to); +struct fastbuf *fb_wrap_charconv_out(struct fastbuf *f, int cs_from, int cs_to); diff --git a/libucw/charset/libucw-charset.pc b/libucw/charset/libucw-charset.pc new file mode 100644 index 0000000..a324ef0 --- /dev/null +++ b/libucw/charset/libucw-charset.pc @@ -0,0 +1,11 @@ +# pkg-config metadata for libucw-charset + +libdir=@LIBDIR@ +incdir=. 
+ +Name: libucw-charset +Description: Character set conversion library +Version: @UCW_VERSION@ +Cflags: -I${incdir} +Libs: -L${libdir} @SO_LINK_PATH@ -lucw-charset@UCW_ABI_SUFFIX@ +Requires.private: @DEPS@ diff --git a/libucw/charset/misc/add-charnames b/libucw/charset/misc/add-charnames new file mode 100755 index 0000000..5057d5f --- /dev/null +++ b/libucw/charset/misc/add-charnames @@ -0,0 +1,19 @@ +#!/usr/bin/perl +# +# Add Unicode Character Names to a character set table file +# (c) 1997 Martin Mares +# + +open (UNI, "unidata/UnicodeData.txt") || die "No Unicode Data File"; +while () { + ($num,$name) = split /;/; + $name{$num} = $name; +} +close UNI; + +while (<>) { + ($code,$uni) = split /[ \t\n]+/; + $name = $name{$uni}; + ($name eq "") && ($name = "????"); + print "$code\t$uni\t$name\n"; +} diff --git a/libucw/charset/misc/chartable.in b/libucw/charset/misc/chartable.in new file mode 100644 index 0000000..ca0c160 --- /dev/null +++ b/libucw/charset/misc/chartable.in @@ -0,0 +1,30 @@ +# List of character set tables for tabgen +# Ordering defines internal charset numbers +# Remember to change charconv.h and setnames.c accordingly! 
+ +set/ascii +set/8859-1 +set/8859-2 +set/8859-3 +set/8859-4 +set/8859-5 +set/8859-6 +set/8859-7 +set/8859-8 +set/8859-9 +set/8859-10 +set/8859-11 +set/8859-13 +set/8859-14 +set/8859-15 +set/8859-16 +set/win-1250 +set/win-1251 +set/win-1252 +set/kamen-ctrl +set/koi8 +set/pc-latin-2 +set/macce +set/cork +#set/ibm-ctrl +#set/mac diff --git a/libucw/charset/misc/gen-basic b/libucw/charset/misc/gen-basic new file mode 100755 index 0000000..bd0044b --- /dev/null +++ b/libucw/charset/misc/gen-basic @@ -0,0 +1,36 @@ +#!/usr/bin/perl +# +# Split Unicode Data File +# (c) 1997--2003 Martin Mares +# + +open(I, "unidata/UnicodeData.txt") || die "Unable to open UniCode data file"; +open(C, ">misc/u-cat") || die "cat file open"; +open(U, ">misc/u-upper") || die "upper file open"; +open(L, ">misc/u-lower") || die "lower file open"; +open(G, ">misc/u-ligatures") || die "lig file open"; +while () { + chomp; + (/^$/ || /^#/) && next; + ($code,$name,$cat,$comb,$bidir,$decomp,$d0,$d1,$n0,$mirr,$cmt1,$cmt2,$upper,$lower,$title) = split /;/; + $code =~ /^....$/ || next; + if ($cat =~ /^C/) { $ccat = "_U_CTRL"; } + elsif ($cat =~ /^Z/) { $ccat = "_U_SPACE"; } + elsif ($decomp =~ // && $name =~ / LIGATURE /) { + $ccat = "_U_LIGATURE"; + print G "$code\n"; + } elsif ($cat =~ /^Ll/) { $ccat = "_U_LLOWER"; } + elsif ($cat =~ /^Lu/) { $ccat = "_U_LUPPER"; } + elsif ($cat =~ /^L/) { $ccat = "_U_LETTER"; } + elsif ($code ge "0030" && $code le "0039") { $ccat = "_U_DIGIT | _U_XDIGIT"; } + else { $ccat = ""; } + if ($code ge "0041" && $code le "0046" || $code ge "0061" && $code le "0066") { $ccat = $ccat . 
"|_U_XDIGIT"; } + if ($ccat ne "") { print C "$code\t$ccat\n"; } + if ($upper ne "") { print U "$code\t0x$upper\n"; } + if ($lower ne "") { print L "$code\t0x$lower\n"; } +} +close I; +close C; +close U; +close L; +close G; diff --git a/libucw/charset/misc/gen-charconv b/libucw/charset/misc/gen-charconv new file mode 100755 index 0000000..ab22b15 --- /dev/null +++ b/libucw/charset/misc/gen-charconv @@ -0,0 +1,194 @@ +#!/usr/bin/perl +# +# Character Set Table Generator 1.0 +# (c) 1998 Martin Mares +# +# This program can be freely distributed and used according to the terms +# of the GNU General Public License. +# + +# Internal codes 0..255 are mapped to UniCode 0..255 +# Internal code 256 is the replacement character (U#FFFD) + +$ncs = 0; + +print "/* Generated by tabgen 1.0, please don't edit manually. */\n\n"; + +print STDERR "Charset list...\n"; + +while (<>) { + chomp; + (/^\w*$/ || /^#/) && next; + $charsets[$ncs++] = $_; +} + +print STDERR "Found $ncs charsets, counting unique codes...\n"; + +for($unique=0; $unique<256; $unique++) { + $u2x{$unique} = $unique; + $x2u[$unique] = $unique; +} +$u2x{0xFFFD} = $unique; +$x2u[$unique++] = 0xFFFD; +print "static unsigned short int input_to_x[$ncs][256] = {\n"; +for($x=0; $x<$ncs; $x++) { + $a = $charsets[$x]; + print "\n/* $a */\n{\n"; + open (A, $a) || die "Error opening $a"; + while () { + chomp; + (/^\w*$/ || /^#/) && next; + ($i, $u, $c) = split /\t/; + $cc[$x][hex $i] = $u; + } + close A; + for($i=0; $i<256; $i++) { + $u = hex((defined $cc[$x][$i]) ? $cc[$x][$i] : "FFFD"); + if (!defined $u2x{$u}) { + $x2u[$unique] = $u; + $u2x{$u} = $unique++; + } + $o = $u2x{$u}; + print "$o,", ($i % 16 == 15) ? "\n" : " "; + $cc[$x][$i] = $o; + $cx[$x]{$o} = $i; + } + print "},\n"; +} +print "};\n\n"; + +print STDERR "$unique unique codes...\n"; + +print "static unsigned short int x_to_uni[$unique] = {\n"; +for($i=0; $i<$unique; $i++) { + print "$x2u[$i],", ($i % 16 == 15) ? 
"\n" : " "; +} +if ($i % 16) { print "\n"; } +print "};\n\n"; + +print STDERR "UNICODE table...\n"; +for($i=0; $i<$unique; $i++) { + $u = $x2u[$i]; + $p = $u / 256; + $pg[$p] = 1; +} +for($i=0; $i<256; $i++) { + if ($pg[$i]) { + print "static unsigned short int uni_to_x_$i\[256\] = {\n"; + for($j=0; $j<256; $j++) { + $u = 256*$i + $j; + $u = defined($u2x{$u}) ? $u2x{$u} : 256; + print "$u,", ($j % 16 == 15) ? "\n" : " "; + } + print "};\n\n"; + } +} +print "static unsigned short int *uni_to_x[256] = {\n"; +for($i=hex "FF00"; $i<=hex "FFFF"; $i++) { + if (defined $u2x{$i} && $i != 0xFFFD) { die "Invalid replacement strategy!"; } +} +for($i=0; $i<256; $i++) { + print "uni_to_x_", $pg[$i] ? $i : "255", ",", ($i % 4 == 3) ? "\n" : " "; +} +print "};\n\n"; + +print STDERR "UniData file...\n"; +open (U, "unidata/UnicodeData.txt") || die "No UnicodeData file"; +while () { + chomp; + ($num,$name,$_,$_,$_,$exp) = split /;/; + if ($exp ne "") { + $exp =~ s/^<.*> *//g; + $a = ""; + foreach $x (split (/ /, $exp)) { + if ($x ne "0020") { + $a = $a . " " . 
hex $x; + } + } + ($expand{hex $num} = $a) =~ s/^ //; + } +} +close U; + +print STDERR "Accent rules\n"; +if (open(ACC, "misc/user_unacc")) { + while () { + chomp; + (/^\s*$/ || /^#/) && next; + s/0x([0-9a-zA-Z]+)/hex($1)/ge; + (/^(\d+)\s+(\d+)$/) || die "Syntax error in user accent rules"; + $expand{$1} = $2; + } + close ACC; +} + +print STDERR "Character expansions\n"; +if (open(EXTRA, "misc/user_expand")) { + while () { + chomp; + (/^\s*$/ || /^#/) && next; + s/0x([0-9a-zA-Z]+)/hex($1)/ge; + (/^(\d+)\s+(.*)$/) || die "Syntax error in user expansions"; + $expand{$1} = $2; + } + close EXTRA; +} + +print "static unsigned short int x_to_output[$ncs][$unique] = {\n"; +$pstr = 256; +for($c=0; $c<$ncs; $c++) { + print "\n/* $charsets[$c] */\n{\n"; + for($i=0; $i<$unique; $i++) { + $u = $x2u[$i]; + do { + $r = $u; + $u = ""; + foreach $x (split (/ /, $r)) { + if (defined($k = $u2x{$x}) && defined $cx[$c]{$k}) { + $u = "$u $x"; + } elsif (defined($k = $expand{$x})) { + $u = "$u $k"; + } + } + $u =~ s/^ //; + } while ($r ne $u); + $u = ""; + foreach $x (split (/ /, $r)) { + if (defined($k = $u2x{$x})) { + if ($k != 256 && defined ($k = $cx[$c]{$k})) { + $u = $u . pack("C", $k); + } + } + } + if (length($u) == 1) { + $z = unpack("C", $u); + } else { + if (!defined($string{$u})) { + $string{$u} = $pstr; + $strval{$pstr} = $u; + $pstr += 1 + length($u); + } + $z = $string{$u}; + } + print "$z,", ($i % 16 == 15) ? 
"\n" : " "; + } + if ($i % 16) { print "\n"; } + print "},\n"; +} +print "};\n\n"; + +print STDERR "And Tubular Bells...\n"; +print "static unsigned char string_table[] = {\n"; +$i = 256; +while ($i < $pstr) { + $w = $strval{$i}; + print length $w, ","; + foreach $x (unpack("C256", $w)) { + print " $x,"; + } + print "\n"; + $i += 1 + length $w; +} +print "};\n"; + +print STDERR "Done.\n"; diff --git a/libucw/charset/misc/gen-ligatures b/libucw/charset/misc/gen-ligatures new file mode 100755 index 0000000..17469ea --- /dev/null +++ b/libucw/charset/misc/gen-ligatures @@ -0,0 +1,73 @@ +#!/usr/bin/perl +# +# Generate Expansion Table of Compatibility Ligatures +# (c) 2003 Martin Mares +# + +use strict; +use warnings; + +print STDERR "Reading ligature list\n"; +open(L, "misc/u-ligatures") || die "lig file open"; +my %ligs = (); +while () { + chomp; + $ligs{$_} = 1; +} +close L; + +print STDERR "Reading decompositions\n"; +open(I, "unidata/UnicodeData.txt") || die "Unable to open UniCode data file"; +my %decs = (); +while () { + chomp; + (/^$/ || /^#/) && next; + my ($code,$name,$cat,$comb,$bidir,$decomp,$d0,$d1,$n0,$mirr,$cmt1,$cmt2,$upper,$lower,$title) = split /;/; + $code =~ /^....$/ || next; + if (my ($d) = ($decomp =~ /^ (.*)/)) { + $decs{$code} = $d; + } +} +close I; + +sub expand($) { + my ($c) = @_; + if (defined $decs{$c}) { + return join (" ", map { expand($_) } split(/\s+/, $decs{$c})); + } else { + return $c; + } +} + +print STDERR "Searching for a perfect hash function\n"; +my $n = keys %ligs; +my $div = $n-1; +DIV: while (++$div) { + #print STDERR "Trying $div... 
"; + my @c = (); + foreach my $l (keys %ligs) { + my $i = (hex $l) % $div; + if (defined $c[$i]) { + #print STDERR "collision\n"; + next DIV; + } + $c[$i] = 1; + } + #print STDERR "FOUND\n"; + last; +} + +print STDERR "Filling hash table with $div entries for $n ligatures\n"; +my @ht = map { "NULL" } 1..$div; +foreach my $l (keys %ligs) { + my $i = (hex $l) % $div; + my $w = join(", ", map { "0x$_" } split(/ /, expand($l))); + $ht[$i] = "/* $l */ (const u16 []) { $w, 0 }"; +} + +print "#define LIG_HASH_SIZE $div\n\n"; +print "static const u16 *_U_lig_hash[] = {\n"; +for (my $i=0; $i<$div; $i++) { + print "\t", $ht[$i], ",\n"; +} +print "};\n"; diff --git a/libucw/charset/misc/gen-unacc b/libucw/charset/misc/gen-unacc new file mode 100755 index 0000000..9513fa2 --- /dev/null +++ b/libucw/charset/misc/gen-unacc @@ -0,0 +1,35 @@ +#!/usr/bin/perl +# +# Create Unicode Unaccenting Table +# (c) 1997 Martin Mares +# + +open (UNI, "unidata/UnicodeData.txt") || die "No Unicode Data File"; +while () { + chomp; + ($num,$name,$cat,$_,$_,$exp) = split /;/; + if ($cat =~ /^L[ul]$/) { $letter{$num} = 1; } + if ($cat =~ /^Mn$/) { $accent{$num} = 1; } +} +close UNI; + +open (UNI, "unidata/UnicodeData.txt") || die "No Unicode Data File"; +while () { + chomp; + ($num,$name,$cat,$_,$_,$exp) = split /;/; + $num =~ /^....$/ || next; + if ($exp ne "") { + $exp =~ s/^<.*> *//g; + $good = 1; + $e = ""; + foreach $a (split(/\s+/, $exp)) { + if ($accent{$a}) { } + elsif ($letter{$a}) { + if ($e ne "") { $good = 0; } + else { $e = $a; } + } else { $good = 0; } + } + if ($good && $e ne "") { print "$num\t0x$e\n"; } + } +} +close UNI; diff --git a/libucw/charset/misc/generate b/libucw/charset/misc/generate new file mode 100644 index 0000000..1d91b76 --- /dev/null +++ b/libucw/charset/misc/generate @@ -0,0 +1,14 @@ +#!/bin/sh +# Generate all data files for the charset conversion library +# (c) 2001--2003 Martin Mares + +set -ex +rm -f U-*.h chartable.h misc/u-* +misc/gen-basic +misc/table2h 
_U_cat byte U-cat.h +misc/table2h _U_lower u16 U-lower.h +misc/table2h _U_upper u16 U-upper.h +( cat misc/user_unacc && misc/gen-unacc ) >misc/u-unacc +misc/table2h _U_unaccent u16 U-unacc.h +misc/gen-ligatures >U-ligatures.h +misc/gen-charconv chartable.h diff --git a/libucw/charset/misc/import-recode b/libucw/charset/misc/import-recode new file mode 100755 index 0000000..07b5a85 --- /dev/null +++ b/libucw/charset/misc/import-recode @@ -0,0 +1,33 @@ +#!/usr/bin/perl +# +# Use `recode` to create a translation table +# (c) 2003, Robert Spalek +# + +use open IN => ":utf8"; + +foreach $charset (@ARGV) +{ + print "Charset: $charset\n"; + open(fi, "recode -s -f $charset/..utf-8/ tmp/$charset") || die; + + while () + { + chop; + (($number, $char) = /^([0-9A-F]{2})\t(.?)$/) || die "Cannot parse $_"; + $recode[hex $number] = $char ne "" ? ord $char : -1; + } + $#recode >= 0 || die "Empty recoding table"; + $recode[10] = 10; + $recode[13] = 13; + + for ($i=0; $i<=$#recode; $i++) + { + printf fo "%02X\t%04X\n", $i, $recode[$i] + if $recode[$i] >= 0; + } + + close(fo); + close(fi); +} diff --git a/libucw/charset/misc/import-unicode_org b/libucw/charset/misc/import-unicode_org new file mode 100755 index 0000000..6c61034 --- /dev/null +++ b/libucw/charset/misc/import-unicode_org @@ -0,0 +1,28 @@ +#!/usr/bin/perl +# Import charset tables from ftp.unicode.org +# (c) 2003, Robert Spalek + +foreach $file (@ARGV) +{ + print "Converting $file\n"; + ($prefix, $filename) = $file =~ m|^(.*/)([^/]*)$|; + + open(fi, "<$file") || die; + open(fo, ">$file-tr") || die; + print fo "# $filename charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/$file +# (c) 2003, Robert Spalek + +"; + while () + { + next if /^#/; + chop; + if (($code, $unicode, $comment) = /^0x(..)\t0x(....)\t#\t?(.*)$/) + { + print fo "$code\t$unicode\t$comment\n"; + } + } + close(fo); + close(fi); +} diff --git a/libucw/charset/misc/mktab256 b/libucw/charset/misc/mktab256 new file mode 100755 index 
0000000..8cfc6e9 --- /dev/null +++ b/libucw/charset/misc/mktab256 @@ -0,0 +1,15 @@ +#!/usr/bin/perl +# +# Simply create a table of all 256 characters +# (c) 2003, Robert Spalek +# + +use open OUT => ":raw"; + +open(fo, '>tmp/tab256') || die; +for ($i=0; $i<256; $i++) +{ + next if $i==10 || $i==13; + printf fo "%02X\t%c\n", $i, $i; +} +close(fo); diff --git a/libucw/charset/misc/table2h b/libucw/charset/misc/table2h new file mode 100755 index 0000000..92a35d5 --- /dev/null +++ b/libucw/charset/misc/table2h @@ -0,0 +1,47 @@ +#!/usr/bin/perl +# +# Generate C Language Table for UniCode Data +# (c) 1997 Martin Mares +# + +$name=$ARGV[0]; +$type=$ARGV[1]; + +while () { + chomp; + /^#/ && next; + /^\s*$/ && next; + s/^0x//; + ($i,$j) = split/\s+/; + ($i =~ /^(..)(..)$/) || die "Syntax error at $i"; + $table{$1} = "$name" . "_$1"; + die if defined $val{$i}; + $val{$i} = $j; +} + +print "/* Generated automatically by gentab. Please don't edit. */\n\n"; + +for($i=0; $i<256; $i++) { + $x = sprintf("%02X", $i); + if (defined($table{$x})) { + print "static const $type $table{$x}\[256\] = \{\n"; + for($j=0; $j<256; $j++) { + $y = $x . 
sprintf("%02X", $j); + if ($val{$y}) { print $val{$y}; } + else { print "0"; } + if ($j != 255) { print ","; } + if ($j % 16 == 15) { print "\n"; } + } + print "\};\n\n"; + } +} + +print "const $type \*$name\[256\] = \{\n"; +for($j=0; $j<256; $j++) { + $y = sprintf("%02X", $j); + if (defined $table{$y}) { print $table{$y}; } + else { print "NULL"; } + if ($j != 255) { print ","; } + if ($j % 16 == 15) { print "\n"; } +} +print "\};\n"; diff --git a/libucw/charset/misc/user_expand b/libucw/charset/misc/user_expand new file mode 100644 index 0000000..c48aa8a --- /dev/null +++ b/libucw/charset/misc/user_expand @@ -0,0 +1,8 @@ +# User-defined character expansions + +# Fraction slash +0x2044 0x002f + +# `ae' and `AE' +0x00c6 0x0041 0x0045 +0x00e6 0x0061 0x0065 diff --git a/libucw/charset/misc/user_unacc b/libucw/charset/misc/user_unacc new file mode 100644 index 0000000..e08f4d1 --- /dev/null +++ b/libucw/charset/misc/user_unacc @@ -0,0 +1,22 @@ +# User-defined unaccenting rules + +# Stroked letters +0x00d8 0x004f +0x00f8 0x006f +0x0110 0x0044 +0x0111 0x0064 +0x0126 0x0048 +0x0127 0x0068 +0x0141 0x004c +0x0142 0x006c +0x0166 0x0054 +0x0167 0x0074 +0x0180 0x0042 +0x0197 0x0049 +0x01b5 0x005a +0x01b6 0x007a +0x01e4 0x0047 +0x01e5 0x0067 +0x01fe 0x004f +0x01ff 0x006f +0x0268 0x0069 diff --git a/libucw/charset/mp-charconv.c b/libucw/charset/mp-charconv.c new file mode 100644 index 0000000..48b08ad --- /dev/null +++ b/libucw/charset/mp-charconv.c @@ -0,0 +1,50 @@ +/* + * UCW Library -- Character Conversion with Allocation on a Memory Pool + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include +#include +#include + +byte * +mp_strconv(struct mempool *mp, const byte *s, uint in_cs, uint out_cs) +{ + if (in_cs == out_cs) + return mp_strdup(mp, s); + + struct conv_context c; + char *b[32]; + uint bs[32], n = 0, sum = 0; + uint l = strlen(s) + 1; + + conv_init(&c); + conv_set_charset(&c, in_cs, out_cs); + c.source = s; + c.source_end = s + l; + + for (;;) + { + l <<= 1; + c.dest_start = c.dest = b[n] = alloca(l); + c.dest_end = c.dest_start+ l; + uint r = conv_run(&c); + sum += bs[n++] = c.dest - c.dest_start; + if (r & CONV_SOURCE_END) + { + c.dest_start = c.dest = mp_alloc(mp, sum); + for (uint i = 0; i < n; i++) + { + memcpy(c.dest, b[i], bs[i]); + c.dest += bs[i]; + } + return c.dest_start; + } + } +} + diff --git a/libucw/charset/mp-charconv.h b/libucw/charset/mp-charconv.h new file mode 100644 index 0000000..d31f271 --- /dev/null +++ b/libucw/charset/mp-charconv.h @@ -0,0 +1,28 @@ +/* + * UCW Library -- Character Conversion with Allocation on a Memory Pool + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _CHARSET_MP_CHARCONV_H +#define _CHARSET_MP_CHARCONV_H + +#include +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define mp_strconv ucw_mp_strconv +#endif + +byte *mp_strconv(struct mempool *mp, const byte *s, uint cs_in, uint cs_out); + +static inline byte *mp_strconv_to_utf8(struct mempool *mp, const byte *s, uint cs_in) +{ return mp_strconv(mp, s, cs_in, CONV_CHARSET_UTF8); } + +static inline byte *mp_strconv_from_utf8(struct mempool *mp, const byte *s, uint cs_out) +{ return mp_strconv(mp, s, CONV_CHARSET_UTF8, cs_out); } + +#endif diff --git a/libucw/charset/set/8859-1 b/libucw/charset/set/8859-1 new file mode 100644 index 0000000..2a1642e --- /dev/null +++ b/libucw/charset/set/8859-1 @@ -0,0 +1,260 @@ +# 8859-1.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO 
+33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X 
+79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 00A1 INVERTED EXCLAMATION MARK +A2 00A2 CENT SIGN +A3 00A3 POUND SIGN +A4 00A4 CURRENCY SIGN +A5 00A5 YEN SIGN +A6 00A6 BROKEN BAR +A7 00A7 SECTION SIGN +A8 00A8 DIAERESIS +A9 00A9 COPYRIGHT SIGN +AA 00AA FEMININE ORDINAL INDICATOR +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 00AC NOT SIGN +AD 00AD SOFT HYPHEN +AE 00AE REGISTERED SIGN +AF 00AF MACRON +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 00B2 SUPERSCRIPT TWO +B3 00B3 SUPERSCRIPT THREE +B4 00B4 ACUTE ACCENT +B5 00B5 MICRO SIGN +B6 00B6 PILCROW SIGN +B7 00B7 MIDDLE DOT +B8 00B8 CEDILLA +B9 00B9 SUPERSCRIPT ONE +BA 00BA MASCULINE ORDINAL INDICATOR +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 00BC VULGAR FRACTION ONE QUARTER +BD 00BD VULGAR FRACTION ONE HALF +BE 00BE VULGAR FRACTION THREE QUARTERS +BF 00BF INVERTED QUESTION MARK +C0 00C0 LATIN CAPITAL LETTER A WITH GRAVE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 00C3 LATIN CAPITAL LETTER A WITH TILDE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +C6 00C6 LATIN CAPITAL LETTER AE +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 00C8 LATIN CAPITAL LETTER E WITH GRAVE +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 00CC LATIN CAPITAL LETTER I WITH GRAVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 00CF LATIN CAPITAL 
LETTER I WITH DIAERESIS +D0 00D0 LATIN CAPITAL LETTER ETH (Icelandic) +D1 00D1 LATIN CAPITAL LETTER N WITH TILDE +D2 00D2 LATIN CAPITAL LETTER O WITH GRAVE +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 00D5 LATIN CAPITAL LETTER O WITH TILDE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 00D7 MULTIPLICATION SIGN +D8 00D8 LATIN CAPITAL LETTER O WITH STROKE +D9 00D9 LATIN CAPITAL LETTER U WITH GRAVE +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 00DD LATIN CAPITAL LETTER Y WITH ACUTE +DE 00DE LATIN CAPITAL LETTER THORN (Icelandic) +DF 00DF LATIN SMALL LETTER SHARP S (German) +E0 00E0 LATIN SMALL LETTER A WITH GRAVE +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 00E3 LATIN SMALL LETTER A WITH TILDE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 00E5 LATIN SMALL LETTER A WITH RING ABOVE +E6 00E6 LATIN SMALL LETTER AE +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 00E8 LATIN SMALL LETTER E WITH GRAVE +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 00EC LATIN SMALL LETTER I WITH GRAVE +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 00EF LATIN SMALL LETTER I WITH DIAERESIS +F0 00F0 LATIN SMALL LETTER ETH (Icelandic) +F1 00F1 LATIN SMALL LETTER N WITH TILDE +F2 00F2 LATIN SMALL LETTER O WITH GRAVE +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 00F5 LATIN SMALL LETTER O WITH TILDE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 00F7 DIVISION SIGN +F8 00F8 LATIN SMALL LETTER O WITH STROKE +F9 00F9 LATIN SMALL LETTER U WITH GRAVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 00FD LATIN SMALL LETTER Y WITH ACUTE +FE 00FE LATIN 
SMALL LETTER THORN (Icelandic) +FF 00FF LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/libucw/charset/set/8859-10 b/libucw/charset/set/8859-10 new file mode 100644 index 0000000..131cdf1 --- /dev/null +++ b/libucw/charset/set/8859-10 @@ -0,0 +1,260 @@ +# 8859-10.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN 
CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 
NO-BREAK SPACE +A1 0104 LATIN CAPITAL LETTER A WITH OGONEK +A2 0112 LATIN CAPITAL LETTER E WITH MACRON +A3 0122 LATIN CAPITAL LETTER G WITH CEDILLA +A4 012A LATIN CAPITAL LETTER I WITH MACRON +A5 0128 LATIN CAPITAL LETTER I WITH TILDE +A6 0136 LATIN CAPITAL LETTER K WITH CEDILLA +A7 00A7 SECTION SIGN +A8 013B LATIN CAPITAL LETTER L WITH CEDILLA +A9 0110 LATIN CAPITAL LETTER D WITH STROKE +AA 0160 LATIN CAPITAL LETTER S WITH CARON +AB 0166 LATIN CAPITAL LETTER T WITH STROKE +AC 017D LATIN CAPITAL LETTER Z WITH CARON +AD 00AD SOFT HYPHEN +AE 016A LATIN CAPITAL LETTER U WITH MACRON +AF 014A LATIN CAPITAL LETTER ENG +B0 00B0 DEGREE SIGN +B1 0105 LATIN SMALL LETTER A WITH OGONEK +B2 0113 LATIN SMALL LETTER E WITH MACRON +B3 0123 LATIN SMALL LETTER G WITH CEDILLA +B4 012B LATIN SMALL LETTER I WITH MACRON +B5 0129 LATIN SMALL LETTER I WITH TILDE +B6 0137 LATIN SMALL LETTER K WITH CEDILLA +B7 00B7 MIDDLE DOT +B8 013C LATIN SMALL LETTER L WITH CEDILLA +B9 0111 LATIN SMALL LETTER D WITH STROKE +BA 0161 LATIN SMALL LETTER S WITH CARON +BB 0167 LATIN SMALL LETTER T WITH STROKE +BC 017E LATIN SMALL LETTER Z WITH CARON +BD 2015 HORIZONTAL BAR +BE 016B LATIN SMALL LETTER U WITH MACRON +BF 014B LATIN SMALL LETTER ENG +C0 0100 LATIN CAPITAL LETTER A WITH MACRON +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 00C3 LATIN CAPITAL LETTER A WITH TILDE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +C6 00C6 LATIN CAPITAL LETTER AE +C7 012E LATIN CAPITAL LETTER I WITH OGONEK +C8 010C LATIN CAPITAL LETTER C WITH CARON +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 0118 LATIN CAPITAL LETTER E WITH OGONEK +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 0116 LATIN CAPITAL LETTER E WITH DOT ABOVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 00CF LATIN CAPITAL LETTER I WITH DIAERESIS +D0 00D0 LATIN CAPITAL LETTER ETH (Icelandic) +D1 0145 LATIN 
CAPITAL LETTER N WITH CEDILLA +D2 014C LATIN CAPITAL LETTER O WITH MACRON +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 00D5 LATIN CAPITAL LETTER O WITH TILDE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 0168 LATIN CAPITAL LETTER U WITH TILDE +D8 00D8 LATIN CAPITAL LETTER O WITH STROKE +D9 0172 LATIN CAPITAL LETTER U WITH OGONEK +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 00DD LATIN CAPITAL LETTER Y WITH ACUTE +DE 00DE LATIN CAPITAL LETTER THORN (Icelandic) +DF 00DF LATIN SMALL LETTER SHARP S (German) +E0 0101 LATIN SMALL LETTER A WITH MACRON +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 00E3 LATIN SMALL LETTER A WITH TILDE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 00E5 LATIN SMALL LETTER A WITH RING ABOVE +E6 00E6 LATIN SMALL LETTER AE +E7 012F LATIN SMALL LETTER I WITH OGONEK +E8 010D LATIN SMALL LETTER C WITH CARON +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 0119 LATIN SMALL LETTER E WITH OGONEK +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 0117 LATIN SMALL LETTER E WITH DOT ABOVE +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 00EF LATIN SMALL LETTER I WITH DIAERESIS +F0 00F0 LATIN SMALL LETTER ETH (Icelandic) +F1 0146 LATIN SMALL LETTER N WITH CEDILLA +F2 014D LATIN SMALL LETTER O WITH MACRON +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 00F5 LATIN SMALL LETTER O WITH TILDE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 0169 LATIN SMALL LETTER U WITH TILDE +F8 00F8 LATIN SMALL LETTER O WITH STROKE +F9 0173 LATIN SMALL LETTER U WITH OGONEK +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 00FD LATIN SMALL LETTER Y WITH ACUTE +FE 00FE LATIN SMALL LETTER THORN (Icelandic) +FF 0138 
LATIN SMALL LETTER KRA diff --git a/libucw/charset/set/8859-11 b/libucw/charset/set/8859-11 new file mode 100644 index 0000000..7ff1e7b --- /dev/null +++ b/libucw/charset/set/8859-11 @@ -0,0 +1,252 @@ +# 8859-11.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 
LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 0E01 THAI CHARACTER KO KAI +A2 0E02 THAI 
CHARACTER KHO KHAI +A3 0E03 THAI CHARACTER KHO KHUAT +A4 0E04 THAI CHARACTER KHO KHWAI +A5 0E05 THAI CHARACTER KHO KHON +A6 0E06 THAI CHARACTER KHO RAKHANG +A7 0E07 THAI CHARACTER NGO NGU +A8 0E08 THAI CHARACTER CHO CHAN +A9 0E09 THAI CHARACTER CHO CHING +AA 0E0A THAI CHARACTER CHO CHANG +AB 0E0B THAI CHARACTER SO SO +AC 0E0C THAI CHARACTER CHO CHOE +AD 0E0D THAI CHARACTER YO YING +AE 0E0E THAI CHARACTER DO CHADA +AF 0E0F THAI CHARACTER TO PATAK +B0 0E10 THAI CHARACTER THO THAN +B1 0E11 THAI CHARACTER THO NANGMONTHO +B2 0E12 THAI CHARACTER THO PHUTHAO +B3 0E13 THAI CHARACTER NO NEN +B4 0E14 THAI CHARACTER DO DEK +B5 0E15 THAI CHARACTER TO TAO +B6 0E16 THAI CHARACTER THO THUNG +B7 0E17 THAI CHARACTER THO THAHAN +B8 0E18 THAI CHARACTER THO THONG +B9 0E19 THAI CHARACTER NO NU +BA 0E1A THAI CHARACTER BO BAIMAI +BB 0E1B THAI CHARACTER PO PLA +BC 0E1C THAI CHARACTER PHO PHUNG +BD 0E1D THAI CHARACTER FO FA +BE 0E1E THAI CHARACTER PHO PHAN +BF 0E1F THAI CHARACTER FO FAN +C0 0E20 THAI CHARACTER PHO SAMPHAO +C1 0E21 THAI CHARACTER MO MA +C2 0E22 THAI CHARACTER YO YAK +C3 0E23 THAI CHARACTER RO RUA +C4 0E24 THAI CHARACTER RU +C5 0E25 THAI CHARACTER LO LING +C6 0E26 THAI CHARACTER LU +C7 0E27 THAI CHARACTER WO WAEN +C8 0E28 THAI CHARACTER SO SALA +C9 0E29 THAI CHARACTER SO RUSI +CA 0E2A THAI CHARACTER SO SUA +CB 0E2B THAI CHARACTER HO HIP +CC 0E2C THAI CHARACTER LO CHULA +CD 0E2D THAI CHARACTER O ANG +CE 0E2E THAI CHARACTER HO NOKHUK +CF 0E2F THAI CHARACTER PAIYANNOI +D0 0E30 THAI CHARACTER SARA A +D1 0E31 THAI CHARACTER MAI HAN-AKAT +D2 0E32 THAI CHARACTER SARA AA +D3 0E33 THAI CHARACTER SARA AM +D4 0E34 THAI CHARACTER SARA I +D5 0E35 THAI CHARACTER SARA II +D6 0E36 THAI CHARACTER SARA UE +D7 0E37 THAI CHARACTER SARA UEE +D8 0E38 THAI CHARACTER SARA U +D9 0E39 THAI CHARACTER SARA UU +DA 0E3A THAI CHARACTER PHINTHU +DF 0E3F THAI CURRENCY SYMBOL BAHT +E0 0E40 THAI CHARACTER SARA E +E1 0E41 THAI CHARACTER SARA AE +E2 0E42 THAI CHARACTER SARA O +E3 0E43 THAI CHARACTER SARA AI 
MAIMUAN +E4 0E44 THAI CHARACTER SARA AI MAIMALAI +E5 0E45 THAI CHARACTER LAKKHANGYAO +E6 0E46 THAI CHARACTER MAIYAMOK +E7 0E47 THAI CHARACTER MAITAIKHU +E8 0E48 THAI CHARACTER MAI EK +E9 0E49 THAI CHARACTER MAI THO +EA 0E4A THAI CHARACTER MAI TRI +EB 0E4B THAI CHARACTER MAI CHATTAWA +EC 0E4C THAI CHARACTER THANTHAKHAT +ED 0E4D THAI CHARACTER NIKHAHIT +EE 0E4E THAI CHARACTER YAMAKKAN +EF 0E4F THAI CHARACTER FONGMAN +F0 0E50 THAI DIGIT ZERO +F1 0E51 THAI DIGIT ONE +F2 0E52 THAI DIGIT TWO +F3 0E53 THAI DIGIT THREE +F4 0E54 THAI DIGIT FOUR +F5 0E55 THAI DIGIT FIVE +F6 0E56 THAI DIGIT SIX +F7 0E57 THAI DIGIT SEVEN +F8 0E58 THAI DIGIT EIGHT +F9 0E59 THAI DIGIT NINE +FA 0E5A THAI CHARACTER ANGKHANKHU +FB 0E5B THAI CHARACTER KHOMUT diff --git a/libucw/charset/set/8859-13 b/libucw/charset/set/8859-13 new file mode 100644 index 0000000..b670022 --- /dev/null +++ b/libucw/charset/set/8859-13 @@ -0,0 +1,260 @@ +# 8859-13.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 
RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL 
LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 201D RIGHT DOUBLE QUOTATION MARK +A2 00A2 CENT SIGN +A3 00A3 POUND SIGN +A4 00A4 CURRENCY SIGN +A5 201E DOUBLE LOW-9 QUOTATION MARK +A6 00A6 BROKEN BAR +A7 00A7 SECTION SIGN +A8 00D8 LATIN CAPITAL LETTER O WITH STROKE +A9 00A9 COPYRIGHT SIGN +AA 0156 LATIN CAPITAL LETTER R WITH CEDILLA +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 00AC NOT SIGN +AD 00AD SOFT HYPHEN +AE 00AE REGISTERED SIGN +AF 00C6 LATIN CAPITAL LETTER AE +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 00B2 SUPERSCRIPT TWO +B3 00B3 SUPERSCRIPT THREE +B4 201C LEFT DOUBLE QUOTATION MARK +B5 00B5 MICRO SIGN +B6 00B6 PILCROW SIGN +B7 00B7 MIDDLE DOT +B8 00F8 LATIN SMALL LETTER O WITH STROKE +B9 00B9 SUPERSCRIPT ONE +BA 0157 LATIN SMALL LETTER R WITH CEDILLA +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 00BC VULGAR FRACTION ONE QUARTER +BD 00BD VULGAR FRACTION ONE HALF +BE 00BE VULGAR FRACTION THREE QUARTERS +BF 00E6 LATIN SMALL LETTER AE +C0 0104 LATIN CAPITAL LETTER A WITH OGONEK +C1 012E LATIN CAPITAL LETTER I WITH OGONEK +C2 0100 LATIN CAPITAL LETTER A WITH MACRON +C3 0106 LATIN CAPITAL LETTER C WITH ACUTE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +C6 0118 LATIN CAPITAL LETTER E WITH OGONEK +C7 0112 LATIN CAPITAL LETTER E WITH MACRON +C8 010C LATIN CAPITAL LETTER 
C WITH CARON +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 0179 LATIN CAPITAL LETTER Z WITH ACUTE +CB 0116 LATIN CAPITAL LETTER E WITH DOT ABOVE +CC 0122 LATIN CAPITAL LETTER G WITH CEDILLA +CD 0136 LATIN CAPITAL LETTER K WITH CEDILLA +CE 012A LATIN CAPITAL LETTER I WITH MACRON +CF 013B LATIN CAPITAL LETTER L WITH CEDILLA +D0 0160 LATIN CAPITAL LETTER S WITH CARON +D1 0143 LATIN CAPITAL LETTER N WITH ACUTE +D2 0145 LATIN CAPITAL LETTER N WITH CEDILLA +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 014C LATIN CAPITAL LETTER O WITH MACRON +D5 00D5 LATIN CAPITAL LETTER O WITH TILDE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 00D7 MULTIPLICATION SIGN +D8 0172 LATIN CAPITAL LETTER U WITH OGONEK +D9 0141 LATIN CAPITAL LETTER L WITH STROKE +DA 015A LATIN CAPITAL LETTER S WITH ACUTE +DB 016A LATIN CAPITAL LETTER U WITH MACRON +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 017B LATIN CAPITAL LETTER Z WITH DOT ABOVE +DE 017D LATIN CAPITAL LETTER Z WITH CARON +DF 00DF LATIN SMALL LETTER SHARP S (German) +E0 0105 LATIN SMALL LETTER A WITH OGONEK +E1 012F LATIN SMALL LETTER I WITH OGONEK +E2 0101 LATIN SMALL LETTER A WITH MACRON +E3 0107 LATIN SMALL LETTER C WITH ACUTE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 00E5 LATIN SMALL LETTER A WITH RING ABOVE +E6 0119 LATIN SMALL LETTER E WITH OGONEK +E7 0113 LATIN SMALL LETTER E WITH MACRON +E8 010D LATIN SMALL LETTER C WITH CARON +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 017A LATIN SMALL LETTER Z WITH ACUTE +EB 0117 LATIN SMALL LETTER E WITH DOT ABOVE +EC 0123 LATIN SMALL LETTER G WITH CEDILLA +ED 0137 LATIN SMALL LETTER K WITH CEDILLA +EE 012B LATIN SMALL LETTER I WITH MACRON +EF 013C LATIN SMALL LETTER L WITH CEDILLA +F0 0161 LATIN SMALL LETTER S WITH CARON +F1 0144 LATIN SMALL LETTER N WITH ACUTE +F2 0146 LATIN SMALL LETTER N WITH CEDILLA +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 014D LATIN SMALL LETTER O WITH MACRON +F5 00F5 LATIN SMALL LETTER O WITH TILDE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 
00F7 DIVISION SIGN +F8 0173 LATIN SMALL LETTER U WITH OGONEK +F9 0142 LATIN SMALL LETTER L WITH STROKE +FA 015B LATIN SMALL LETTER S WITH ACUTE +FB 016B LATIN SMALL LETTER U WITH MACRON +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 017C LATIN SMALL LETTER Z WITH DOT ABOVE +FE 017E LATIN SMALL LETTER Z WITH CARON +FF 2019 RIGHT SINGLE QUOTATION MARK diff --git a/libucw/charset/set/8859-14 b/libucw/charset/set/8859-14 new file mode 100644 index 0000000..5efde8b --- /dev/null +++ b/libucw/charset/set/8859-14 @@ -0,0 +1,260 @@ +# 8859-14.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN 
SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 
+83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 1E02 LATIN CAPITAL LETTER B WITH DOT ABOVE +A2 1E03 LATIN SMALL LETTER B WITH DOT ABOVE +A3 00A3 POUND SIGN +A4 010A LATIN CAPITAL LETTER C WITH DOT ABOVE +A5 010B LATIN SMALL LETTER C WITH DOT ABOVE +A6 1E0A LATIN CAPITAL LETTER D WITH DOT ABOVE +A7 00A7 SECTION SIGN +A8 1E80 LATIN CAPITAL LETTER W WITH GRAVE +A9 00A9 COPYRIGHT SIGN +AA 1E82 LATIN CAPITAL LETTER W WITH ACUTE +AB 1E0B LATIN SMALL LETTER D WITH DOT ABOVE +AC 1EF2 LATIN CAPITAL LETTER Y WITH GRAVE +AD 00AD SOFT HYPHEN +AE 00AE REGISTERED SIGN +AF 0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +B0 1E1E LATIN CAPITAL LETTER F WITH DOT ABOVE +B1 1E1F LATIN SMALL LETTER F WITH DOT ABOVE +B2 0120 LATIN CAPITAL LETTER G WITH DOT ABOVE +B3 0121 LATIN SMALL LETTER G WITH DOT ABOVE +B4 1E40 LATIN CAPITAL LETTER M WITH DOT ABOVE +B5 1E41 LATIN SMALL LETTER M WITH DOT ABOVE +B6 00B6 PILCROW SIGN +B7 1E56 LATIN CAPITAL LETTER P WITH DOT ABOVE +B8 1E81 LATIN SMALL LETTER W WITH GRAVE +B9 1E57 LATIN SMALL LETTER P WITH DOT ABOVE +BA 1E83 LATIN SMALL LETTER W WITH ACUTE +BB 1E60 LATIN CAPITAL LETTER S WITH DOT ABOVE +BC 1EF3 LATIN SMALL LETTER Y WITH GRAVE +BD 1E84 LATIN CAPITAL LETTER W WITH DIAERESIS +BE 1E85 LATIN SMALL LETTER W WITH DIAERESIS +BF 1E61 LATIN SMALL LETTER S WITH DOT ABOVE +C0 00C0 LATIN CAPITAL LETTER A WITH GRAVE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 00C3 LATIN CAPITAL LETTER A WITH TILDE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +C6 00C6 LATIN CAPITAL LETTER AE +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 00C8 LATIN CAPITAL LETTER E WITH GRAVE +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 00CA LATIN 
CAPITAL LETTER E WITH CIRCUMFLEX +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 00CC LATIN CAPITAL LETTER I WITH GRAVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 00CF LATIN CAPITAL LETTER I WITH DIAERESIS +D0 0174 LATIN CAPITAL LETTER W WITH CIRCUMFLEX +D1 00D1 LATIN CAPITAL LETTER N WITH TILDE +D2 00D2 LATIN CAPITAL LETTER O WITH GRAVE +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 00D5 LATIN CAPITAL LETTER O WITH TILDE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 1E6A LATIN CAPITAL LETTER T WITH DOT ABOVE +D8 00D8 LATIN CAPITAL LETTER O WITH STROKE +D9 00D9 LATIN CAPITAL LETTER U WITH GRAVE +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 00DD LATIN CAPITAL LETTER Y WITH ACUTE +DE 0176 LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +DF 00DF LATIN SMALL LETTER SHARP S +E0 00E0 LATIN SMALL LETTER A WITH GRAVE +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 00E3 LATIN SMALL LETTER A WITH TILDE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 00E5 LATIN SMALL LETTER A WITH RING ABOVE +E6 00E6 LATIN SMALL LETTER AE +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 00E8 LATIN SMALL LETTER E WITH GRAVE +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 00EC LATIN SMALL LETTER I WITH GRAVE +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 00EF LATIN SMALL LETTER I WITH DIAERESIS +F0 0175 LATIN SMALL LETTER W WITH CIRCUMFLEX +F1 00F1 LATIN SMALL LETTER N WITH TILDE +F2 00F2 LATIN SMALL LETTER O WITH GRAVE +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 00F5 LATIN SMALL LETTER O WITH TILDE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 1E6B LATIN SMALL LETTER T WITH DOT 
ABOVE +F8 00F8 LATIN SMALL LETTER O WITH STROKE +F9 00F9 LATIN SMALL LETTER U WITH GRAVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 00FD LATIN SMALL LETTER Y WITH ACUTE +FE 0177 LATIN SMALL LETTER Y WITH CIRCUMFLEX +FF 00FF LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/libucw/charset/set/8859-15 b/libucw/charset/set/8859-15 new file mode 100644 index 0000000..175e47d --- /dev/null +++ b/libucw/charset/set/8859-15 @@ -0,0 +1,260 @@ +# 8859-15.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN 
SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 
+83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 00A1 INVERTED EXCLAMATION MARK +A2 00A2 CENT SIGN +A3 00A3 POUND SIGN +A4 20AC EURO SIGN +A5 00A5 YEN SIGN +A6 0160 LATIN CAPITAL LETTER S WITH CARON +A7 00A7 SECTION SIGN +A8 0161 LATIN SMALL LETTER S WITH CARON +A9 00A9 COPYRIGHT SIGN +AA 00AA FEMININE ORDINAL INDICATOR +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 00AC NOT SIGN +AD 00AD SOFT HYPHEN +AE 00AE REGISTERED SIGN +AF 00AF MACRON +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 00B2 SUPERSCRIPT TWO +B3 00B3 SUPERSCRIPT THREE +B4 017D LATIN CAPITAL LETTER Z WITH CARON +B5 00B5 MICRO SIGN +B6 00B6 PILCROW SIGN +B7 00B7 MIDDLE DOT +B8 017E LATIN SMALL LETTER Z WITH CARON +B9 00B9 SUPERSCRIPT ONE +BA 00BA MASCULINE ORDINAL INDICATOR +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 0152 LATIN CAPITAL LIGATURE OE +BD 0153 LATIN SMALL LIGATURE OE +BE 0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +BF 00BF INVERTED QUESTION MARK +C0 00C0 LATIN CAPITAL LETTER A WITH GRAVE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 00C3 LATIN CAPITAL LETTER A WITH TILDE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +C6 00C6 LATIN CAPITAL LETTER AE +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 00C8 LATIN CAPITAL LETTER E WITH GRAVE +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 00CC LATIN CAPITAL LETTER I WITH GRAVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 00CF LATIN CAPITAL LETTER I WITH DIAERESIS +D0 00D0 LATIN CAPITAL LETTER ETH +D1 00D1 LATIN CAPITAL LETTER N WITH TILDE +D2 00D2 
LATIN CAPITAL LETTER O WITH GRAVE +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 00D5 LATIN CAPITAL LETTER O WITH TILDE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 00D7 MULTIPLICATION SIGN +D8 00D8 LATIN CAPITAL LETTER O WITH STROKE +D9 00D9 LATIN CAPITAL LETTER U WITH GRAVE +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 00DD LATIN CAPITAL LETTER Y WITH ACUTE +DE 00DE LATIN CAPITAL LETTER THORN +DF 00DF LATIN SMALL LETTER SHARP S +E0 00E0 LATIN SMALL LETTER A WITH GRAVE +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 00E3 LATIN SMALL LETTER A WITH TILDE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 00E5 LATIN SMALL LETTER A WITH RING ABOVE +E6 00E6 LATIN SMALL LETTER AE +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 00E8 LATIN SMALL LETTER E WITH GRAVE +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 00EC LATIN SMALL LETTER I WITH GRAVE +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 00EF LATIN SMALL LETTER I WITH DIAERESIS +F0 00F0 LATIN SMALL LETTER ETH +F1 00F1 LATIN SMALL LETTER N WITH TILDE +F2 00F2 LATIN SMALL LETTER O WITH GRAVE +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 00F5 LATIN SMALL LETTER O WITH TILDE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 00F7 DIVISION SIGN +F8 00F8 LATIN SMALL LETTER O WITH STROKE +F9 00F9 LATIN SMALL LETTER U WITH GRAVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 00FD LATIN SMALL LETTER Y WITH ACUTE +FE 00FE LATIN SMALL LETTER THORN +FF 00FF LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/libucw/charset/set/8859-16 b/libucw/charset/set/8859-16 new file mode 100644 
index 0000000..c4127f7 --- /dev/null +++ b/libucw/charset/set/8859-16 @@ -0,0 +1,260 @@ +# 8859-16.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-16.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL 
LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 0104 LATIN CAPITAL LETTER A WITH OGONEK +A2 0105 LATIN SMALL LETTER A WITH OGONEK +A3 0141 LATIN CAPITAL LETTER L WITH STROKE +A4 20AC EURO SIGN +A5 
201E DOUBLE LOW-9 QUOTATION MARK +A6 0160 LATIN CAPITAL LETTER S WITH CARON +A7 00A7 SECTION SIGN +A8 0161 LATIN SMALL LETTER S WITH CARON +A9 00A9 COPYRIGHT SIGN +AA 0218 LATIN CAPITAL LETTER S WITH COMMA BELOW +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 0179 LATIN CAPITAL LETTER Z WITH ACUTE +AD 00AD SOFT HYPHEN +AE 017A LATIN SMALL LETTER Z WITH ACUTE +AF 017B LATIN CAPITAL LETTER Z WITH DOT ABOVE +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 010C LATIN CAPITAL LETTER C WITH CARON +B3 0142 LATIN SMALL LETTER L WITH STROKE +B4 017D LATIN CAPITAL LETTER Z WITH CARON +B5 201D RIGHT DOUBLE QUOTATION MARK +B6 00B6 PILCROW SIGN +B7 00B7 MIDDLE DOT +B8 017E LATIN SMALL LETTER Z WITH CARON +B9 010D LATIN SMALL LETTER C WITH CARON +BA 0219 LATIN SMALL LETTER S WITH COMMA BELOW +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 0152 LATIN CAPITAL LIGATURE OE +BD 0153 LATIN SMALL LIGATURE OE +BE 0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +BF 017C LATIN SMALL LETTER Z WITH DOT ABOVE +C0 00C0 LATIN CAPITAL LETTER A WITH GRAVE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 0102 LATIN CAPITAL LETTER A WITH BREVE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 0106 LATIN CAPITAL LETTER C WITH ACUTE +C6 00C6 LATIN CAPITAL LETTER AE +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 00C8 LATIN CAPITAL LETTER E WITH GRAVE +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 00CC LATIN CAPITAL LETTER I WITH GRAVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 00CF LATIN CAPITAL LETTER I WITH DIAERESIS +D0 0110 LATIN CAPITAL LETTER D WITH STROKE +D1 0143 LATIN CAPITAL LETTER N WITH ACUTE +D2 00D2 LATIN CAPITAL LETTER O WITH GRAVE +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +D6 00D6 
LATIN CAPITAL LETTER O WITH DIAERESIS +D7 015A LATIN CAPITAL LETTER S WITH ACUTE +D8 0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +D9 00D9 LATIN CAPITAL LETTER U WITH GRAVE +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 0118 LATIN CAPITAL LETTER E WITH OGONEK +DE 021A LATIN CAPITAL LETTER T WITH COMMA BELOW +DF 00DF LATIN SMALL LETTER SHARP S +E0 00E0 LATIN SMALL LETTER A WITH GRAVE +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 0103 LATIN SMALL LETTER A WITH BREVE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 0107 LATIN SMALL LETTER C WITH ACUTE +E6 00E6 LATIN SMALL LETTER AE +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 00E8 LATIN SMALL LETTER E WITH GRAVE +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 00EC LATIN SMALL LETTER I WITH GRAVE +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 00EF LATIN SMALL LETTER I WITH DIAERESIS +F0 0111 LATIN SMALL LETTER D WITH STROKE +F1 0144 LATIN SMALL LETTER N WITH ACUTE +F2 00F2 LATIN SMALL LETTER O WITH GRAVE +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 015B LATIN SMALL LETTER S WITH ACUTE +F8 0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE +F9 00F9 LATIN SMALL LETTER U WITH GRAVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 0119 LATIN SMALL LETTER E WITH OGONEK +FE 021B LATIN SMALL LETTER T WITH COMMA BELOW +FF 00FF LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/libucw/charset/set/8859-2 b/libucw/charset/set/8859-2 new file mode 100644 index 0000000..1d3c4dd --- /dev/null +++ b/libucw/charset/set/8859-2 @@ -0,0 +1,260 @@ +# 
8859-2.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL 
LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 0104 LATIN CAPITAL LETTER A WITH OGONEK +A2 02D8 BREVE +A3 0141 LATIN CAPITAL LETTER L WITH STROKE +A4 00A4 CURRENCY SIGN +A5 013D LATIN CAPITAL LETTER L WITH CARON +A6 015A LATIN CAPITAL LETTER S WITH ACUTE +A7 00A7 SECTION SIGN +A8 00A8 DIAERESIS 
+A9 0160 LATIN CAPITAL LETTER S WITH CARON +AA 015E LATIN CAPITAL LETTER S WITH CEDILLA +AB 0164 LATIN CAPITAL LETTER T WITH CARON +AC 0179 LATIN CAPITAL LETTER Z WITH ACUTE +AD 00AD SOFT HYPHEN +AE 017D LATIN CAPITAL LETTER Z WITH CARON +AF 017B LATIN CAPITAL LETTER Z WITH DOT ABOVE +B0 00B0 DEGREE SIGN +B1 0105 LATIN SMALL LETTER A WITH OGONEK +B2 02DB OGONEK +B3 0142 LATIN SMALL LETTER L WITH STROKE +B4 00B4 ACUTE ACCENT +B5 013E LATIN SMALL LETTER L WITH CARON +B6 015B LATIN SMALL LETTER S WITH ACUTE +B7 02C7 CARON +B8 00B8 CEDILLA +B9 0161 LATIN SMALL LETTER S WITH CARON +BA 015F LATIN SMALL LETTER S WITH CEDILLA +BB 0165 LATIN SMALL LETTER T WITH CARON +BC 017A LATIN SMALL LETTER Z WITH ACUTE +BD 02DD DOUBLE ACUTE ACCENT +BE 017E LATIN SMALL LETTER Z WITH CARON +BF 017C LATIN SMALL LETTER Z WITH DOT ABOVE +C0 0154 LATIN CAPITAL LETTER R WITH ACUTE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 0102 LATIN CAPITAL LETTER A WITH BREVE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 0139 LATIN CAPITAL LETTER L WITH ACUTE +C6 0106 LATIN CAPITAL LETTER C WITH ACUTE +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 010C LATIN CAPITAL LETTER C WITH CARON +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 0118 LATIN CAPITAL LETTER E WITH OGONEK +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 011A LATIN CAPITAL LETTER E WITH CARON +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 010E LATIN CAPITAL LETTER D WITH CARON +D0 0110 LATIN CAPITAL LETTER D WITH STROKE +D1 0143 LATIN CAPITAL LETTER N WITH ACUTE +D2 0147 LATIN CAPITAL LETTER N WITH CARON +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 00D7 MULTIPLICATION SIGN +D8 0158 LATIN CAPITAL LETTER R WITH CARON +D9 016E LATIN CAPITAL LETTER U WITH RING ABOVE +DA 00DA LATIN CAPITAL LETTER 
U WITH ACUTE +DB 0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 00DD LATIN CAPITAL LETTER Y WITH ACUTE +DE 0162 LATIN CAPITAL LETTER T WITH CEDILLA +DF 00DF LATIN SMALL LETTER SHARP S +E0 0155 LATIN SMALL LETTER R WITH ACUTE +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 0103 LATIN SMALL LETTER A WITH BREVE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 013A LATIN SMALL LETTER L WITH ACUTE +E6 0107 LATIN SMALL LETTER C WITH ACUTE +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 010D LATIN SMALL LETTER C WITH CARON +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 0119 LATIN SMALL LETTER E WITH OGONEK +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 011B LATIN SMALL LETTER E WITH CARON +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 010F LATIN SMALL LETTER D WITH CARON +F0 0111 LATIN SMALL LETTER D WITH STROKE +F1 0144 LATIN SMALL LETTER N WITH ACUTE +F2 0148 LATIN SMALL LETTER N WITH CARON +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 00F7 DIVISION SIGN +F8 0159 LATIN SMALL LETTER R WITH CARON +F9 016F LATIN SMALL LETTER U WITH RING ABOVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 00FD LATIN SMALL LETTER Y WITH ACUTE +FE 0163 LATIN SMALL LETTER T WITH CEDILLA +FF 02D9 DOT ABOVE diff --git a/libucw/charset/set/8859-3 b/libucw/charset/set/8859-3 new file mode 100644 index 0000000..d047987 --- /dev/null +++ b/libucw/charset/set/8859-3 @@ -0,0 +1,253 @@ +# 8859-3.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY 
+06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL 
LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 0126 LATIN CAPITAL LETTER H WITH STROKE +A2 02D8 BREVE +A3 00A3 POUND SIGN +A4 00A4 CURRENCY SIGN +A6 0124 LATIN CAPITAL LETTER H WITH CIRCUMFLEX +A7 00A7 SECTION SIGN +A8 00A8 DIAERESIS +A9 0130 LATIN CAPITAL LETTER I WITH DOT ABOVE +AA 015E LATIN CAPITAL LETTER S WITH CEDILLA +AB 011E LATIN CAPITAL LETTER G WITH BREVE +AC 0134 LATIN CAPITAL LETTER J WITH CIRCUMFLEX +AD 00AD SOFT HYPHEN +AF 017B LATIN CAPITAL LETTER Z WITH DOT ABOVE +B0 00B0 DEGREE SIGN +B1 0127 LATIN SMALL LETTER H WITH STROKE +B2 
00B2 SUPERSCRIPT TWO +B3 00B3 SUPERSCRIPT THREE +B4 00B4 ACUTE ACCENT +B5 00B5 MICRO SIGN +B6 0125 LATIN SMALL LETTER H WITH CIRCUMFLEX +B7 00B7 MIDDLE DOT +B8 00B8 CEDILLA +B9 0131 LATIN SMALL LETTER DOTLESS I +BA 015F LATIN SMALL LETTER S WITH CEDILLA +BB 011F LATIN SMALL LETTER G WITH BREVE +BC 0135 LATIN SMALL LETTER J WITH CIRCUMFLEX +BD 00BD VULGAR FRACTION ONE HALF +BF 017C LATIN SMALL LETTER Z WITH DOT ABOVE +C0 00C0 LATIN CAPITAL LETTER A WITH GRAVE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 010A LATIN CAPITAL LETTER C WITH DOT ABOVE +C6 0108 LATIN CAPITAL LETTER C WITH CIRCUMFLEX +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 00C8 LATIN CAPITAL LETTER E WITH GRAVE +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 00CC LATIN CAPITAL LETTER I WITH GRAVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 00CF LATIN CAPITAL LETTER I WITH DIAERESIS +D1 00D1 LATIN CAPITAL LETTER N WITH TILDE +D2 00D2 LATIN CAPITAL LETTER O WITH GRAVE +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 0120 LATIN CAPITAL LETTER G WITH DOT ABOVE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 00D7 MULTIPLICATION SIGN +D8 011C LATIN CAPITAL LETTER G WITH CIRCUMFLEX +D9 00D9 LATIN CAPITAL LETTER U WITH GRAVE +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 016C LATIN CAPITAL LETTER U WITH BREVE +DE 015C LATIN CAPITAL LETTER S WITH CIRCUMFLEX +DF 00DF LATIN SMALL LETTER SHARP S +E0 00E0 LATIN SMALL LETTER A WITH GRAVE +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 010B LATIN SMALL LETTER C WITH DOT ABOVE +E6 0109 LATIN SMALL 
LETTER C WITH CIRCUMFLEX +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 00E8 LATIN SMALL LETTER E WITH GRAVE +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 00EC LATIN SMALL LETTER I WITH GRAVE +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 00EF LATIN SMALL LETTER I WITH DIAERESIS +F1 00F1 LATIN SMALL LETTER N WITH TILDE +F2 00F2 LATIN SMALL LETTER O WITH GRAVE +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 0121 LATIN SMALL LETTER G WITH DOT ABOVE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 00F7 DIVISION SIGN +F8 011D LATIN SMALL LETTER G WITH CIRCUMFLEX +F9 00F9 LATIN SMALL LETTER U WITH GRAVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 016D LATIN SMALL LETTER U WITH BREVE +FE 015D LATIN SMALL LETTER S WITH CIRCUMFLEX +FF 02D9 DOT ABOVE diff --git a/libucw/charset/set/8859-4 b/libucw/charset/set/8859-4 new file mode 100644 index 0000000..b5338c1 --- /dev/null +++ b/libucw/charset/set/8859-4 @@ -0,0 +1,260 @@ +# 8859-4.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE 
SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 
LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 0104 LATIN CAPITAL LETTER A WITH OGONEK +A2 0138 LATIN SMALL LETTER KRA +A3 0156 LATIN CAPITAL LETTER R WITH CEDILLA +A4 00A4 CURRENCY SIGN +A5 0128 LATIN CAPITAL LETTER I WITH TILDE +A6 013B LATIN CAPITAL LETTER L WITH CEDILLA +A7 00A7 SECTION SIGN +A8 00A8 DIAERESIS +A9 0160 LATIN CAPITAL LETTER S WITH CARON +AA 0112 LATIN CAPITAL LETTER E WITH MACRON +AB 0122 LATIN CAPITAL LETTER G WITH CEDILLA +AC 0166 LATIN CAPITAL LETTER T WITH STROKE +AD 00AD SOFT HYPHEN +AE 017D LATIN CAPITAL LETTER Z WITH CARON +AF 00AF MACRON +B0 00B0 DEGREE SIGN +B1 0105 LATIN SMALL LETTER A WITH OGONEK +B2 02DB OGONEK +B3 0157 LATIN SMALL LETTER R WITH CEDILLA +B4 00B4 ACUTE ACCENT +B5 0129 LATIN SMALL LETTER I WITH TILDE +B6 013C LATIN SMALL LETTER L WITH CEDILLA +B7 02C7 CARON +B8 00B8 CEDILLA +B9 0161 LATIN SMALL LETTER S WITH CARON +BA 0113 LATIN SMALL LETTER E WITH MACRON +BB 0123 LATIN SMALL LETTER G WITH CEDILLA +BC 0167 LATIN SMALL LETTER T WITH STROKE +BD 014A LATIN CAPITAL LETTER ENG +BE 017E LATIN SMALL LETTER Z WITH CARON +BF 014B 
LATIN SMALL LETTER ENG +C0 0100 LATIN CAPITAL LETTER A WITH MACRON +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 00C3 LATIN CAPITAL LETTER A WITH TILDE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +C6 00C6 LATIN CAPITAL LETTER AE +C7 012E LATIN CAPITAL LETTER I WITH OGONEK +C8 010C LATIN CAPITAL LETTER C WITH CARON +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 0118 LATIN CAPITAL LETTER E WITH OGONEK +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 0116 LATIN CAPITAL LETTER E WITH DOT ABOVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 012A LATIN CAPITAL LETTER I WITH MACRON +D0 0110 LATIN CAPITAL LETTER D WITH STROKE +D1 0145 LATIN CAPITAL LETTER N WITH CEDILLA +D2 014C LATIN CAPITAL LETTER O WITH MACRON +D3 0136 LATIN CAPITAL LETTER K WITH CEDILLA +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 00D5 LATIN CAPITAL LETTER O WITH TILDE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 00D7 MULTIPLICATION SIGN +D8 00D8 LATIN CAPITAL LETTER O WITH STROKE +D9 0172 LATIN CAPITAL LETTER U WITH OGONEK +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 0168 LATIN CAPITAL LETTER U WITH TILDE +DE 016A LATIN CAPITAL LETTER U WITH MACRON +DF 00DF LATIN SMALL LETTER SHARP S +E0 0101 LATIN SMALL LETTER A WITH MACRON +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 00E3 LATIN SMALL LETTER A WITH TILDE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 00E5 LATIN SMALL LETTER A WITH RING ABOVE +E6 00E6 LATIN SMALL LETTER AE +E7 012F LATIN SMALL LETTER I WITH OGONEK +E8 010D LATIN SMALL LETTER C WITH CARON +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 0119 LATIN SMALL LETTER E WITH OGONEK +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 0117 LATIN SMALL LETTER E WITH DOT ABOVE +ED 00ED LATIN SMALL 
LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 012B LATIN SMALL LETTER I WITH MACRON +F0 0111 LATIN SMALL LETTER D WITH STROKE +F1 0146 LATIN SMALL LETTER N WITH CEDILLA +F2 014D LATIN SMALL LETTER O WITH MACRON +F3 0137 LATIN SMALL LETTER K WITH CEDILLA +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 00F5 LATIN SMALL LETTER O WITH TILDE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 00F7 DIVISION SIGN +F8 00F8 LATIN SMALL LETTER O WITH STROKE +F9 0173 LATIN SMALL LETTER U WITH OGONEK +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 0169 LATIN SMALL LETTER U WITH TILDE +FE 016B LATIN SMALL LETTER U WITH MACRON +FF 02D9 DOT ABOVE diff --git a/libucw/charset/set/8859-5 b/libucw/charset/set/8859-5 new file mode 100644 index 0000000..cb95544 --- /dev/null +++ b/libucw/charset/set/8859-5 @@ -0,0 +1,260 @@ +# 8859-5.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS 
+29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN 
SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 0401 CYRILLIC CAPITAL LETTER IO +A2 0402 CYRILLIC CAPITAL LETTER DJE +A3 0403 CYRILLIC CAPITAL LETTER GJE +A4 0404 CYRILLIC CAPITAL LETTER UKRAINIAN IE +A5 0405 CYRILLIC CAPITAL LETTER DZE +A6 0406 CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +A7 0407 CYRILLIC CAPITAL LETTER YI +A8 0408 CYRILLIC CAPITAL LETTER JE +A9 0409 CYRILLIC CAPITAL LETTER LJE +AA 040A CYRILLIC CAPITAL LETTER NJE +AB 040B CYRILLIC CAPITAL LETTER TSHE +AC 040C CYRILLIC CAPITAL LETTER KJE +AD 00AD SOFT HYPHEN +AE 040E CYRILLIC CAPITAL LETTER SHORT U +AF 040F CYRILLIC CAPITAL LETTER DZHE +B0 0410 CYRILLIC CAPITAL LETTER A +B1 0411 CYRILLIC CAPITAL LETTER BE +B2 0412 CYRILLIC CAPITAL LETTER VE +B3 0413 CYRILLIC CAPITAL LETTER GHE +B4 0414 CYRILLIC CAPITAL LETTER DE +B5 0415 CYRILLIC CAPITAL LETTER IE +B6 0416 CYRILLIC CAPITAL LETTER ZHE +B7 0417 CYRILLIC CAPITAL LETTER ZE +B8 0418 CYRILLIC CAPITAL LETTER I +B9 0419 CYRILLIC CAPITAL LETTER SHORT I +BA 041A CYRILLIC CAPITAL LETTER KA +BB 041B CYRILLIC CAPITAL LETTER EL +BC 041C CYRILLIC CAPITAL LETTER EM +BD 041D CYRILLIC CAPITAL LETTER EN +BE 041E CYRILLIC CAPITAL LETTER O +BF 041F CYRILLIC CAPITAL LETTER PE +C0 0420 CYRILLIC CAPITAL LETTER ER +C1 0421 CYRILLIC CAPITAL LETTER ES +C2 0422 CYRILLIC CAPITAL LETTER TE +C3 0423 CYRILLIC CAPITAL LETTER U +C4 0424 CYRILLIC CAPITAL 
LETTER EF +C5 0425 CYRILLIC CAPITAL LETTER HA +C6 0426 CYRILLIC CAPITAL LETTER TSE +C7 0427 CYRILLIC CAPITAL LETTER CHE +C8 0428 CYRILLIC CAPITAL LETTER SHA +C9 0429 CYRILLIC CAPITAL LETTER SHCHA +CA 042A CYRILLIC CAPITAL LETTER HARD SIGN +CB 042B CYRILLIC CAPITAL LETTER YERU +CC 042C CYRILLIC CAPITAL LETTER SOFT SIGN +CD 042D CYRILLIC CAPITAL LETTER E +CE 042E CYRILLIC CAPITAL LETTER YU +CF 042F CYRILLIC CAPITAL LETTER YA +D0 0430 CYRILLIC SMALL LETTER A +D1 0431 CYRILLIC SMALL LETTER BE +D2 0432 CYRILLIC SMALL LETTER VE +D3 0433 CYRILLIC SMALL LETTER GHE +D4 0434 CYRILLIC SMALL LETTER DE +D5 0435 CYRILLIC SMALL LETTER IE +D6 0436 CYRILLIC SMALL LETTER ZHE +D7 0437 CYRILLIC SMALL LETTER ZE +D8 0438 CYRILLIC SMALL LETTER I +D9 0439 CYRILLIC SMALL LETTER SHORT I +DA 043A CYRILLIC SMALL LETTER KA +DB 043B CYRILLIC SMALL LETTER EL +DC 043C CYRILLIC SMALL LETTER EM +DD 043D CYRILLIC SMALL LETTER EN +DE 043E CYRILLIC SMALL LETTER O +DF 043F CYRILLIC SMALL LETTER PE +E0 0440 CYRILLIC SMALL LETTER ER +E1 0441 CYRILLIC SMALL LETTER ES +E2 0442 CYRILLIC SMALL LETTER TE +E3 0443 CYRILLIC SMALL LETTER U +E4 0444 CYRILLIC SMALL LETTER EF +E5 0445 CYRILLIC SMALL LETTER HA +E6 0446 CYRILLIC SMALL LETTER TSE +E7 0447 CYRILLIC SMALL LETTER CHE +E8 0448 CYRILLIC SMALL LETTER SHA +E9 0449 CYRILLIC SMALL LETTER SHCHA +EA 044A CYRILLIC SMALL LETTER HARD SIGN +EB 044B CYRILLIC SMALL LETTER YERU +EC 044C CYRILLIC SMALL LETTER SOFT SIGN +ED 044D CYRILLIC SMALL LETTER E +EE 044E CYRILLIC SMALL LETTER YU +EF 044F CYRILLIC SMALL LETTER YA +F0 2116 NUMERO SIGN +F1 0451 CYRILLIC SMALL LETTER IO +F2 0452 CYRILLIC SMALL LETTER DJE +F3 0453 CYRILLIC SMALL LETTER GJE +F4 0454 CYRILLIC SMALL LETTER UKRAINIAN IE +F5 0455 CYRILLIC SMALL LETTER DZE +F6 0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +F7 0457 CYRILLIC SMALL LETTER YI +F8 0458 CYRILLIC SMALL LETTER JE +F9 0459 CYRILLIC SMALL LETTER LJE +FA 045A CYRILLIC SMALL LETTER NJE +FB 045B CYRILLIC SMALL LETTER TSHE +FC 045C CYRILLIC SMALL 
LETTER KJE +FD 00A7 SECTION SIGN +FE 045E CYRILLIC SMALL LETTER SHORT U +FF 045F CYRILLIC SMALL LETTER DZHE diff --git a/libucw/charset/set/8859-6 b/libucw/charset/set/8859-6 new file mode 100644 index 0000000..7d23eb8 --- /dev/null +++ b/libucw/charset/set/8859-6 @@ -0,0 +1,215 @@ +# 8859-6.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL 
LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 
009E +9F 009F +A0 00A0 NO-BREAK SPACE +A4 00A4 CURRENCY SIGN +AC 060C ARABIC COMMA +AD 00AD SOFT HYPHEN +BB 061B ARABIC SEMICOLON +BF 061F ARABIC QUESTION MARK +C1 0621 ARABIC LETTER HAMZA +C2 0622 ARABIC LETTER ALEF WITH MADDA ABOVE +C3 0623 ARABIC LETTER ALEF WITH HAMZA ABOVE +C4 0624 ARABIC LETTER WAW WITH HAMZA ABOVE +C5 0625 ARABIC LETTER ALEF WITH HAMZA BELOW +C6 0626 ARABIC LETTER YEH WITH HAMZA ABOVE +C7 0627 ARABIC LETTER ALEF +C8 0628 ARABIC LETTER BEH +C9 0629 ARABIC LETTER TEH MARBUTA +CA 062A ARABIC LETTER TEH +CB 062B ARABIC LETTER THEH +CC 062C ARABIC LETTER JEEM +CD 062D ARABIC LETTER HAH +CE 062E ARABIC LETTER KHAH +CF 062F ARABIC LETTER DAL +D0 0630 ARABIC LETTER THAL +D1 0631 ARABIC LETTER REH +D2 0632 ARABIC LETTER ZAIN +D3 0633 ARABIC LETTER SEEN +D4 0634 ARABIC LETTER SHEEN +D5 0635 ARABIC LETTER SAD +D6 0636 ARABIC LETTER DAD +D7 0637 ARABIC LETTER TAH +D8 0638 ARABIC LETTER ZAH +D9 0639 ARABIC LETTER AIN +DA 063A ARABIC LETTER GHAIN +E0 0640 ARABIC TATWEEL +E1 0641 ARABIC LETTER FEH +E2 0642 ARABIC LETTER QAF +E3 0643 ARABIC LETTER KAF +E4 0644 ARABIC LETTER LAM +E5 0645 ARABIC LETTER MEEM +E6 0646 ARABIC LETTER NOON +E7 0647 ARABIC LETTER HEH +E8 0648 ARABIC LETTER WAW +E9 0649 ARABIC LETTER ALEF MAKSURA +EA 064A ARABIC LETTER YEH +EB 064B ARABIC FATHATAN +EC 064C ARABIC DAMMATAN +ED 064D ARABIC KASRATAN +EE 064E ARABIC FATHA +EF 064F ARABIC DAMMA +F0 0650 ARABIC KASRA +F1 0651 ARABIC SHADDA +F2 0652 ARABIC SUKUN diff --git a/libucw/charset/set/8859-7 b/libucw/charset/set/8859-7 new file mode 100644 index 0000000..c67c8dc --- /dev/null +++ b/libucw/charset/set/8859-7 @@ -0,0 +1,254 @@ +# 8859-7.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE 
FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL 
LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 2018 LEFT SINGLE QUOTATION MARK +A2 2019 RIGHT SINGLE QUOTATION MARK +A3 00A3 POUND SIGN +A6 00A6 BROKEN BAR +A7 00A7 SECTION SIGN +A8 00A8 DIAERESIS +A9 00A9 COPYRIGHT SIGN +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 00AC NOT SIGN +AD 00AD SOFT HYPHEN +AF 2015 HORIZONTAL BAR +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 00B2 SUPERSCRIPT TWO +B3 00B3 SUPERSCRIPT THREE +B4 0384 GREEK TONOS +B5 0385 GREEK DIALYTIKA TONOS +B6 0386 GREEK CAPITAL LETTER ALPHA WITH TONOS +B7 00B7 MIDDLE DOT +B8 0388 GREEK CAPITAL LETTER EPSILON WITH TONOS +B9 0389 GREEK CAPITAL LETTER ETA WITH TONOS 
+BA 038A GREEK CAPITAL LETTER IOTA WITH TONOS +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 038C GREEK CAPITAL LETTER OMICRON WITH TONOS +BD 00BD VULGAR FRACTION ONE HALF +BE 038E GREEK CAPITAL LETTER UPSILON WITH TONOS +BF 038F GREEK CAPITAL LETTER OMEGA WITH TONOS +C0 0390 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +C1 0391 GREEK CAPITAL LETTER ALPHA +C2 0392 GREEK CAPITAL LETTER BETA +C3 0393 GREEK CAPITAL LETTER GAMMA +C4 0394 GREEK CAPITAL LETTER DELTA +C5 0395 GREEK CAPITAL LETTER EPSILON +C6 0396 GREEK CAPITAL LETTER ZETA +C7 0397 GREEK CAPITAL LETTER ETA +C8 0398 GREEK CAPITAL LETTER THETA +C9 0399 GREEK CAPITAL LETTER IOTA +CA 039A GREEK CAPITAL LETTER KAPPA +CB 039B GREEK CAPITAL LETTER LAMDA +CC 039C GREEK CAPITAL LETTER MU +CD 039D GREEK CAPITAL LETTER NU +CE 039E GREEK CAPITAL LETTER XI +CF 039F GREEK CAPITAL LETTER OMICRON +D0 03A0 GREEK CAPITAL LETTER PI +D1 03A1 GREEK CAPITAL LETTER RHO +D3 03A3 GREEK CAPITAL LETTER SIGMA +D4 03A4 GREEK CAPITAL LETTER TAU +D5 03A5 GREEK CAPITAL LETTER UPSILON +D6 03A6 GREEK CAPITAL LETTER PHI +D7 03A7 GREEK CAPITAL LETTER CHI +D8 03A8 GREEK CAPITAL LETTER PSI +D9 03A9 GREEK CAPITAL LETTER OMEGA +DA 03AA GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +DB 03AB GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +DC 03AC GREEK SMALL LETTER ALPHA WITH TONOS +DD 03AD GREEK SMALL LETTER EPSILON WITH TONOS +DE 03AE GREEK SMALL LETTER ETA WITH TONOS +DF 03AF GREEK SMALL LETTER IOTA WITH TONOS +E0 03B0 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +E1 03B1 GREEK SMALL LETTER ALPHA +E2 03B2 GREEK SMALL LETTER BETA +E3 03B3 GREEK SMALL LETTER GAMMA +E4 03B4 GREEK SMALL LETTER DELTA +E5 03B5 GREEK SMALL LETTER EPSILON +E6 03B6 GREEK SMALL LETTER ZETA +E7 03B7 GREEK SMALL LETTER ETA +E8 03B8 GREEK SMALL LETTER THETA +E9 03B9 GREEK SMALL LETTER IOTA +EA 03BA GREEK SMALL LETTER KAPPA +EB 03BB GREEK SMALL LETTER LAMDA +EC 03BC GREEK SMALL LETTER MU +ED 03BD GREEK SMALL LETTER NU +EE 03BE GREEK SMALL LETTER XI +EF 03BF GREEK 
SMALL LETTER OMICRON +F0 03C0 GREEK SMALL LETTER PI +F1 03C1 GREEK SMALL LETTER RHO +F2 03C2 GREEK SMALL LETTER FINAL SIGMA +F3 03C3 GREEK SMALL LETTER SIGMA +F4 03C4 GREEK SMALL LETTER TAU +F5 03C5 GREEK SMALL LETTER UPSILON +F6 03C6 GREEK SMALL LETTER PHI +F7 03C7 GREEK SMALL LETTER CHI +F8 03C8 GREEK SMALL LETTER PSI +F9 03C9 GREEK SMALL LETTER OMEGA +FA 03CA GREEK SMALL LETTER IOTA WITH DIALYTIKA +FB 03CB GREEK SMALL LETTER UPSILON WITH DIALYTIKA +FC 03CC GREEK SMALL LETTER OMICRON WITH TONOS +FD 03CD GREEK SMALL LETTER UPSILON WITH TONOS +FE 03CE GREEK SMALL LETTER OMEGA WITH TONOS diff --git a/libucw/charset/set/8859-8 b/libucw/charset/set/8859-8 new file mode 100644 index 0000000..ed0e279 --- /dev/null +++ b/libucw/charset/set/8859-8 @@ -0,0 +1,224 @@ +# 8859-8.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO 
+31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL 
LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A2 00A2 CENT SIGN +A3 00A3 POUND SIGN +A4 00A4 CURRENCY SIGN +A5 00A5 YEN SIGN +A6 00A6 BROKEN BAR +A7 00A7 SECTION SIGN +A8 00A8 DIAERESIS +A9 00A9 COPYRIGHT SIGN +AA 00D7 MULTIPLICATION SIGN +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 00AC NOT SIGN +AD 00AD SOFT HYPHEN +AE 00AE REGISTERED SIGN +AF 00AF MACRON +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 00B2 SUPERSCRIPT TWO +B3 00B3 SUPERSCRIPT THREE +B4 00B4 ACUTE ACCENT +B5 00B5 MICRO SIGN +B6 00B6 PILCROW SIGN +B7 00B7 MIDDLE DOT +B8 00B8 CEDILLA +B9 00B9 SUPERSCRIPT ONE +BA 00F7 DIVISION SIGN +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 00BC VULGAR FRACTION ONE QUARTER +BD 00BD VULGAR FRACTION ONE HALF +BE 00BE VULGAR FRACTION THREE QUARTERS +DF 2017 DOUBLE LOW LINE +E0 05D0 HEBREW LETTER ALEF +E1 05D1 HEBREW LETTER BET +E2 05D2 HEBREW LETTER GIMEL +E3 05D3 HEBREW LETTER DALET +E4 05D4 HEBREW LETTER HE +E5 05D5 HEBREW LETTER VAV +E6 05D6 HEBREW LETTER ZAYIN +E7 05D7 HEBREW LETTER HET +E8 05D8 HEBREW LETTER TET +E9 05D9 HEBREW LETTER YOD +EA 05DA HEBREW LETTER FINAL KAF +EB 05DB HEBREW LETTER KAF +EC 05DC HEBREW LETTER LAMED +ED 05DD HEBREW LETTER FINAL MEM +EE 05DE HEBREW LETTER MEM +EF 05DF HEBREW LETTER FINAL NUN +F0 05E0 HEBREW LETTER NUN +F1 05E1 HEBREW LETTER SAMEKH +F2 05E2 HEBREW LETTER AYIN +F3 05E3 HEBREW LETTER FINAL PE +F4 05E4 HEBREW LETTER PE +F5 05E5 HEBREW LETTER FINAL TSADI +F6 05E6 HEBREW LETTER TSADI +F7 05E7 HEBREW LETTER QOF +F8 05E8 HEBREW LETTER 
RESH +F9 05E9 HEBREW LETTER SHIN +FA 05EA HEBREW LETTER TAV +FD 200E LEFT-TO-RIGHT MARK +FE 200F RIGHT-TO-LEFT MARK diff --git a/libucw/charset/set/8859-9 b/libucw/charset/set/8859-9 new file mode 100644 index 0000000..3092624 --- /dev/null +++ b/libucw/charset/set/8859-9 @@ -0,0 +1,260 @@ +# 8859-9.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN 
CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0080 +81 0081 +82 0082 +83 0083 +84 0084 +85 0085 +86 0086 +87 0087 +88 0088 +89 0089 +8A 008A +8B 008B +8C 008C +8D 008D +8E 008E +8F 008F +90 0090 +91 0091 +92 0092 +93 0093 +94 0094 +95 0095 +96 0096 +97 0097 +98 0098 +99 0099 +9A 009A +9B 009B +9C 009C +9D 
009D +9E 009E +9F 009F +A0 00A0 NO-BREAK SPACE +A1 00A1 INVERTED EXCLAMATION MARK +A2 00A2 CENT SIGN +A3 00A3 POUND SIGN +A4 00A4 CURRENCY SIGN +A5 00A5 YEN SIGN +A6 00A6 BROKEN BAR +A7 00A7 SECTION SIGN +A8 00A8 DIAERESIS +A9 00A9 COPYRIGHT SIGN +AA 00AA FEMININE ORDINAL INDICATOR +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 00AC NOT SIGN +AD 00AD SOFT HYPHEN +AE 00AE REGISTERED SIGN +AF 00AF MACRON +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 00B2 SUPERSCRIPT TWO +B3 00B3 SUPERSCRIPT THREE +B4 00B4 ACUTE ACCENT +B5 00B5 MICRO SIGN +B6 00B6 PILCROW SIGN +B7 00B7 MIDDLE DOT +B8 00B8 CEDILLA +B9 00B9 SUPERSCRIPT ONE +BA 00BA MASCULINE ORDINAL INDICATOR +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 00BC VULGAR FRACTION ONE QUARTER +BD 00BD VULGAR FRACTION ONE HALF +BE 00BE VULGAR FRACTION THREE QUARTERS +BF 00BF INVERTED QUESTION MARK +C0 00C0 LATIN CAPITAL LETTER A WITH GRAVE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 00C3 LATIN CAPITAL LETTER A WITH TILDE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +C6 00C6 LATIN CAPITAL LETTER AE +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 00C8 LATIN CAPITAL LETTER E WITH GRAVE +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 00CC LATIN CAPITAL LETTER I WITH GRAVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 00CF LATIN CAPITAL LETTER I WITH DIAERESIS +D0 011E LATIN CAPITAL LETTER G WITH BREVE +D1 00D1 LATIN CAPITAL LETTER N WITH TILDE +D2 00D2 LATIN CAPITAL LETTER O WITH GRAVE +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 00D5 LATIN CAPITAL LETTER O WITH TILDE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 00D7 MULTIPLICATION SIGN +D8 00D8 LATIN CAPITAL LETTER O WITH STROKE +D9 00D9 LATIN CAPITAL LETTER U 
WITH GRAVE +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 0130 LATIN CAPITAL LETTER I WITH DOT ABOVE +DE 015E LATIN CAPITAL LETTER S WITH CEDILLA +DF 00DF LATIN SMALL LETTER SHARP S +E0 00E0 LATIN SMALL LETTER A WITH GRAVE +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 00E3 LATIN SMALL LETTER A WITH TILDE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 00E5 LATIN SMALL LETTER A WITH RING ABOVE +E6 00E6 LATIN SMALL LETTER AE +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 00E8 LATIN SMALL LETTER E WITH GRAVE +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 00EC LATIN SMALL LETTER I WITH GRAVE +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 00EF LATIN SMALL LETTER I WITH DIAERESIS +F0 011F LATIN SMALL LETTER G WITH BREVE +F1 00F1 LATIN SMALL LETTER N WITH TILDE +F2 00F2 LATIN SMALL LETTER O WITH GRAVE +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 00F5 LATIN SMALL LETTER O WITH TILDE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 00F7 DIVISION SIGN +F8 00F8 LATIN SMALL LETTER O WITH STROKE +F9 00F9 LATIN SMALL LETTER U WITH GRAVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 0131 LATIN SMALL LETTER DOTLESS I +FE 015F LATIN SMALL LETTER S WITH CEDILLA +FF 00FF LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/libucw/charset/set/ascii b/libucw/charset/set/ascii new file mode 100644 index 0000000..826f1f6 --- /dev/null +++ b/libucw/charset/set/ascii @@ -0,0 +1,131 @@ +# ASCII Charset File +# (c) 1997 Martin Mares + +00 0000 +01 0001 +02 0002 +03 0003 +04 0004 +05 0005 +06 0006 +07 0007 +08 0008 +09 0009 +0A 000A +0B 000B +0C 000C +0D 000D +0E 000E +0F 000F +10 0010 +11 0011 
+12 0012 +13 0013 +14 0014 +15 0015 +16 0016 +17 0017 +18 0018 +19 0019 +1A 001A +1B 001B +1C 001C +1D 001D +1E 001E +1F 001F +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER 
G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F diff --git a/libucw/charset/set/cork b/libucw/charset/set/cork new file mode 100644 index 0000000..b5846b7 --- /dev/null +++ b/libucw/charset/set/cork @@ -0,0 +1,259 @@ +# Cork Charset File +# Adapted from CStoCS "cork.enc" by Martin Mares + +00 0000 +01 0001 +02 0002 +03 0003 +04 0004 +05 0005 +06 0006 +07 0007 +08 0008 +09 0009 +0A 000A +0B 000B +0C 000C +0D 000D +0E 000E +0F 000F +10 0010 +11 0011 +12 0012 +13 0013 +14 0014 +15 0015 +16 0016 +17 0017 +18 0018 +19 0019 +1A 001A +1B 001B +1C 001C +1D 001D +1E 001E +1F 001F +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN 
CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F +80 0102 LATIN CAPITAL LETTER A WITH BREVE +81 0104 LATIN CAPITAL LETTER A WITH OGONEK +82 0106 LATIN CAPITAL LETTER C WITH ACUTE +83 010C LATIN CAPITAL LETTER C WITH CARON +84 010E LATIN CAPITAL LETTER D WITH CARON +85 011A LATIN CAPITAL 
LETTER E WITH CARON +86 0118 LATIN CAPITAL LETTER E WITH OGONEK +87 011E LATIN CAPITAL LETTER G WITH BREVE +88 0139 LATIN CAPITAL LETTER L WITH ACUTE +89 013D LATIN CAPITAL LETTER L WITH CARON +8A 0141 LATIN CAPITAL LETTER L WITH STROKE +8B 0143 LATIN CAPITAL LETTER N WITH ACUTE +8C 0147 LATIN CAPITAL LETTER N WITH CARON +8D 014A LATIN CAPITAL LETTER ENG +8E 0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +8F 0154 LATIN CAPITAL LETTER R WITH ACUTE +90 0158 LATIN CAPITAL LETTER R WITH CARON +91 015A LATIN CAPITAL LETTER S WITH ACUTE +92 0160 LATIN CAPITAL LETTER S WITH CARON +93 015E LATIN CAPITAL LETTER S WITH CEDILLA +94 0164 LATIN CAPITAL LETTER T WITH CARON +95 0162 LATIN CAPITAL LETTER T WITH CEDILLA +96 0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +97 016E LATIN CAPITAL LETTER U WITH RING ABOVE +98 0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +99 0179 LATIN CAPITAL LETTER Z WITH ACUTE +9A 017D LATIN CAPITAL LETTER Z WITH CARON +9B 017B LATIN CAPITAL LETTER Z WITH DOT ABOVE +9C 0132 LATIN CAPITAL LIGATURE IJ +9D 0130 LATIN CAPITAL LETTER I WITH DOT ABOVE +9E 00F0 LATIN SMALL LETTER ETH +9F 00A7 SECTION SIGN +A0 0103 LATIN SMALL LETTER A WITH BREVE +A1 0105 LATIN SMALL LETTER A WITH OGONEK +A2 0107 LATIN SMALL LETTER C WITH ACUTE +A3 010D LATIN SMALL LETTER C WITH CARON +A4 010F LATIN SMALL LETTER D WITH CARON +A5 011B LATIN SMALL LETTER E WITH CARON +A6 0119 LATIN SMALL LETTER E WITH OGONEK +A7 011F LATIN SMALL LETTER G WITH BREVE +A8 013A LATIN SMALL LETTER L WITH ACUTE +A9 013E LATIN SMALL LETTER L WITH CARON +AA 0142 LATIN SMALL LETTER L WITH STROKE +AB 0144 LATIN SMALL LETTER N WITH ACUTE +AC 0148 LATIN SMALL LETTER N WITH CARON +AD 014B LATIN SMALL LETTER ENG +AE 0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE +AF 0155 LATIN SMALL LETTER R WITH ACUTE +B0 0159 LATIN SMALL LETTER R WITH CARON +B1 015B LATIN SMALL LETTER S WITH ACUTE +B2 0161 LATIN SMALL LETTER S WITH CARON +B3 015F LATIN SMALL LETTER S WITH CEDILLA +B4 0165 LATIN SMALL LETTER T WITH CARON +B5 0163 
LATIN SMALL LETTER T WITH CEDILLA +B6 0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE +B7 016F LATIN SMALL LETTER U WITH RING ABOVE +B8 00FF LATIN SMALL LETTER Y WITH DIAERESIS +B9 017A LATIN SMALL LETTER Z WITH ACUTE +BA 017E LATIN SMALL LETTER Z WITH CARON +BB 017C LATIN SMALL LETTER Z WITH DOT ABOVE +BC 0133 LATIN SMALL LIGATURE IJ +BD 00A1 INVERTED EXCLAMATION MARK +BE 00BF INVERTED QUESTION MARK +BF 00A3 POUND SIGN +C0 00C0 LATIN CAPITAL LETTER A WITH GRAVE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 00C3 LATIN CAPITAL LETTER A WITH TILDE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +C6 00C6 LATIN CAPITAL LETTER AE +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 00C8 LATIN CAPITAL LETTER E WITH GRAVE +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 00CC LATIN CAPITAL LETTER I WITH GRAVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 00CF LATIN CAPITAL LETTER I WITH DIAERESIS +D0 00D0 LATIN CAPITAL LETTER ETH +D1 00D1 LATIN CAPITAL LETTER N WITH TILDE +D2 00D2 LATIN CAPITAL LETTER O WITH GRAVE +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 00D5 LATIN CAPITAL LETTER O WITH TILDE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 0152 LATIN CAPITAL LIGATURE OE +D8 00D8 LATIN CAPITAL LETTER O WITH STROKE +D9 00D9 LATIN CAPITAL LETTER U WITH GRAVE +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 00DD LATIN CAPITAL LETTER Y WITH ACUTE +DE 00DE LATIN CAPITAL LETTER THORN +DF F0DF ???? 
+E0 00E0 LATIN SMALL LETTER A WITH GRAVE +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 00E3 LATIN SMALL LETTER A WITH TILDE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 00E5 LATIN SMALL LETTER A WITH RING ABOVE +E6 00E6 LATIN SMALL LETTER AE +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 00E8 LATIN SMALL LETTER E WITH GRAVE +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 00EC LATIN SMALL LETTER I WITH GRAVE +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 00EF LATIN SMALL LETTER I WITH DIAERESIS +F0 00F0 LATIN SMALL LETTER ETH +F1 00F1 LATIN SMALL LETTER N WITH TILDE +F2 00F2 LATIN SMALL LETTER O WITH GRAVE +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 00F5 LATIN SMALL LETTER O WITH TILDE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 0153 LATIN SMALL LIGATURE OE +F8 00F8 LATIN SMALL LETTER O WITH STROKE +F9 00F9 LATIN SMALL LETTER U WITH GRAVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 00FD LATIN SMALL LETTER Y WITH ACUTE +FE 00FE LATIN SMALL LETTER THORN +FF 00DF LATIN SMALL LETTER SHARP S diff --git a/libucw/charset/set/ibm b/libucw/charset/set/ibm new file mode 100644 index 0000000..9cf6aaf --- /dev/null +++ b/libucw/charset/set/ibm @@ -0,0 +1,259 @@ +# IBM Charset File +# (c) 1997 Martin Mares + +00 0000 +01 263A WHITE SMILING FACE +02 263B BLACK SMILING FACE +03 2665 BLACK HEART SUIT +04 2666 BLACK DIAMOND SUIT +05 2663 BLACK CLUB SUIT +06 2660 BLACK SPADE SUIT +07 2022 BULLET +08 25D8 INVERSE BULLET +09 25CB WHITE CIRCLE +0A 25D9 INVERSE WHITE CIRCLE +0B 2642 MALE SIGN +0C 2640 FEMALE SIGN +0D 266A EIGHTH NOTE +0E 266B BEAMED EIGHTH NOTES +0F 263C WHITE SUN WITH RAYS +10 25B6 BLACK RIGHT-POINTING TRIANGLE +11 25C0 BLACK LEFT-POINTING TRIANGLE 
+12 2195 UP DOWN ARROW +13 203C DOUBLE EXCLAMATION MARK +14 00B6 PILCROW SIGN +15 00A7 SECTION SIGN +16 25AC BLACK RECTANGLE +17 21A8 UP DOWN ARROW WITH BASE +18 2191 UPWARDS ARROW +19 2193 DOWNWARDS ARROW +1A 2192 RIGHTWARDS ARROW +1B 2190 LEFTWARDS ARROW +1C 221F RIGHT ANGLE +1D 2194 LEFT RIGHT ARROW +1E 25B2 BLACK UP-POINTING TRIANGLE +1F 25BC BLACK DOWN-POINTING TRIANGLE +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX 
ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 2302 HOUSE +80 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +81 00FC LATIN SMALL LETTER U WITH DIAERESIS +82 00E9 LATIN SMALL LETTER E WITH ACUTE +83 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +84 00E4 LATIN SMALL LETTER A WITH DIAERESIS +85 00E0 LATIN SMALL LETTER A WITH GRAVE +86 00E5 LATIN SMALL LETTER A WITH RING ABOVE +87 00E7 LATIN SMALL LETTER C WITH CEDILLA +88 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +89 00EB LATIN SMALL LETTER E WITH DIAERESIS +8A 00E8 LATIN SMALL LETTER E WITH GRAVE +8B 00EF LATIN SMALL LETTER I WITH DIAERESIS +8C 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +8D 00EC LATIN SMALL LETTER I WITH GRAVE +8E 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +8F 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +90 00C9 LATIN CAPITAL LETTER E WITH ACUTE +91 00E6 LATIN SMALL LETTER AE +92 00C6 LATIN CAPITAL LETTER AE +93 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +94 00F6 LATIN SMALL LETTER O WITH DIAERESIS +95 00F2 LATIN SMALL LETTER O WITH GRAVE +96 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +97 00F9 LATIN SMALL LETTER U WITH GRAVE +98 00FF LATIN SMALL 
LETTER Y WITH DIAERESIS +99 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +9A 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +9B 00A2 CENT SIGN +9C 00A3 POUND SIGN +9D 00A5 YEN SIGN +9E 20A7 PESETA SIGN +9F 0192 LATIN SMALL LETTER F WITH HOOK +A0 00E1 LATIN SMALL LETTER A WITH ACUTE +A1 00ED LATIN SMALL LETTER I WITH ACUTE +A2 00F3 LATIN SMALL LETTER O WITH ACUTE +A3 00FA LATIN SMALL LETTER U WITH ACUTE +A4 00F1 LATIN SMALL LETTER N WITH TILDE +A5 00D1 LATIN CAPITAL LETTER N WITH TILDE +A6 00AA FEMININE ORDINAL INDICATOR +A7 00BA MASCULINE ORDINAL INDICATOR +A8 00BF INVERTED QUESTION MARK +A9 2310 REVERSED NOT SIGN +AA 00AC NOT SIGN +AB 00BD VULGAR FRACTION ONE HALF +AC 00BC VULGAR FRACTION ONE QUARTER +AD 00A1 INVERTED EXCLAMATION MARK +AE 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AF 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +B0 2591 LIGHT SHADE +B1 2592 MEDIUM SHADE +B2 2593 DARK SHADE +B3 2502 BOX DRAWINGS LIGHT VERTICAL +B4 2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT +B5 2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +B6 2562 BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +B7 2556 BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +B8 2555 BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +B9 2563 BOX DRAWINGS DOUBLE VERTICAL AND LEFT +BA 2551 BOX DRAWINGS DOUBLE VERTICAL +BB 2557 BOX DRAWINGS DOUBLE DOWN AND LEFT +BC 255D BOX DRAWINGS DOUBLE UP AND LEFT +BD 255C BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +BE 255B BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +BF 2510 BOX DRAWINGS LIGHT DOWN AND LEFT +C0 2514 BOX DRAWINGS LIGHT UP AND RIGHT +C1 2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL +C2 252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +C3 251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT +C4 2500 BOX DRAWINGS LIGHT HORIZONTAL +C5 253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +C6 255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +C7 255F BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +C8 255A BOX DRAWINGS DOUBLE UP AND RIGHT +C9 2554 BOX DRAWINGS DOUBLE DOWN AND RIGHT +CA 2569 BOX DRAWINGS 
DOUBLE UP AND HORIZONTAL +CB 2566 BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +CC 2560 BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +CD 2550 BOX DRAWINGS DOUBLE HORIZONTAL +CE 256C BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +CF 2567 BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +D0 2568 BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +D1 2564 BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +D2 2565 BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +D3 2559 BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +D4 2558 BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +D5 2552 BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +D6 2553 BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +D7 256B BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +D8 256A BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +D9 2518 BOX DRAWINGS LIGHT UP AND LEFT +DA 250C BOX DRAWINGS LIGHT DOWN AND RIGHT +DB 2588 FULL BLOCK +DC 2584 LOWER HALF BLOCK +DD 258C LEFT HALF BLOCK +DE 2590 RIGHT HALF BLOCK +DF 2580 UPPER HALF BLOCK +E0 03B1 GREEK SMALL LETTER ALPHA +E1 00DF LATIN SMALL LETTER SHARP S +E2 0393 GREEK CAPITAL LETTER GAMMA +E3 03C0 GREEK SMALL LETTER PI +E4 03A3 GREEK CAPITAL LETTER SIGMA +E5 03C3 GREEK SMALL LETTER SIGMA +E6 00B5 MICRO SIGN +E7 03C4 GREEK SMALL LETTER TAU +E8 03A6 GREEK CAPITAL LETTER PHI +E9 0398 GREEK CAPITAL LETTER THETA +EA 03A9 GREEK CAPITAL LETTER OMEGA +EB 03B4 GREEK SMALL LETTER DELTA +EC 221E INFINITY +ED 03C6 GREEK SMALL LETTER PHI +EE 03B5 GREEK SMALL LETTER EPSILON +EF 2229 INTERSECTION +F0 2261 IDENTICAL TO +F1 00B1 PLUS-MINUS SIGN +F2 2265 GREATER-THAN OR EQUAL TO +F3 2264 LESS-THAN OR EQUAL TO +F4 2320 TOP HALF INTEGRAL +F5 2321 BOTTOM HALF INTEGRAL +F6 00F7 DIVISION SIGN +F7 2248 ALMOST EQUAL TO +F8 00B0 DEGREE SIGN +F9 2219 BULLET OPERATOR +FA 00B7 MIDDLE DOT +FB 221A SQUARE ROOT +FC 207F SUPERSCRIPT LATIN SMALL LETTER N +FD 00B2 SUPERSCRIPT TWO +FE 25A0 BLACK SQUARE +FF 00A0 NO-BREAK SPACE diff --git a/libucw/charset/set/ibm-ctrl b/libucw/charset/set/ibm-ctrl new file mode 100644 index 0000000..f6ee56a 
--- /dev/null +++ b/libucw/charset/set/ibm-ctrl @@ -0,0 +1,259 @@ +# IBM Charset File With Control Characters +# (c) 1997 Martin Mares + +00 0000 +01 0001 +02 0002 +03 0003 +04 0004 +05 0005 +06 0006 +07 0007 +08 0008 +09 0009 +0A 000A +0B 000B +0C 000C +0D 000D +0E 000E +0F 000F +10 0010 +11 0011 +12 0012 +13 0013 +14 0014 +15 0015 +16 0016 +17 0017 +18 0018 +19 0019 +1A 001A +1B 001B +1C 001C +1D 001D +1E 001E +1F 001F +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 
005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 2302 HOUSE +80 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +81 00FC LATIN SMALL LETTER U WITH DIAERESIS +82 00E9 LATIN SMALL LETTER E WITH ACUTE +83 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +84 00E4 LATIN SMALL LETTER A WITH DIAERESIS +85 00E0 LATIN SMALL LETTER A WITH GRAVE +86 00E5 LATIN SMALL LETTER A WITH RING ABOVE +87 00E7 LATIN SMALL LETTER C WITH CEDILLA +88 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +89 00EB LATIN SMALL LETTER E WITH DIAERESIS +8A 00E8 LATIN SMALL LETTER E WITH GRAVE +8B 00EF LATIN SMALL LETTER I WITH DIAERESIS +8C 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +8D 00EC LATIN SMALL LETTER I WITH GRAVE +8E 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +8F 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +90 00C9 LATIN CAPITAL LETTER E WITH ACUTE +91 00E6 LATIN SMALL LETTER AE +92 00C6 LATIN CAPITAL LETTER AE +93 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +94 00F6 LATIN SMALL LETTER O WITH DIAERESIS +95 00F2 LATIN SMALL LETTER O WITH GRAVE +96 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +97 00F9 LATIN SMALL 
LETTER U WITH GRAVE +98 00FF LATIN SMALL LETTER Y WITH DIAERESIS +99 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +9A 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +9B 00A2 CENT SIGN +9C 00A3 POUND SIGN +9D 00A5 YEN SIGN +9E 20A7 PESETA SIGN +9F 0192 LATIN SMALL LETTER F WITH HOOK +A0 00E1 LATIN SMALL LETTER A WITH ACUTE +A1 00ED LATIN SMALL LETTER I WITH ACUTE +A2 00F3 LATIN SMALL LETTER O WITH ACUTE +A3 00FA LATIN SMALL LETTER U WITH ACUTE +A4 00F1 LATIN SMALL LETTER N WITH TILDE +A5 00D1 LATIN CAPITAL LETTER N WITH TILDE +A6 00AA FEMININE ORDINAL INDICATOR +A7 00BA MASCULINE ORDINAL INDICATOR +A8 00BF INVERTED QUESTION MARK +A9 2310 REVERSED NOT SIGN +AA 00AC NOT SIGN +AB 00BD VULGAR FRACTION ONE HALF +AC 00BC VULGAR FRACTION ONE QUARTER +AD 00A1 INVERTED EXCLAMATION MARK +AE 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AF 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +B0 2591 LIGHT SHADE +B1 2592 MEDIUM SHADE +B2 2593 DARK SHADE +B3 2502 BOX DRAWINGS LIGHT VERTICAL +B4 2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT +B5 2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +B6 2562 BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +B7 2556 BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +B8 2555 BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +B9 2563 BOX DRAWINGS DOUBLE VERTICAL AND LEFT +BA 2551 BOX DRAWINGS DOUBLE VERTICAL +BB 2557 BOX DRAWINGS DOUBLE DOWN AND LEFT +BC 255D BOX DRAWINGS DOUBLE UP AND LEFT +BD 255C BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +BE 255B BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +BF 2510 BOX DRAWINGS LIGHT DOWN AND LEFT +C0 2514 BOX DRAWINGS LIGHT UP AND RIGHT +C1 2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL +C2 252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +C3 251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT +C4 2500 BOX DRAWINGS LIGHT HORIZONTAL +C5 253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +C6 255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +C7 255F BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +C8 255A BOX DRAWINGS DOUBLE UP AND RIGHT +C9 2554 BOX DRAWINGS DOUBLE 
DOWN AND RIGHT +CA 2569 BOX DRAWINGS DOUBLE UP AND HORIZONTAL +CB 2566 BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +CC 2560 BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +CD 2550 BOX DRAWINGS DOUBLE HORIZONTAL +CE 256C BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +CF 2567 BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +D0 2568 BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +D1 2564 BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +D2 2565 BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +D3 2559 BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +D4 2558 BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +D5 2552 BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +D6 2553 BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +D7 256B BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +D8 256A BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +D9 2518 BOX DRAWINGS LIGHT UP AND LEFT +DA 250C BOX DRAWINGS LIGHT DOWN AND RIGHT +DB 2588 FULL BLOCK +DC 2584 LOWER HALF BLOCK +DD 258C LEFT HALF BLOCK +DE 2590 RIGHT HALF BLOCK +DF 2580 UPPER HALF BLOCK +E0 03B1 GREEK SMALL LETTER ALPHA +E1 00DF LATIN SMALL LETTER SHARP S +E2 0393 GREEK CAPITAL LETTER GAMMA +E3 03C0 GREEK SMALL LETTER PI +E4 03A3 GREEK CAPITAL LETTER SIGMA +E5 03C3 GREEK SMALL LETTER SIGMA +E6 00B5 MICRO SIGN +E7 03C4 GREEK SMALL LETTER TAU +E8 03A6 GREEK CAPITAL LETTER PHI +E9 0398 GREEK CAPITAL LETTER THETA +EA 03A9 GREEK CAPITAL LETTER OMEGA +EB 03B4 GREEK SMALL LETTER DELTA +EC 221E INFINITY +ED 03C6 GREEK SMALL LETTER PHI +EE 03B5 GREEK SMALL LETTER EPSILON +EF 2229 INTERSECTION +F0 2261 IDENTICAL TO +F1 00B1 PLUS-MINUS SIGN +F2 2265 GREATER-THAN OR EQUAL TO +F3 2264 LESS-THAN OR EQUAL TO +F4 2320 TOP HALF INTEGRAL +F5 2321 BOTTOM HALF INTEGRAL +F6 00F7 DIVISION SIGN +F7 2248 ALMOST EQUAL TO +F8 00B0 DEGREE SIGN +F9 2219 BULLET OPERATOR +FA 00B7 MIDDLE DOT +FB 221A SQUARE ROOT +FC 207F SUPERSCRIPT LATIN SMALL LETTER N +FD 00B2 SUPERSCRIPT TWO +FE 25A0 BLACK SQUARE +FF 00A0 NO-BREAK SPACE diff --git a/libucw/charset/set/kamen b/libucw/charset/set/kamen new file 
mode 100644 index 0000000..8555c8e --- /dev/null +++ b/libucw/charset/set/kamen @@ -0,0 +1,260 @@ +# Kamenicky Brothers Charset File +# Czech characters extracted from CStoCS "kam.enc" and others copied from +# IBM charset by Martin Mares + +00 0000 +01 263A WHITE SMILING FACE +02 263B BLACK SMILING FACE +03 2665 BLACK HEART SUIT +04 2666 BLACK DIAMOND SUIT +05 2663 BLACK CLUB SUIT +06 2660 BLACK SPADE SUIT +07 2022 BULLET +08 25D8 INVERSE BULLET +09 25CB WHITE CIRCLE +0A 25D9 INVERSE WHITE CIRCLE +0B 2642 MALE SIGN +0C 2640 FEMALE SIGN +0D 266A EIGHTH NOTE +0E 266B BEAMED EIGHTH NOTES +0F 263C WHITE SUN WITH RAYS +10 25B6 BLACK RIGHT-POINTING TRIANGLE +11 25C0 BLACK LEFT-POINTING TRIANGLE +12 2195 UP DOWN ARROW +13 203C DOUBLE EXCLAMATION MARK +14 00B6 PILCROW SIGN +15 00A7 SECTION SIGN +16 25AC BLACK RECTANGLE +17 21A8 UP DOWN ARROW WITH BASE +18 2191 UPWARDS ARROW +19 2193 DOWNWARDS ARROW +1A 2192 RIGHTWARDS ARROW +1B 2190 LEFTWARDS ARROW +1C 221F RIGHT ANGLE +1D 2194 LEFT RIGHT ARROW +1E 25B2 BLACK UP-POINTING TRIANGLE +1F 25BC BLACK DOWN-POINTING TRIANGLE +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN 
CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 2302 HOUSE +80 010C LATIN CAPITAL LETTER C WITH CARON +81 00FC LATIN SMALL LETTER U WITH DIAERESIS +82 00E9 LATIN SMALL LETTER E WITH ACUTE +83 010F LATIN SMALL LETTER D WITH CARON +84 00E4 LATIN SMALL LETTER A WITH DIAERESIS +85 010E LATIN CAPITAL LETTER D WITH CARON +86 0164 LATIN CAPITAL LETTER T WITH CARON +87 010D LATIN SMALL LETTER C WITH CARON +88 011B LATIN SMALL 
LETTER E WITH CARON +89 011A LATIN CAPITAL LETTER E WITH CARON +8A 0139 LATIN CAPITAL LETTER L WITH ACUTE +8B 00CD LATIN CAPITAL LETTER I WITH ACUTE +8C 013E LATIN SMALL LETTER L WITH CARON +8D 013A LATIN SMALL LETTER L WITH ACUTE +8E 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +8F 00C1 LATIN CAPITAL LETTER A WITH ACUTE +90 00C9 LATIN CAPITAL LETTER E WITH ACUTE +91 017E LATIN SMALL LETTER Z WITH CARON +92 017D LATIN CAPITAL LETTER Z WITH CARON +93 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +94 00F6 LATIN SMALL LETTER O WITH DIAERESIS +95 00D3 LATIN CAPITAL LETTER O WITH ACUTE +96 016F LATIN SMALL LETTER U WITH RING ABOVE +97 00DA LATIN CAPITAL LETTER U WITH ACUTE +98 00FD LATIN SMALL LETTER Y WITH ACUTE +99 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +9A 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +9B 0160 LATIN CAPITAL LETTER S WITH CARON +9C 013D LATIN CAPITAL LETTER L WITH CARON +9D 00DD LATIN CAPITAL LETTER Y WITH ACUTE +9E 0158 LATIN CAPITAL LETTER R WITH CARON +9F 0165 LATIN SMALL LETTER T WITH CARON +A0 00E1 LATIN SMALL LETTER A WITH ACUTE +A1 00ED LATIN SMALL LETTER I WITH ACUTE +A2 00F3 LATIN SMALL LETTER O WITH ACUTE +A3 00FA LATIN SMALL LETTER U WITH ACUTE +A4 0148 LATIN SMALL LETTER N WITH CARON +A5 0147 LATIN CAPITAL LETTER N WITH CARON +A6 016E LATIN CAPITAL LETTER U WITH RING ABOVE +A7 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +A8 0161 LATIN SMALL LETTER S WITH CARON +A9 0159 LATIN SMALL LETTER R WITH CARON +AA 0155 LATIN SMALL LETTER R WITH ACUTE +AB 0154 LATIN CAPITAL LETTER R WITH ACUTE +AC 00BC VULGAR FRACTION ONE QUARTER +AD 00A7 SECTION SIGN +AE 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +AF 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +B0 2591 LIGHT SHADE +B1 2592 MEDIUM SHADE +B2 2593 DARK SHADE +B3 2502 BOX DRAWINGS LIGHT VERTICAL +B4 2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT +B5 2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +B6 2562 BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +B7 2556 BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +B8 
2555 BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +B9 2563 BOX DRAWINGS DOUBLE VERTICAL AND LEFT +BA 2551 BOX DRAWINGS DOUBLE VERTICAL +BB 2557 BOX DRAWINGS DOUBLE DOWN AND LEFT +BC 255D BOX DRAWINGS DOUBLE UP AND LEFT +BD 255C BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +BE 255B BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +BF 2510 BOX DRAWINGS LIGHT DOWN AND LEFT +C0 2514 BOX DRAWINGS LIGHT UP AND RIGHT +C1 2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL +C2 252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +C3 251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT +C4 2500 BOX DRAWINGS LIGHT HORIZONTAL +C5 253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +C6 255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +C7 255F BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +C8 255A BOX DRAWINGS DOUBLE UP AND RIGHT +C9 2554 BOX DRAWINGS DOUBLE DOWN AND RIGHT +CA 2569 BOX DRAWINGS DOUBLE UP AND HORIZONTAL +CB 2566 BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +CC 2560 BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +CD 2550 BOX DRAWINGS DOUBLE HORIZONTAL +CE 256C BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +CF 2567 BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +D0 2568 BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +D1 2564 BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +D2 2565 BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +D3 2559 BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +D4 2558 BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +D5 2552 BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +D6 2553 BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +D7 256B BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +D8 256A BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +D9 2518 BOX DRAWINGS LIGHT UP AND LEFT +DA 250C BOX DRAWINGS LIGHT DOWN AND RIGHT +DB 2588 FULL BLOCK +DC 2584 LOWER HALF BLOCK +DD 258C LEFT HALF BLOCK +DE 2590 RIGHT HALF BLOCK +DF 2580 UPPER HALF BLOCK +E0 03B1 GREEK SMALL LETTER ALPHA +E1 03B2 GREEK SMALL LETTER BETA +E2 0194 LATIN CAPITAL LETTER GAMMA +E3 03C0 GREEK SMALL LETTER PI +E4 03A3 GREEK CAPITAL LETTER SIGMA +E5 03C3 GREEK SMALL 
LETTER SIGMA +E6 03BC GREEK SMALL LETTER MU +E7 03C4 GREEK SMALL LETTER TAU +E8 03A6 GREEK CAPITAL LETTER PHI +E9 0398 GREEK CAPITAL LETTER THETA +EA 03A9 GREEK CAPITAL LETTER OMEGA +EB 03B4 GREEK SMALL LETTER DELTA +EC 221E INFINITY +ED 03C6 GREEK SMALL LETTER PHI +EE 2208 ELEMENT OF +EF 2229 INTERSECTION +F0 224D EQUIVALENT TO +F1 00B1 PLUS-MINUS SIGN +F2 2265 GREATER-THAN OR EQUAL TO +F3 2264 LESS-THAN OR EQUAL TO +F4 2320 TOP HALF INTEGRAL +F5 2321 BOTTOM HALF INTEGRAL +F6 00F7 DIVISION SIGN +F7 2248 ALMOST EQUAL TO +F8 00B0 DEGREE SIGN +F9 2219 BULLET OPERATOR +FA 00B7 MIDDLE DOT +FB 221A SQUARE ROOT +FC 207F SUPERSCRIPT LATIN SMALL LETTER N +FD 00B2 SUPERSCRIPT TWO +FE 25A0 BLACK SQUARE +FF 00A0 NO-BREAK SPACE diff --git a/libucw/charset/set/kamen-ctrl b/libucw/charset/set/kamen-ctrl new file mode 100644 index 0000000..da69ead --- /dev/null +++ b/libucw/charset/set/kamen-ctrl @@ -0,0 +1,260 @@ +# Kamenicky Brothers Charset File With Control Characters +# Czech characters extracted from CStoCS "kam.enc" and others copied from +# IBM charset by Martin Mares + +00 0000 +01 0001 +02 0002 +03 0003 +04 0004 +05 0005 +06 0006 +07 0007 +08 0008 +09 0009 +0A 000A +0B 000B +0C 000C +0D 000D +0E 000E +0F 000F +10 0010 +11 0011 +12 0012 +13 0013 +14 0014 +15 0015 +16 0016 +17 0017 +18 0018 +19 0019 +1A 001A +1B 001B +1C 001C +1D 001D +1E 001E +1F 001F +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN 
SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 2302 HOUSE +80 010C LATIN CAPITAL LETTER C WITH CARON +81 00FC LATIN SMALL LETTER U 
WITH DIAERESIS +82 00E9 LATIN SMALL LETTER E WITH ACUTE +83 010F LATIN SMALL LETTER D WITH CARON +84 00E4 LATIN SMALL LETTER A WITH DIAERESIS +85 010E LATIN CAPITAL LETTER D WITH CARON +86 0164 LATIN CAPITAL LETTER T WITH CARON +87 010D LATIN SMALL LETTER C WITH CARON +88 011B LATIN SMALL LETTER E WITH CARON +89 011A LATIN CAPITAL LETTER E WITH CARON +8A 0139 LATIN CAPITAL LETTER L WITH ACUTE +8B 00CD LATIN CAPITAL LETTER I WITH ACUTE +8C 013E LATIN SMALL LETTER L WITH CARON +8D 013A LATIN SMALL LETTER L WITH ACUTE +8E 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +8F 00C1 LATIN CAPITAL LETTER A WITH ACUTE +90 00C9 LATIN CAPITAL LETTER E WITH ACUTE +91 017E LATIN SMALL LETTER Z WITH CARON +92 017D LATIN CAPITAL LETTER Z WITH CARON +93 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +94 00F6 LATIN SMALL LETTER O WITH DIAERESIS +95 00D3 LATIN CAPITAL LETTER O WITH ACUTE +96 016F LATIN SMALL LETTER U WITH RING ABOVE +97 00DA LATIN CAPITAL LETTER U WITH ACUTE +98 00FD LATIN SMALL LETTER Y WITH ACUTE +99 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +9A 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +9B 0160 LATIN CAPITAL LETTER S WITH CARON +9C 013D LATIN CAPITAL LETTER L WITH CARON +9D 00DD LATIN CAPITAL LETTER Y WITH ACUTE +9E 0158 LATIN CAPITAL LETTER R WITH CARON +9F 0165 LATIN SMALL LETTER T WITH CARON +A0 00E1 LATIN SMALL LETTER A WITH ACUTE +A1 00ED LATIN SMALL LETTER I WITH ACUTE +A2 00F3 LATIN SMALL LETTER O WITH ACUTE +A3 00FA LATIN SMALL LETTER U WITH ACUTE +A4 0148 LATIN SMALL LETTER N WITH CARON +A5 0147 LATIN CAPITAL LETTER N WITH CARON +A6 016E LATIN CAPITAL LETTER U WITH RING ABOVE +A7 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +A8 0161 LATIN SMALL LETTER S WITH CARON +A9 0159 LATIN SMALL LETTER R WITH CARON +AA 0155 LATIN SMALL LETTER R WITH ACUTE +AB 0154 LATIN CAPITAL LETTER R WITH ACUTE +AC 00BC VULGAR FRACTION ONE QUARTER +AD 00A7 SECTION SIGN +AE 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +AF 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +B0 2591 LIGHT 
SHADE +B1 2592 MEDIUM SHADE +B2 2593 DARK SHADE +B3 2502 BOX DRAWINGS LIGHT VERTICAL +B4 2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT +B5 2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +B6 2562 BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +B7 2556 BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +B8 2555 BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +B9 2563 BOX DRAWINGS DOUBLE VERTICAL AND LEFT +BA 2551 BOX DRAWINGS DOUBLE VERTICAL +BB 2557 BOX DRAWINGS DOUBLE DOWN AND LEFT +BC 255D BOX DRAWINGS DOUBLE UP AND LEFT +BD 255C BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +BE 255B BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +BF 2510 BOX DRAWINGS LIGHT DOWN AND LEFT +C0 2514 BOX DRAWINGS LIGHT UP AND RIGHT +C1 2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL +C2 252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +C3 251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT +C4 2500 BOX DRAWINGS LIGHT HORIZONTAL +C5 253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +C6 255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +C7 255F BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +C8 255A BOX DRAWINGS DOUBLE UP AND RIGHT +C9 2554 BOX DRAWINGS DOUBLE DOWN AND RIGHT +CA 2569 BOX DRAWINGS DOUBLE UP AND HORIZONTAL +CB 2566 BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +CC 2560 BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +CD 2550 BOX DRAWINGS DOUBLE HORIZONTAL +CE 256C BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +CF 2567 BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +D0 2568 BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +D1 2564 BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +D2 2565 BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +D3 2559 BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +D4 2558 BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +D5 2552 BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +D6 2553 BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +D7 256B BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +D8 256A BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +D9 2518 BOX DRAWINGS LIGHT UP AND LEFT +DA 250C BOX DRAWINGS LIGHT DOWN AND RIGHT +DB 2588 FULL BLOCK +DC 
2584 LOWER HALF BLOCK +DD 258C LEFT HALF BLOCK +DE 2590 RIGHT HALF BLOCK +DF 2580 UPPER HALF BLOCK +E0 03B1 GREEK SMALL LETTER ALPHA +E1 03B2 GREEK SMALL LETTER BETA +E2 0194 LATIN CAPITAL LETTER GAMMA +E3 03C0 GREEK SMALL LETTER PI +E4 03A3 GREEK CAPITAL LETTER SIGMA +E5 03C3 GREEK SMALL LETTER SIGMA +E6 03BC GREEK SMALL LETTER MU +E7 03C4 GREEK SMALL LETTER TAU +E8 03A6 GREEK CAPITAL LETTER PHI +E9 0398 GREEK CAPITAL LETTER THETA +EA 03A9 GREEK CAPITAL LETTER OMEGA +EB 03B4 GREEK SMALL LETTER DELTA +EC 221E INFINITY +ED 03C6 GREEK SMALL LETTER PHI +EE 2208 ELEMENT OF +EF 2229 INTERSECTION +F0 224D EQUIVALENT TO +F1 00B1 PLUS-MINUS SIGN +F2 2265 GREATER-THAN OR EQUAL TO +F3 2264 LESS-THAN OR EQUAL TO +F4 2320 TOP HALF INTEGRAL +F5 2321 BOTTOM HALF INTEGRAL +F6 00F7 DIVISION SIGN +F7 2248 ALMOST EQUAL TO +F8 00B0 DEGREE SIGN +F9 2219 BULLET OPERATOR +FA 00B7 MIDDLE DOT +FB 221A SQUARE ROOT +FC 207F SUPERSCRIPT LATIN SMALL LETTER N +FD 00B2 SUPERSCRIPT TWO +FE 25A0 BLACK SQUARE +FF 00A0 NO-BREAK SPACE diff --git a/libucw/charset/set/koi8 b/libucw/charset/set/koi8 new file mode 100644 index 0000000..cc908d5 --- /dev/null +++ b/libucw/charset/set/koi8 @@ -0,0 +1,259 @@ +# KOI-8 CS Charset File +# Adapted from CStoCS "koi8.enc" by Martin Mares + +00 0000 +01 0001 +02 0002 +03 0003 +04 0004 +05 0005 +06 0006 +07 0007 +08 0008 +09 0009 +0A 000A +0B 000B +0C 000C +0D 000D +0E 000E +0F 000F +10 0010 +11 0011 +12 0012 +13 0013 +14 0014 +15 0015 +16 0016 +17 0017 +18 0018 +19 0019 +1A 001A +1B 001B +1C 001C +1D 001D +1E 001E +1F 001F +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE 
+36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 
007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F +80 FFFD REPLACEMENT CHARACTER +81 FFFD REPLACEMENT CHARACTER +82 FFFD REPLACEMENT CHARACTER +83 FFFD REPLACEMENT CHARACTER +84 FFFD REPLACEMENT CHARACTER +85 FFFD REPLACEMENT CHARACTER +86 FFFD REPLACEMENT CHARACTER +87 FFFD REPLACEMENT CHARACTER +88 FFFD REPLACEMENT CHARACTER +89 FFFD REPLACEMENT CHARACTER +8A FFFD REPLACEMENT CHARACTER +8B FFFD REPLACEMENT CHARACTER +8C FFFD REPLACEMENT CHARACTER +8D FFFD REPLACEMENT CHARACTER +8E FFFD REPLACEMENT CHARACTER +8F FFFD REPLACEMENT CHARACTER +90 FFFD REPLACEMENT CHARACTER +91 FFFD REPLACEMENT CHARACTER +92 FFFD REPLACEMENT CHARACTER +93 FFFD REPLACEMENT CHARACTER +94 FFFD REPLACEMENT CHARACTER +95 FFFD REPLACEMENT CHARACTER +96 FFFD REPLACEMENT CHARACTER +97 FFFD REPLACEMENT CHARACTER +98 FFFD REPLACEMENT CHARACTER +99 FFFD REPLACEMENT CHARACTER +9A FFFD REPLACEMENT CHARACTER +9B FFFD REPLACEMENT CHARACTER +9C FFFD REPLACEMENT CHARACTER +9D FFFD REPLACEMENT CHARACTER +9E FFFD REPLACEMENT CHARACTER +9F FFFD REPLACEMENT CHARACTER +A0 FFFD REPLACEMENT CHARACTER +A1 FFFD REPLACEMENT CHARACTER +A2 FFFD REPLACEMENT CHARACTER +A3 FFFD REPLACEMENT CHARACTER +A4 FFFD REPLACEMENT CHARACTER +A5 FFFD REPLACEMENT CHARACTER +A6 FFFD REPLACEMENT CHARACTER +A7 FFFD REPLACEMENT CHARACTER +A8 FFFD REPLACEMENT CHARACTER +A9 FFFD REPLACEMENT CHARACTER +AA FFFD REPLACEMENT CHARACTER +AB FFFD REPLACEMENT CHARACTER +AC FFFD REPLACEMENT CHARACTER +AD FFFD REPLACEMENT CHARACTER +AE FFFD REPLACEMENT CHARACTER +AF FFFD REPLACEMENT CHARACTER +B0 FFFD REPLACEMENT CHARACTER +B1 FFFD REPLACEMENT CHARACTER +B2 FFFD REPLACEMENT CHARACTER +B3 FFFD REPLACEMENT CHARACTER +B4 FFFD REPLACEMENT CHARACTER +B5 FFFD REPLACEMENT CHARACTER +B6 FFFD REPLACEMENT CHARACTER +B7 FFFD REPLACEMENT CHARACTER +B8 FFFD REPLACEMENT CHARACTER +B9 FFFD REPLACEMENT CHARACTER +BA FFFD REPLACEMENT CHARACTER +BB FFFD REPLACEMENT CHARACTER +BC FFFD REPLACEMENT CHARACTER +BD FFFD 
REPLACEMENT CHARACTER +BE FFFD REPLACEMENT CHARACTER +BF FFFD REPLACEMENT CHARACTER +C0 FFFD REPLACEMENT CHARACTER +C1 00E1 LATIN SMALL LETTER A WITH ACUTE +C2 FFFD REPLACEMENT CHARACTER +C3 010D LATIN SMALL LETTER C WITH CARON +C4 010F LATIN SMALL LETTER D WITH CARON +C5 011B LATIN SMALL LETTER E WITH CARON +C6 0155 LATIN SMALL LETTER R WITH ACUTE +C7 F002 ???? +C8 00FC LATIN SMALL LETTER U WITH DIAERESIS +C9 00ED LATIN SMALL LETTER I WITH ACUTE +CA 016F LATIN SMALL LETTER U WITH RING ABOVE +CB 013A LATIN SMALL LETTER L WITH ACUTE +CC 013E LATIN SMALL LETTER L WITH CARON +CD 00F6 LATIN SMALL LETTER O WITH DIAERESIS +CE 0148 LATIN SMALL LETTER N WITH CARON +CF 00F3 LATIN SMALL LETTER O WITH ACUTE +D0 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +D1 00E4 LATIN SMALL LETTER A WITH DIAERESIS +D2 0159 LATIN SMALL LETTER R WITH CARON +D3 0161 LATIN SMALL LETTER S WITH CARON +D4 0165 LATIN SMALL LETTER T WITH CARON +D5 00FA LATIN SMALL LETTER U WITH ACUTE +D6 FFFD REPLACEMENT CHARACTER +D7 00E9 LATIN SMALL LETTER E WITH ACUTE +D8 00E0 LATIN SMALL LETTER A WITH GRAVE +D9 00FD LATIN SMALL LETTER Y WITH ACUTE +DA 017E LATIN SMALL LETTER Z WITH CARON +DB FFFD REPLACEMENT CHARACTER +DC 030C COMBINING CARON +DD FFFD REPLACEMENT CHARACTER +DE 0302 COMBINING CIRCUMFLEX ACCENT +DF FFFD REPLACEMENT CHARACTER +E0 00B4 ACUTE ACCENT +E1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +E2 FFFD REPLACEMENT CHARACTER +E3 010C LATIN CAPITAL LETTER C WITH CARON +E4 010E LATIN CAPITAL LETTER D WITH CARON +E5 011A LATIN CAPITAL LETTER E WITH CARON +E6 0154 LATIN CAPITAL LETTER R WITH ACUTE +E7 F000 ???? 
+E8 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +E9 00CD LATIN CAPITAL LETTER I WITH ACUTE +EA 016E LATIN CAPITAL LETTER U WITH RING ABOVE +EB 0139 LATIN CAPITAL LETTER L WITH ACUTE +EC 013D LATIN CAPITAL LETTER L WITH CARON +ED 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +EE 0147 LATIN CAPITAL LETTER N WITH CARON +EF 00D3 LATIN CAPITAL LETTER O WITH ACUTE +F0 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +F1 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +F2 0158 LATIN CAPITAL LETTER R WITH CARON +F3 0160 LATIN CAPITAL LETTER S WITH CARON +F4 0164 LATIN CAPITAL LETTER T WITH CARON +F5 00DA LATIN CAPITAL LETTER U WITH ACUTE +F6 FFFD REPLACEMENT CHARACTER +F7 00C9 LATIN CAPITAL LETTER E WITH ACUTE +F8 0102 LATIN CAPITAL LETTER A WITH BREVE +F9 00DD LATIN CAPITAL LETTER Y WITH ACUTE +FA 017D LATIN CAPITAL LETTER Z WITH CARON +FB FFFD REPLACEMENT CHARACTER +FC FFFD REPLACEMENT CHARACTER +FD FFFD REPLACEMENT CHARACTER +FE 00B0 DEGREE SIGN +FF FFFD REPLACEMENT CHARACTER diff --git a/libucw/charset/set/mac b/libucw/charset/set/mac new file mode 100644 index 0000000..004a7d9 --- /dev/null +++ b/libucw/charset/set/mac @@ -0,0 +1,259 @@ +# Macintosh Charset File +# Adapted from CStoCS "mac.enc" by Martin Mares + +00 0000 +01 0001 +02 0002 +03 0003 +04 0004 +05 0005 +06 0006 +07 0007 +08 0008 +09 0009 +0A 000A +0B 000B +0C 000C +0D 000D +0E 000E +0F 000F +10 0010 +11 0011 +12 0012 +13 0013 +14 0014 +15 0015 +16 0016 +17 0017 +18 0018 +19 0019 +1A 001A +1B 001B +1C 001C +1D 001D +1E 001E +1F 001F +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 
0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL 
LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F +80 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +81 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +82 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +83 00C9 LATIN CAPITAL LETTER E WITH ACUTE +84 00D1 LATIN CAPITAL LETTER N WITH TILDE +85 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +86 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +87 00E1 LATIN SMALL LETTER A WITH ACUTE +88 00E0 LATIN SMALL LETTER A WITH GRAVE +89 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +8A 00E4 LATIN SMALL LETTER A WITH DIAERESIS +8B 00E3 LATIN SMALL LETTER A WITH TILDE +8C 00E5 LATIN SMALL LETTER A WITH RING ABOVE +8D 00E7 LATIN SMALL LETTER C WITH CEDILLA +8E 00E9 LATIN SMALL LETTER E WITH ACUTE +8F 00E8 LATIN SMALL LETTER E WITH GRAVE +90 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +91 00EB LATIN SMALL LETTER E WITH DIAERESIS +92 00ED LATIN SMALL LETTER I WITH ACUTE +93 00EC LATIN SMALL LETTER I WITH GRAVE +94 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +95 00EF LATIN SMALL LETTER I WITH DIAERESIS +96 00F1 LATIN SMALL LETTER N WITH TILDE +97 00F3 LATIN SMALL LETTER O WITH ACUTE +98 00F2 LATIN SMALL LETTER O WITH GRAVE +99 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +9A 00F6 LATIN SMALL LETTER O WITH DIAERESIS +9B 00F5 LATIN SMALL LETTER O WITH TILDE +9C 00FA LATIN SMALL LETTER U WITH ACUTE +9D 00F9 LATIN SMALL LETTER U WITH GRAVE +9E 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +9F 00FC LATIN SMALL LETTER U WITH DIAERESIS +A0 2020 DAGGER +A1 00B0 DEGREE SIGN +A2 00A2 CENT SIGN +A3 00A3 POUND SIGN +A4 00A7 SECTION SIGN +A5 2022 BULLET +A6 00B6 PILCROW SIGN +A7 00DF LATIN SMALL LETTER SHARP S +A8 00AE REGISTERED SIGN +A9 00A9 COPYRIGHT SIGN +AA 2122 TRADE MARK SIGN +AB 00B4 ACUTE ACCENT +AC 0308 COMBINING DIAERESIS +AD 2260 NOT EQUAL TO +AE 00C6 LATIN CAPITAL LETTER AE +AF 00D8 LATIN CAPITAL LETTER O WITH STROKE +B0 221E INFINITY +B1 00B1 PLUS-MINUS SIGN +B2 2264 LESS-THAN OR EQUAL TO +B3 2265 GREATER-THAN OR EQUAL TO +B4 00A5 YEN SIGN +B5 03BC GREEK SMALL 
LETTER MU +B6 2202 PARTIAL DIFFERENTIAL +B7 2211 N-ARY SUMMATION +B8 220F N-ARY PRODUCT +B9 03C0 GREEK SMALL LETTER PI +BA 222B INTEGRAL +BB 00AA FEMININE ORDINAL INDICATOR +BC 00BA MASCULINE ORDINAL INDICATOR +BD 03A9 GREEK CAPITAL LETTER OMEGA +BE 00E6 LATIN SMALL LETTER AE +BF 00F8 LATIN SMALL LETTER O WITH STROKE +C0 00BF INVERTED QUESTION MARK +C1 00A1 INVERTED EXCLAMATION MARK +C2 00AC NOT SIGN +C3 221A SQUARE ROOT +C4 F003 ???? +C5 2248 ALMOST EQUAL TO +C6 0394 GREEK CAPITAL LETTER DELTA +C7 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +C8 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +C9 0EAF LAO ELLIPSIS +CA 00A0 NO-BREAK SPACE +CB 00C0 LATIN CAPITAL LETTER A WITH GRAVE +CC 00C3 LATIN CAPITAL LETTER A WITH TILDE +CD 00D5 LATIN CAPITAL LETTER O WITH TILDE +CE 0152 LATIN CAPITAL LIGATURE OE +CF 0153 LATIN SMALL LIGATURE OE +D0 2014 EM DASH +D1 2013 EN DASH +D2 201C LEFT DOUBLE QUOTATION MARK +D3 201D RIGHT DOUBLE QUOTATION MARK +D4 2018 LEFT SINGLE QUOTATION MARK +D5 2019 RIGHT SINGLE QUOTATION MARK +D6 00F7 DIVISION SIGN +D7 22C4 DIAMOND OPERATOR +D8 00FF LATIN SMALL LETTER Y WITH DIAERESIS +D9 0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +DA 2044 FRACTION SLASH +DB 00A4 CURRENCY SIGN +DC 2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK +DD 203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +DE FB01 LATIN SMALL LIGATURE FI +DF FB02 LATIN SMALL LIGATURE FL +E0 2021 DOUBLE DAGGER +E1 00B7 MIDDLE DOT +E2 201A SINGLE LOW-9 QUOTATION MARK +E3 201E DOUBLE LOW-9 QUOTATION MARK +E4 2030 PER MILLE SIGN +E5 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +E6 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX +E7 00C1 LATIN CAPITAL LETTER A WITH ACUTE +E8 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +E9 00C8 LATIN CAPITAL LETTER E WITH GRAVE +EA 00CD LATIN CAPITAL LETTER I WITH ACUTE +EB 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +EC 00CF LATIN CAPITAL LETTER I WITH DIAERESIS +ED 00CC LATIN CAPITAL LETTER I WITH GRAVE +EE 00D3 LATIN CAPITAL LETTER O WITH ACUTE +EF 00D4 LATIN CAPITAL 
LETTER O WITH CIRCUMFLEX +F0 FFFD REPLACEMENT CHARACTER +F1 00D2 LATIN CAPITAL LETTER O WITH GRAVE +F2 00DA LATIN CAPITAL LETTER U WITH ACUTE +F3 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +F4 00D9 LATIN CAPITAL LETTER U WITH GRAVE +F5 0131 LATIN SMALL LETTER DOTLESS I +F6 0302 COMBINING CIRCUMFLEX ACCENT +F7 0303 COMBINING TILDE +F8 0304 COMBINING MACRON +F9 0306 COMBINING BREVE +FA 02D9 DOT ABOVE +FB 030A COMBINING RING ABOVE +FC 0327 COMBINING CEDILLA +FD 030B COMBINING DOUBLE ACUTE ACCENT +FE 0328 COMBINING OGONEK +FF 030C COMBINING CARON diff --git a/libucw/charset/set/macce b/libucw/charset/set/macce new file mode 100644 index 0000000..6e4f3b1 --- /dev/null +++ b/libucw/charset/set/macce @@ -0,0 +1,259 @@ +# Czech Macintosh Charset File +# Adapted from CStoCS "macce.enc" by Martin Mares + +00 0000 +01 0001 +02 0002 +03 0003 +04 0004 +05 0005 +06 0006 +07 0007 +08 0008 +09 0009 +0A 000A +0B 000B +0C 000C +0D 000D +0E 000E +0F 000F +10 0010 +11 0011 +12 0012 +13 0013 +14 0014 +15 0015 +16 0016 +17 0017 +18 0018 +19 0019 +1A 001A +1B 001B +1C 001C +1D 001D +1E 001E +1F 001F +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER 
G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F +80 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +81 0100 LATIN CAPITAL LETTER A WITH MACRON +82 0101 LATIN SMALL LETTER A WITH MACRON +83 00C9 LATIN CAPITAL LETTER E WITH ACUTE +84 0104 LATIN CAPITAL LETTER A WITH OGONEK +85 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +86 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +87 00E1 LATIN SMALL LETTER A WITH ACUTE 
+88 0105 LATIN SMALL LETTER A WITH OGONEK +89 010C LATIN CAPITAL LETTER C WITH CARON +8A 00E4 LATIN SMALL LETTER A WITH DIAERESIS +8B 010D LATIN SMALL LETTER C WITH CARON +8C 0106 LATIN CAPITAL LETTER C WITH ACUTE +8D 0107 LATIN SMALL LETTER C WITH ACUTE +8E 00E9 LATIN SMALL LETTER E WITH ACUTE +8F 0179 LATIN CAPITAL LETTER Z WITH ACUTE +90 017A LATIN SMALL LETTER Z WITH ACUTE +91 010E LATIN CAPITAL LETTER D WITH CARON +92 00ED LATIN SMALL LETTER I WITH ACUTE +93 010F LATIN SMALL LETTER D WITH CARON +94 0112 LATIN CAPITAL LETTER E WITH MACRON +95 0113 LATIN SMALL LETTER E WITH MACRON +96 0116 LATIN CAPITAL LETTER E WITH DOT ABOVE +97 00F3 LATIN SMALL LETTER O WITH ACUTE +98 0117 LATIN SMALL LETTER E WITH DOT ABOVE +99 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +9A 00F6 LATIN SMALL LETTER O WITH DIAERESIS +9B 00F5 LATIN SMALL LETTER O WITH TILDE +9C 00FA LATIN SMALL LETTER U WITH ACUTE +9D 011A LATIN CAPITAL LETTER E WITH CARON +9E 011B LATIN SMALL LETTER E WITH CARON +9F 00FC LATIN SMALL LETTER U WITH DIAERESIS +A0 2020 DAGGER +A1 00B0 DEGREE SIGN +A2 0118 LATIN CAPITAL LETTER E WITH OGONEK +A3 00A3 POUND SIGN +A4 00A7 SECTION SIGN +A5 2022 BULLET +A6 00B6 PILCROW SIGN +A7 00DF LATIN SMALL LETTER SHARP S +A8 00AE REGISTERED SIGN +A9 00A9 COPYRIGHT SIGN +AA 2122 TRADE MARK SIGN +AB 0119 LATIN SMALL LETTER E WITH OGONEK +AC 0308 COMBINING DIAERESIS +AD 2260 NOT EQUAL TO +AE 01F5 LATIN SMALL LETTER G WITH ACUTE +AF 012E LATIN CAPITAL LETTER I WITH OGONEK +B0 012F LATIN SMALL LETTER I WITH OGONEK +B1 012A LATIN CAPITAL LETTER I WITH MACRON +B2 2264 LESS-THAN OR EQUAL TO +B3 2265 GREATER-THAN OR EQUAL TO +B4 012B LATIN SMALL LETTER I WITH MACRON +B5 0136 LATIN CAPITAL LETTER K WITH CEDILLA +B6 2202 PARTIAL DIFFERENTIAL +B7 2211 N-ARY SUMMATION +B8 0142 LATIN SMALL LETTER L WITH STROKE +B9 013B LATIN CAPITAL LETTER L WITH CEDILLA +BA 013C LATIN SMALL LETTER L WITH CEDILLA +BB 013D LATIN CAPITAL LETTER L WITH CARON +BC 013E LATIN SMALL LETTER L WITH CARON +BD 0139 LATIN 
CAPITAL LETTER L WITH ACUTE +BE 013A LATIN SMALL LETTER L WITH ACUTE +BF 0145 LATIN CAPITAL LETTER N WITH CEDILLA +C0 0146 LATIN SMALL LETTER N WITH CEDILLA +C1 0143 LATIN CAPITAL LETTER N WITH ACUTE +C2 00AC NOT SIGN +C3 221A SQUARE ROOT +C4 0144 LATIN SMALL LETTER N WITH ACUTE +C5 0147 LATIN CAPITAL LETTER N WITH CARON +C6 0394 GREEK CAPITAL LETTER DELTA +C7 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +C8 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +C9 0EAF LAO ELLIPSIS +CA 00A0 NO-BREAK SPACE +CB 0148 LATIN SMALL LETTER N WITH CARON +CC 0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +CD 00D5 LATIN CAPITAL LETTER O WITH TILDE +CE 0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE +CF 014C LATIN CAPITAL LETTER O WITH MACRON +D0 2014 EM DASH +D1 2013 EN DASH +D2 201C LEFT DOUBLE QUOTATION MARK +D3 201D RIGHT DOUBLE QUOTATION MARK +D4 2018 LEFT SINGLE QUOTATION MARK +D5 2019 RIGHT SINGLE QUOTATION MARK +D6 00F7 DIVISION SIGN +D7 22C4 DIAMOND OPERATOR +D8 014D LATIN SMALL LETTER O WITH MACRON +D9 0154 LATIN CAPITAL LETTER R WITH ACUTE +DA 0155 LATIN SMALL LETTER R WITH ACUTE +DB 0158 LATIN CAPITAL LETTER R WITH CARON +DC 2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK +DD 203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +DE 0159 LATIN SMALL LETTER R WITH CARON +DF 0156 LATIN CAPITAL LETTER R WITH CEDILLA +E0 0157 LATIN SMALL LETTER R WITH CEDILLA +E1 0160 LATIN CAPITAL LETTER S WITH CARON +E2 201A SINGLE LOW-9 QUOTATION MARK +E3 201E DOUBLE LOW-9 QUOTATION MARK +E4 0161 LATIN SMALL LETTER S WITH CARON +E5 015A LATIN CAPITAL LETTER S WITH ACUTE +E6 015B LATIN SMALL LETTER S WITH ACUTE +E7 00C1 LATIN CAPITAL LETTER A WITH ACUTE +E8 0164 LATIN CAPITAL LETTER T WITH CARON +E9 0165 LATIN SMALL LETTER T WITH CARON +EA 00CD LATIN CAPITAL LETTER I WITH ACUTE +EB 017D LATIN CAPITAL LETTER Z WITH CARON +EC 017E LATIN SMALL LETTER Z WITH CARON +ED 016A LATIN CAPITAL LETTER U WITH MACRON +EE 00D3 LATIN CAPITAL LETTER O WITH ACUTE +EF 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +F0 
016B LATIN SMALL LETTER U WITH MACRON +F1 016E LATIN CAPITAL LETTER U WITH RING ABOVE +F2 00DA LATIN CAPITAL LETTER U WITH ACUTE +F3 016F LATIN SMALL LETTER U WITH RING ABOVE +F4 0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +F5 0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE +F6 0172 LATIN CAPITAL LETTER U WITH OGONEK +F7 0173 LATIN SMALL LETTER U WITH OGONEK +F8 00DD LATIN CAPITAL LETTER Y WITH ACUTE +F9 00FD LATIN SMALL LETTER Y WITH ACUTE +FA 0137 LATIN SMALL LETTER K WITH CEDILLA +FB 017B LATIN CAPITAL LETTER Z WITH DOT ABOVE +FC 0141 LATIN CAPITAL LETTER L WITH STROKE +FD 017C LATIN SMALL LETTER Z WITH DOT ABOVE +FE 0122 LATIN CAPITAL LETTER G WITH CEDILLA +FF 030C COMBINING CARON diff --git a/libucw/charset/set/pc-latin-2 b/libucw/charset/set/pc-latin-2 new file mode 100644 index 0000000..8327ceb --- /dev/null +++ b/libucw/charset/set/pc-latin-2 @@ -0,0 +1,260 @@ +# PC Latin-2 Charset File +# Adapted from CStoCS "pc2.enc" and gaps filled by info from the +# IBM charset by Martin Mares + +00 0000 +01 0001 +02 0002 +03 0003 +04 0004 +05 0005 +06 0006 +07 0007 +08 0008 +09 0009 +0A 000A +0B 000B +0C 000C +0D 000D +0E 000E +0F 000F +10 0010 +11 0011 +12 0012 +13 0013 +14 0014 +15 0015 +16 0016 +17 0017 +18 0018 +19 0019 +1A 001A +1B 001B +1C 001C +1D 001D +1E 001E +1F 001F +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL 
LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 2302 HOUSE +80 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +81 00FC LATIN SMALL LETTER U WITH DIAERESIS +82 00E9 LATIN SMALL LETTER E WITH ACUTE +83 00E2 LATIN 
SMALL LETTER A WITH CIRCUMFLEX +84 00E4 LATIN SMALL LETTER A WITH DIAERESIS +85 016F LATIN SMALL LETTER U WITH RING ABOVE +86 0107 LATIN SMALL LETTER C WITH ACUTE +87 00E7 LATIN SMALL LETTER C WITH CEDILLA +88 0142 LATIN SMALL LETTER L WITH STROKE +89 00EB LATIN SMALL LETTER E WITH DIAERESIS +8A 0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +8B 0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE +8C 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +8D 0179 LATIN CAPITAL LETTER Z WITH ACUTE +8E 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +8F 0106 LATIN CAPITAL LETTER C WITH ACUTE +90 00C9 LATIN CAPITAL LETTER E WITH ACUTE +91 0139 LATIN CAPITAL LETTER L WITH ACUTE +92 013A LATIN SMALL LETTER L WITH ACUTE +93 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +94 00F6 LATIN SMALL LETTER O WITH DIAERESIS +95 013D LATIN CAPITAL LETTER L WITH CARON +96 013E LATIN SMALL LETTER L WITH CARON +97 015A LATIN CAPITAL LETTER S WITH ACUTE +98 015B LATIN SMALL LETTER S WITH ACUTE +99 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +9A 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +9B 0164 LATIN CAPITAL LETTER T WITH CARON +9C 0165 LATIN SMALL LETTER T WITH CARON +9D 0141 LATIN CAPITAL LETTER L WITH STROKE +9E 00D7 MULTIPLICATION SIGN +9F 010D LATIN SMALL LETTER C WITH CARON +A0 00E1 LATIN SMALL LETTER A WITH ACUTE +A1 00ED LATIN SMALL LETTER I WITH ACUTE +A2 00F3 LATIN SMALL LETTER O WITH ACUTE +A3 00FA LATIN SMALL LETTER U WITH ACUTE +A4 0104 LATIN CAPITAL LETTER A WITH OGONEK +A5 0105 LATIN SMALL LETTER A WITH OGONEK +A6 017D LATIN CAPITAL LETTER Z WITH CARON +A7 017E LATIN SMALL LETTER Z WITH CARON +A8 0118 LATIN CAPITAL LETTER E WITH OGONEK +A9 0119 LATIN SMALL LETTER E WITH OGONEK +AA 00AC NOT SIGN +AB 017A LATIN SMALL LETTER Z WITH ACUTE +AC 010C LATIN CAPITAL LETTER C WITH CARON +AD 015F LATIN SMALL LETTER S WITH CEDILLA +AE 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +AF 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +B0 2591 LIGHT SHADE +B1 2592 MEDIUM SHADE +B2 2593 DARK SHADE +B3 2502 BOX 
DRAWINGS LIGHT VERTICAL +B4 2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT +B5 00C1 LATIN CAPITAL LETTER A WITH ACUTE +B6 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +B7 011A LATIN CAPITAL LETTER E WITH CARON +B8 015E LATIN CAPITAL LETTER S WITH CEDILLA +B9 2563 BOX DRAWINGS DOUBLE VERTICAL AND LEFT +BA 2551 BOX DRAWINGS DOUBLE VERTICAL +BB 2557 BOX DRAWINGS DOUBLE DOWN AND LEFT +BC 255D BOX DRAWINGS DOUBLE UP AND LEFT +BD 017B LATIN CAPITAL LETTER Z WITH DOT ABOVE +BE 017C LATIN SMALL LETTER Z WITH DOT ABOVE +BF 2510 BOX DRAWINGS LIGHT DOWN AND LEFT +C0 2514 BOX DRAWINGS LIGHT UP AND RIGHT +C1 2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL +C2 252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +C3 251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT +C4 2500 BOX DRAWINGS LIGHT HORIZONTAL +C5 253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +C6 0102 LATIN CAPITAL LETTER A WITH BREVE +C7 0103 LATIN SMALL LETTER A WITH BREVE +C8 255A BOX DRAWINGS DOUBLE UP AND RIGHT +C9 2554 BOX DRAWINGS DOUBLE DOWN AND RIGHT +CA 2569 BOX DRAWINGS DOUBLE UP AND HORIZONTAL +CB 2566 BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +CC 2560 BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +CD 2550 BOX DRAWINGS DOUBLE HORIZONTAL +CE 256C BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +CF 00A4 CURRENCY SIGN +D0 00F0 LATIN SMALL LETTER ETH +D1 00D0 LATIN CAPITAL LETTER ETH +D2 010E LATIN CAPITAL LETTER D WITH CARON +D3 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +D4 010F LATIN SMALL LETTER D WITH CARON +D5 0147 LATIN CAPITAL LETTER N WITH CARON +D6 00CD LATIN CAPITAL LETTER I WITH ACUTE +D7 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +D8 011B LATIN SMALL LETTER E WITH CARON +D9 2518 BOX DRAWINGS LIGHT UP AND LEFT +DA 250C BOX DRAWINGS LIGHT DOWN AND RIGHT +DB 2588 FULL BLOCK +DC 2584 LOWER HALF BLOCK +DD 0162 LATIN CAPITAL LETTER T WITH CEDILLA +DE 016E LATIN CAPITAL LETTER U WITH RING ABOVE +DF 2580 UPPER HALF BLOCK +E0 00D3 LATIN CAPITAL LETTER O WITH ACUTE +E1 00DF LATIN SMALL LETTER SHARP S +E2 00D4 LATIN CAPITAL LETTER O WITH 
CIRCUMFLEX +E3 0143 LATIN CAPITAL LETTER N WITH ACUTE +E4 0144 LATIN SMALL LETTER N WITH ACUTE +E5 0148 LATIN SMALL LETTER N WITH CARON +E6 0160 LATIN CAPITAL LETTER S WITH CARON +E7 0161 LATIN SMALL LETTER S WITH CARON +E8 0154 LATIN CAPITAL LETTER R WITH ACUTE +E9 00DA LATIN CAPITAL LETTER U WITH ACUTE +EA 0155 LATIN SMALL LETTER R WITH ACUTE +EB 0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +EC 00FD LATIN SMALL LETTER Y WITH ACUTE +ED 00DD LATIN CAPITAL LETTER Y WITH ACUTE +EE 0163 LATIN SMALL LETTER T WITH CEDILLA +EF 00B4 ACUTE ACCENT +F0 2261 IDENTICAL TO +F1 030B COMBINING DOUBLE ACUTE ACCENT +F2 0328 COMBINING OGONEK +F3 030C COMBINING CARON +F4 0306 COMBINING BREVE +F5 00A7 SECTION SIGN +F6 00F7 DIVISION SIGN +F7 0327 COMBINING CEDILLA +F8 FFFD REPLACEMENT CHARACTER +F9 0308 COMBINING DIAERESIS +FA 02D9 DOT ABOVE +FB 0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE +FC 0158 LATIN CAPITAL LETTER R WITH CARON +FD 0159 LATIN SMALL LETTER R WITH CARON +FE 25A0 BLACK SQUARE +FF 00A0 NO-BREAK SPACE diff --git a/libucw/charset/set/win-1250 b/libucw/charset/set/win-1250 new file mode 100644 index 0000000..a934ef6 --- /dev/null +++ b/libucw/charset/set/win-1250 @@ -0,0 +1,255 @@ +# CP1250.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C 
FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 
0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 20AC EURO SIGN +82 201A SINGLE LOW-9 QUOTATION MARK +84 201E DOUBLE LOW-9 QUOTATION MARK +85 2026 HORIZONTAL ELLIPSIS +86 2020 DAGGER +87 2021 DOUBLE DAGGER +89 2030 PER MILLE SIGN +8A 0160 LATIN CAPITAL LETTER S WITH CARON +8B 2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK +8C 015A LATIN CAPITAL LETTER S WITH ACUTE +8D 0164 LATIN CAPITAL LETTER T WITH CARON +8E 017D LATIN CAPITAL LETTER Z WITH CARON +8F 0179 LATIN CAPITAL LETTER Z WITH ACUTE +91 2018 LEFT SINGLE QUOTATION MARK +92 2019 RIGHT SINGLE QUOTATION MARK +93 201C LEFT DOUBLE QUOTATION MARK +94 201D RIGHT DOUBLE QUOTATION MARK +95 2022 BULLET +96 2013 EN DASH +97 2014 EM DASH +99 2122 TRADE MARK SIGN +9A 0161 LATIN SMALL LETTER S WITH CARON +9B 203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +9C 015B LATIN SMALL LETTER S WITH ACUTE +9D 0165 LATIN SMALL LETTER T WITH CARON +9E 017E LATIN SMALL LETTER Z WITH CARON +9F 017A LATIN SMALL LETTER Z WITH ACUTE +A0 00A0 NO-BREAK SPACE +A1 02C7 CARON +A2 02D8 BREVE +A3 0141 LATIN CAPITAL LETTER L WITH STROKE +A4 00A4 CURRENCY SIGN +A5 0104 LATIN CAPITAL LETTER A WITH OGONEK +A6 00A6 BROKEN BAR +A7 00A7 SECTION SIGN +A8 00A8 DIAERESIS +A9 00A9 COPYRIGHT SIGN +AA 015E LATIN CAPITAL LETTER S WITH CEDILLA +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 00AC NOT SIGN +AD 00AD SOFT HYPHEN +AE 00AE REGISTERED SIGN +AF 017B 
LATIN CAPITAL LETTER Z WITH DOT ABOVE +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 02DB OGONEK +B3 0142 LATIN SMALL LETTER L WITH STROKE +B4 00B4 ACUTE ACCENT +B5 00B5 MICRO SIGN +B6 00B6 PILCROW SIGN +B7 00B7 MIDDLE DOT +B8 00B8 CEDILLA +B9 0105 LATIN SMALL LETTER A WITH OGONEK +BA 015F LATIN SMALL LETTER S WITH CEDILLA +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 013D LATIN CAPITAL LETTER L WITH CARON +BD 02DD DOUBLE ACUTE ACCENT +BE 013E LATIN SMALL LETTER L WITH CARON +BF 017C LATIN SMALL LETTER Z WITH DOT ABOVE +C0 0154 LATIN CAPITAL LETTER R WITH ACUTE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 0102 LATIN CAPITAL LETTER A WITH BREVE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 0139 LATIN CAPITAL LETTER L WITH ACUTE +C6 0106 LATIN CAPITAL LETTER C WITH ACUTE +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 010C LATIN CAPITAL LETTER C WITH CARON +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 0118 LATIN CAPITAL LETTER E WITH OGONEK +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 011A LATIN CAPITAL LETTER E WITH CARON +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 010E LATIN CAPITAL LETTER D WITH CARON +D0 0110 LATIN CAPITAL LETTER D WITH STROKE +D1 0143 LATIN CAPITAL LETTER N WITH ACUTE +D2 0147 LATIN CAPITAL LETTER N WITH CARON +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 00D7 MULTIPLICATION SIGN +D8 0158 LATIN CAPITAL LETTER R WITH CARON +D9 016E LATIN CAPITAL LETTER U WITH RING ABOVE +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 00DD LATIN CAPITAL LETTER Y WITH ACUTE +DE 0162 LATIN CAPITAL LETTER T WITH CEDILLA +DF 00DF LATIN SMALL LETTER SHARP S +E0 0155 LATIN SMALL LETTER R WITH ACUTE +E1 00E1 
LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 0103 LATIN SMALL LETTER A WITH BREVE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 013A LATIN SMALL LETTER L WITH ACUTE +E6 0107 LATIN SMALL LETTER C WITH ACUTE +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 010D LATIN SMALL LETTER C WITH CARON +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 0119 LATIN SMALL LETTER E WITH OGONEK +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 011B LATIN SMALL LETTER E WITH CARON +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 010F LATIN SMALL LETTER D WITH CARON +F0 0111 LATIN SMALL LETTER D WITH STROKE +F1 0144 LATIN SMALL LETTER N WITH ACUTE +F2 0148 LATIN SMALL LETTER N WITH CARON +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 00F7 DIVISION SIGN +F8 0159 LATIN SMALL LETTER R WITH CARON +F9 016F LATIN SMALL LETTER U WITH RING ABOVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 00FD LATIN SMALL LETTER Y WITH ACUTE +FE 0163 LATIN SMALL LETTER T WITH CEDILLA +FF 02D9 DOT ABOVE diff --git a/libucw/charset/set/win-1251 b/libucw/charset/set/win-1251 new file mode 100644 index 0000000..d9b2d7c --- /dev/null +++ b/libucw/charset/set/win-1251 @@ -0,0 +1,260 @@ +# CP1252.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT +# (c) 2005, Martin Mares + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL 
ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE 
+60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 0402 CYRILLIC CAPITAL LETTER DJE +81 0403 CYRILLIC CAPITAL LETTER GJE +82 201A SINGLE LOW-9 QUOTATION MARK +83 0453 CYRILLIC SMALL LETTER GJE +84 201E DOUBLE LOW-9 QUOTATION MARK +85 2026 HORIZONTAL ELLIPSIS +86 2020 DAGGER +87 2021 DOUBLE DAGGER +88 20AC EURO SIGN +89 2030 PER MILLE SIGN +8A 0409 CYRILLIC CAPITAL LETTER LJE +8B 2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK +8C 040A CYRILLIC CAPITAL LETTER NJE +8D 040C CYRILLIC CAPITAL LETTER KJE +8E 040B CYRILLIC CAPITAL LETTER TSHE +8F 040F CYRILLIC CAPITAL LETTER DZHE +90 0452 CYRILLIC SMALL LETTER DJE +91 2018 LEFT SINGLE QUOTATION MARK +92 2019 RIGHT SINGLE QUOTATION MARK +93 201C LEFT DOUBLE QUOTATION MARK +94 201D RIGHT DOUBLE QUOTATION MARK +95 2022 BULLET +96 2013 EN DASH +97 2014 EM DASH +98 FFFD REPLACEMENT CHARACTER +99 2122 TRADE MARK SIGN +9A 0459 CYRILLIC SMALL LETTER LJE +9B 203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +9C 045A CYRILLIC SMALL LETTER NJE +9D 045C CYRILLIC SMALL LETTER KJE +9E 045B CYRILLIC SMALL LETTER TSHE +9F 045F CYRILLIC SMALL LETTER DZHE +A0 00A0 NO-BREAK SPACE +A1 040E 
CYRILLIC CAPITAL LETTER SHORT U +A2 045E CYRILLIC SMALL LETTER SHORT U +A3 0408 CYRILLIC CAPITAL LETTER JE +A4 00A4 CURRENCY SIGN +A5 0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN +A6 00A6 BROKEN BAR +A7 00A7 SECTION SIGN +A8 0401 CYRILLIC CAPITAL LETTER IO +A9 00A9 COPYRIGHT SIGN +AA 0404 CYRILLIC CAPITAL LETTER UKRAINIAN IE +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 00AC NOT SIGN +AD 00AD SOFT HYPHEN +AE 00AE REGISTERED SIGN +AF 0407 CYRILLIC CAPITAL LETTER YI +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 0406 CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +B3 0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +B4 0491 CYRILLIC SMALL LETTER GHE WITH UPTURN +B5 00B5 MICRO SIGN +B6 00B6 PILCROW SIGN +B7 00B7 MIDDLE DOT +B8 0451 CYRILLIC SMALL LETTER IO +B9 2116 NUMERO SIGN +BA 0454 CYRILLIC SMALL LETTER UKRAINIAN IE +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 0458 CYRILLIC SMALL LETTER JE +BD 0405 CYRILLIC CAPITAL LETTER DZE +BE 0455 CYRILLIC SMALL LETTER DZE +BF 0457 CYRILLIC SMALL LETTER YI +C0 0410 CYRILLIC CAPITAL LETTER A +C1 0411 CYRILLIC CAPITAL LETTER BE +C2 0412 CYRILLIC CAPITAL LETTER VE +C3 0413 CYRILLIC CAPITAL LETTER GHE +C4 0414 CYRILLIC CAPITAL LETTER DE +C5 0415 CYRILLIC CAPITAL LETTER IE +C6 0416 CYRILLIC CAPITAL LETTER ZHE +C7 0417 CYRILLIC CAPITAL LETTER ZE +C8 0418 CYRILLIC CAPITAL LETTER I +C9 0419 CYRILLIC CAPITAL LETTER SHORT I +CA 041A CYRILLIC CAPITAL LETTER KA +CB 041B CYRILLIC CAPITAL LETTER EL +CC 041C CYRILLIC CAPITAL LETTER EM +CD 041D CYRILLIC CAPITAL LETTER EN +CE 041E CYRILLIC CAPITAL LETTER O +CF 041F CYRILLIC CAPITAL LETTER PE +D0 0420 CYRILLIC CAPITAL LETTER ER +D1 0421 CYRILLIC CAPITAL LETTER ES +D2 0422 CYRILLIC CAPITAL LETTER TE +D3 0423 CYRILLIC CAPITAL LETTER U +D4 0424 CYRILLIC CAPITAL LETTER EF +D5 0425 CYRILLIC CAPITAL LETTER HA +D6 0426 CYRILLIC CAPITAL LETTER TSE +D7 0427 CYRILLIC CAPITAL LETTER CHE +D8 0428 CYRILLIC CAPITAL LETTER SHA +D9 0429 CYRILLIC CAPITAL LETTER SHCHA +DA 042A 
CYRILLIC CAPITAL LETTER HARD SIGN +DB 042B CYRILLIC CAPITAL LETTER YERU +DC 042C CYRILLIC CAPITAL LETTER SOFT SIGN +DD 042D CYRILLIC CAPITAL LETTER E +DE 042E CYRILLIC CAPITAL LETTER YU +DF 042F CYRILLIC CAPITAL LETTER YA +E0 0430 CYRILLIC SMALL LETTER A +E1 0431 CYRILLIC SMALL LETTER BE +E2 0432 CYRILLIC SMALL LETTER VE +E3 0433 CYRILLIC SMALL LETTER GHE +E4 0434 CYRILLIC SMALL LETTER DE +E5 0435 CYRILLIC SMALL LETTER IE +E6 0436 CYRILLIC SMALL LETTER ZHE +E7 0437 CYRILLIC SMALL LETTER ZE +E8 0438 CYRILLIC SMALL LETTER I +E9 0439 CYRILLIC SMALL LETTER SHORT I +EA 043A CYRILLIC SMALL LETTER KA +EB 043B CYRILLIC SMALL LETTER EL +EC 043C CYRILLIC SMALL LETTER EM +ED 043D CYRILLIC SMALL LETTER EN +EE 043E CYRILLIC SMALL LETTER O +EF 043F CYRILLIC SMALL LETTER PE +F0 0440 CYRILLIC SMALL LETTER ER +F1 0441 CYRILLIC SMALL LETTER ES +F2 0442 CYRILLIC SMALL LETTER TE +F3 0443 CYRILLIC SMALL LETTER U +F4 0444 CYRILLIC SMALL LETTER EF +F5 0445 CYRILLIC SMALL LETTER HA +F6 0446 CYRILLIC SMALL LETTER TSE +F7 0447 CYRILLIC SMALL LETTER CHE +F8 0448 CYRILLIC SMALL LETTER SHA +F9 0449 CYRILLIC SMALL LETTER SHCHA +FA 044A CYRILLIC SMALL LETTER HARD SIGN +FB 044B CYRILLIC SMALL LETTER YERU +FC 044C CYRILLIC SMALL LETTER SOFT SIGN +FD 044D CYRILLIC SMALL LETTER E +FE 044E CYRILLIC SMALL LETTER YU +FF 044F CYRILLIC SMALL LETTER YA diff --git a/libucw/charset/set/win-1252 b/libucw/charset/set/win-1252 new file mode 100644 index 0000000..0e9a324 --- /dev/null +++ b/libucw/charset/set/win-1252 @@ -0,0 +1,255 @@ +# CP1252.TXT charset file +# Imported from ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT +# (c) 2003, Robert Spalek + +00 0000 NULL +01 0001 START OF HEADING +02 0002 START OF TEXT +03 0003 END OF TEXT +04 0004 END OF TRANSMISSION +05 0005 ENQUIRY +06 0006 ACKNOWLEDGE +07 0007 BELL +08 0008 BACKSPACE +09 0009 HORIZONTAL TABULATION +0A 000A LINE FEED +0B 000B VERTICAL TABULATION +0C 000C FORM FEED +0D 000D CARRIAGE RETURN +0E 000E SHIFT OUT +0F 000F 
SHIFT IN +10 0010 DATA LINK ESCAPE +11 0011 DEVICE CONTROL ONE +12 0012 DEVICE CONTROL TWO +13 0013 DEVICE CONTROL THREE +14 0014 DEVICE CONTROL FOUR +15 0015 NEGATIVE ACKNOWLEDGE +16 0016 SYNCHRONOUS IDLE +17 0017 END OF TRANSMISSION BLOCK +18 0018 CANCEL +19 0019 END OF MEDIUM +1A 001A SUBSTITUTE +1B 001B ESCAPE +1C 001C FILE SEPARATOR +1D 001D GROUP SEPARATOR +1E 001E RECORD SEPARATOR +1F 001F UNIT SEPARATOR +20 0020 SPACE +21 0021 EXCLAMATION MARK +22 0022 QUOTATION MARK +23 0023 NUMBER SIGN +24 0024 DOLLAR SIGN +25 0025 PERCENT SIGN +26 0026 AMPERSAND +27 0027 APOSTROPHE +28 0028 LEFT PARENTHESIS +29 0029 RIGHT PARENTHESIS +2A 002A ASTERISK +2B 002B PLUS SIGN +2C 002C COMMA +2D 002D HYPHEN-MINUS +2E 002E FULL STOP +2F 002F SOLIDUS +30 0030 DIGIT ZERO +31 0031 DIGIT ONE +32 0032 DIGIT TWO +33 0033 DIGIT THREE +34 0034 DIGIT FOUR +35 0035 DIGIT FIVE +36 0036 DIGIT SIX +37 0037 DIGIT SEVEN +38 0038 DIGIT EIGHT +39 0039 DIGIT NINE +3A 003A COLON +3B 003B SEMICOLON +3C 003C LESS-THAN SIGN +3D 003D EQUALS SIGN +3E 003E GREATER-THAN SIGN +3F 003F QUESTION MARK +40 0040 COMMERCIAL AT +41 0041 LATIN CAPITAL LETTER A +42 0042 LATIN CAPITAL LETTER B +43 0043 LATIN CAPITAL LETTER C +44 0044 LATIN CAPITAL LETTER D +45 0045 LATIN CAPITAL LETTER E +46 0046 LATIN CAPITAL LETTER F +47 0047 LATIN CAPITAL LETTER G +48 0048 LATIN CAPITAL LETTER H +49 0049 LATIN CAPITAL LETTER I +4A 004A LATIN CAPITAL LETTER J +4B 004B LATIN CAPITAL LETTER K +4C 004C LATIN CAPITAL LETTER L +4D 004D LATIN CAPITAL LETTER M +4E 004E LATIN CAPITAL LETTER N +4F 004F LATIN CAPITAL LETTER O +50 0050 LATIN CAPITAL LETTER P +51 0051 LATIN CAPITAL LETTER Q +52 0052 LATIN CAPITAL LETTER R +53 0053 LATIN CAPITAL LETTER S +54 0054 LATIN CAPITAL LETTER T +55 0055 LATIN CAPITAL LETTER U +56 0056 LATIN CAPITAL LETTER V +57 0057 LATIN CAPITAL LETTER W +58 0058 LATIN CAPITAL LETTER X +59 0059 LATIN CAPITAL LETTER Y +5A 005A LATIN CAPITAL LETTER Z +5B 005B LEFT SQUARE BRACKET +5C 005C REVERSE SOLIDUS +5D 005D RIGHT 
SQUARE BRACKET +5E 005E CIRCUMFLEX ACCENT +5F 005F LOW LINE +60 0060 GRAVE ACCENT +61 0061 LATIN SMALL LETTER A +62 0062 LATIN SMALL LETTER B +63 0063 LATIN SMALL LETTER C +64 0064 LATIN SMALL LETTER D +65 0065 LATIN SMALL LETTER E +66 0066 LATIN SMALL LETTER F +67 0067 LATIN SMALL LETTER G +68 0068 LATIN SMALL LETTER H +69 0069 LATIN SMALL LETTER I +6A 006A LATIN SMALL LETTER J +6B 006B LATIN SMALL LETTER K +6C 006C LATIN SMALL LETTER L +6D 006D LATIN SMALL LETTER M +6E 006E LATIN SMALL LETTER N +6F 006F LATIN SMALL LETTER O +70 0070 LATIN SMALL LETTER P +71 0071 LATIN SMALL LETTER Q +72 0072 LATIN SMALL LETTER R +73 0073 LATIN SMALL LETTER S +74 0074 LATIN SMALL LETTER T +75 0075 LATIN SMALL LETTER U +76 0076 LATIN SMALL LETTER V +77 0077 LATIN SMALL LETTER W +78 0078 LATIN SMALL LETTER X +79 0079 LATIN SMALL LETTER Y +7A 007A LATIN SMALL LETTER Z +7B 007B LEFT CURLY BRACKET +7C 007C VERTICAL LINE +7D 007D RIGHT CURLY BRACKET +7E 007E TILDE +7F 007F DELETE +80 20AC EURO SIGN +82 201A SINGLE LOW-9 QUOTATION MARK +83 0192 LATIN SMALL LETTER F WITH HOOK +84 201E DOUBLE LOW-9 QUOTATION MARK +85 2026 HORIZONTAL ELLIPSIS +86 2020 DAGGER +87 2021 DOUBLE DAGGER +88 02C6 MODIFIER LETTER CIRCUMFLEX ACCENT +89 2030 PER MILLE SIGN +8A 0160 LATIN CAPITAL LETTER S WITH CARON +8B 2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK +8C 0152 LATIN CAPITAL LIGATURE OE +8E 017D LATIN CAPITAL LETTER Z WITH CARON +91 2018 LEFT SINGLE QUOTATION MARK +92 2019 RIGHT SINGLE QUOTATION MARK +93 201C LEFT DOUBLE QUOTATION MARK +94 201D RIGHT DOUBLE QUOTATION MARK +95 2022 BULLET +96 2013 EN DASH +97 2014 EM DASH +98 02DC SMALL TILDE +99 2122 TRADE MARK SIGN +9A 0161 LATIN SMALL LETTER S WITH CARON +9B 203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +9C 0153 LATIN SMALL LIGATURE OE +9E 017E LATIN SMALL LETTER Z WITH CARON +9F 0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +A0 00A0 NO-BREAK SPACE +A1 00A1 INVERTED EXCLAMATION MARK +A2 00A2 CENT SIGN +A3 00A3 POUND SIGN +A4 00A4 CURRENCY SIGN +A5 00A5 
YEN SIGN +A6 00A6 BROKEN BAR +A7 00A7 SECTION SIGN +A8 00A8 DIAERESIS +A9 00A9 COPYRIGHT SIGN +AA 00AA FEMININE ORDINAL INDICATOR +AB 00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +AC 00AC NOT SIGN +AD 00AD SOFT HYPHEN +AE 00AE REGISTERED SIGN +AF 00AF MACRON +B0 00B0 DEGREE SIGN +B1 00B1 PLUS-MINUS SIGN +B2 00B2 SUPERSCRIPT TWO +B3 00B3 SUPERSCRIPT THREE +B4 00B4 ACUTE ACCENT +B5 00B5 MICRO SIGN +B6 00B6 PILCROW SIGN +B7 00B7 MIDDLE DOT +B8 00B8 CEDILLA +B9 00B9 SUPERSCRIPT ONE +BA 00BA MASCULINE ORDINAL INDICATOR +BB 00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +BC 00BC VULGAR FRACTION ONE QUARTER +BD 00BD VULGAR FRACTION ONE HALF +BE 00BE VULGAR FRACTION THREE QUARTERS +BF 00BF INVERTED QUESTION MARK +C0 00C0 LATIN CAPITAL LETTER A WITH GRAVE +C1 00C1 LATIN CAPITAL LETTER A WITH ACUTE +C2 00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX +C3 00C3 LATIN CAPITAL LETTER A WITH TILDE +C4 00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +C5 00C5 LATIN CAPITAL LETTER A WITH RING ABOVE +C6 00C6 LATIN CAPITAL LETTER AE +C7 00C7 LATIN CAPITAL LETTER C WITH CEDILLA +C8 00C8 LATIN CAPITAL LETTER E WITH GRAVE +C9 00C9 LATIN CAPITAL LETTER E WITH ACUTE +CA 00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX +CB 00CB LATIN CAPITAL LETTER E WITH DIAERESIS +CC 00CC LATIN CAPITAL LETTER I WITH GRAVE +CD 00CD LATIN CAPITAL LETTER I WITH ACUTE +CE 00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX +CF 00CF LATIN CAPITAL LETTER I WITH DIAERESIS +D0 00D0 LATIN CAPITAL LETTER ETH +D1 00D1 LATIN CAPITAL LETTER N WITH TILDE +D2 00D2 LATIN CAPITAL LETTER O WITH GRAVE +D3 00D3 LATIN CAPITAL LETTER O WITH ACUTE +D4 00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX +D5 00D5 LATIN CAPITAL LETTER O WITH TILDE +D6 00D6 LATIN CAPITAL LETTER O WITH DIAERESIS +D7 00D7 MULTIPLICATION SIGN +D8 00D8 LATIN CAPITAL LETTER O WITH STROKE +D9 00D9 LATIN CAPITAL LETTER U WITH GRAVE +DA 00DA LATIN CAPITAL LETTER U WITH ACUTE +DB 00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX +DC 00DC LATIN CAPITAL LETTER U WITH DIAERESIS +DD 00DD LATIN 
CAPITAL LETTER Y WITH ACUTE +DE 00DE LATIN CAPITAL LETTER THORN +DF 00DF LATIN SMALL LETTER SHARP S +E0 00E0 LATIN SMALL LETTER A WITH GRAVE +E1 00E1 LATIN SMALL LETTER A WITH ACUTE +E2 00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX +E3 00E3 LATIN SMALL LETTER A WITH TILDE +E4 00E4 LATIN SMALL LETTER A WITH DIAERESIS +E5 00E5 LATIN SMALL LETTER A WITH RING ABOVE +E6 00E6 LATIN SMALL LETTER AE +E7 00E7 LATIN SMALL LETTER C WITH CEDILLA +E8 00E8 LATIN SMALL LETTER E WITH GRAVE +E9 00E9 LATIN SMALL LETTER E WITH ACUTE +EA 00EA LATIN SMALL LETTER E WITH CIRCUMFLEX +EB 00EB LATIN SMALL LETTER E WITH DIAERESIS +EC 00EC LATIN SMALL LETTER I WITH GRAVE +ED 00ED LATIN SMALL LETTER I WITH ACUTE +EE 00EE LATIN SMALL LETTER I WITH CIRCUMFLEX +EF 00EF LATIN SMALL LETTER I WITH DIAERESIS +F0 00F0 LATIN SMALL LETTER ETH +F1 00F1 LATIN SMALL LETTER N WITH TILDE +F2 00F2 LATIN SMALL LETTER O WITH GRAVE +F3 00F3 LATIN SMALL LETTER O WITH ACUTE +F4 00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX +F5 00F5 LATIN SMALL LETTER O WITH TILDE +F6 00F6 LATIN SMALL LETTER O WITH DIAERESIS +F7 00F7 DIVISION SIGN +F8 00F8 LATIN SMALL LETTER O WITH STROKE +F9 00F9 LATIN SMALL LETTER U WITH GRAVE +FA 00FA LATIN SMALL LETTER U WITH ACUTE +FB 00FB LATIN SMALL LETTER U WITH CIRCUMFLEX +FC 00FC LATIN SMALL LETTER U WITH DIAERESIS +FD 00FD LATIN SMALL LETTER Y WITH ACUTE +FE 00FE LATIN SMALL LETTER THORN +FF 00FF LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/libucw/charset/setnames.c b/libucw/charset/setnames.c new file mode 100644 index 0000000..3f4cdda --- /dev/null +++ b/libucw/charset/setnames.c @@ -0,0 +1,66 @@ +/* + * Character Set Conversion Library 1.0 -- Character Set Names + * + * (c) 1998--2005 Martin Mares + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU General Public License. 
+ */ + +#include +#include + +#include + +/* Names according to RFC 1345 (see http://www.iana.org/assignments/character-sets) */ + +static const char *cs_names[] = { + "US-ASCII", + "ISO-8859-1", + "ISO-8859-2", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-9", + "ISO-8859-10", + "ISO-8859-11", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + "windows-1250", + "windows-1251", + "windows-1252", + "x-kam-cs", + "CSN_369103", + "cp852", + "x-mac-ce", + "x-cork", + "utf-8", + "utf-16be", + "utf-16le" +}; + +int +find_charset_by_name(const char *c) /* Look up the charset name c in cs_names[]; returns its index, or -1 when no entry matches */ +{ + uint i; + + for(i=0; i<CONV_NUM_CHARSETS; i++) /* NOTE(review): the rest of this loop and the head of charset_name() were lost to tag stripping and are reconstructed to fit the 66-line hunk -- confirm against upstream */ + if (!strcasecmp(cs_names[i], c)) + return i; + return -1; +} + +char * +charset_name(int i) /* Name of charset number i, or "x-unknown" when i lies outside the table */ +{ + if (i < 0 || i >= CONV_NUM_CHARSETS) /* assumes cs_names[] has exactly CONV_NUM_CHARSETS entries (the constant is defined outside this chunk), so the last valid index is CONV_NUM_CHARSETS-1 */ + return "x-unknown"; + else + return (char *)cs_names[i]; +} diff --git a/libucw/charset/stk-charconv.c b/libucw/charset/stk-charconv.c new file mode 100644 index 0000000..b6359d4 --- /dev/null +++ b/libucw/charset/stk-charconv.c @@ -0,0 +1,60 @@ +/* + * UCW Library -- Character Conversion with Allocation on the Stack + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License.
+ */ + +#include +#include +#include + +#define INITIAL_MIN_SIZE 16 /* smallest buffer size stk_strconv_init() ever asks for */ +#define INITIAL_SCALE 2 /* multiplier applied to the input length for the first buffer */ + +uint +stk_strconv_init(struct conv_context *c, const byte *s, uint in_cs, uint out_cs) /* Set up c for converting the NUL-terminated string s from in_cs to out_cs; returns the size of the first buffer to pass to stk_strconv_step() */ +{ + uint l = strlen(s); + if (in_cs == out_cs) /* same charset on both sides: nothing to convert */ + { + c->source = s; + c->source_end = NULL; /* NULL source_end tells stk_strconv_step() to do a plain copy */ + return l + 1; /* string length plus the terminating NUL */ + } + conv_init(c); + conv_set_charset(c, in_cs, out_cs); + c->source = s; + c->source_end = s + l + 1; /* the terminating NUL is part of the input */ + if (l < (INITIAL_MIN_SIZE - 1) / INITIAL_SCALE) /* short strings get the fixed minimum */ + return INITIAL_MIN_SIZE; + else + return l * INITIAL_SCALE + 1; /* longer strings: INITIAL_SCALE times the length plus one byte */ +} + +uint +stk_strconv_step(struct conv_context *c, byte *buf, uint len) /* Run one round into buf (len bytes); returns 0 when the result is complete (it starts at c->dest_start), otherwise the size of the buffer wanted for the next call */ +{ + if (!c->source_end) /* plain-copy case set up by stk_strconv_init() */ + { + memcpy(buf, c->source, len); + c->dest_start = buf; + return 0; + } + if (c->dest_start) /* set at the end of every earlier round; presumably cleared by conv_init() -- TODO confirm */ + { + uint l = c->dest_end - c->dest_start; /* size of the previous buffer */ + memcpy(buf, c->dest_start, l); /* carry the previous buffer's contents over */ + c->dest = buf + l; /* continue writing right after them */ + } + else + c->dest = buf; + c->dest_start = buf; + c->dest_end = buf + len; + if (conv_run(c) & CONV_SOURCE_END) /* whole source consumed: done */ + return 0; + return len << 1; /* source not exhausted yet: request a buffer twice as large */ +} + diff --git a/libucw/charset/stk-charconv.h b/libucw/charset/stk-charconv.h new file mode 100644 index 0000000..a1d40dd --- /dev/null +++ b/libucw/charset/stk-charconv.h @@ -0,0 +1,35 @@ +/* + * UCW Library -- Character Conversion with Allocation on the Stack + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _CHARSET_STK_CHARCONV_H +#define _CHARSET_STK_CHARCONV_H + +#include +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define stk_strconv_init ucw_stk_strconv_init +#define stk_strconv_step ucw_stk_strconv_step +#endif + +/* The following macros convert strings between given charsets (CONV_CHARSET_x).
*/ + +#define stk_strconv(s, cs_in, cs_out) \ + ({ struct conv_context _c; uint _l=stk_strconv_init(&_c, (s), (cs_in), (cs_out)); \ + while (_l) _l=stk_strconv_step(&_c, alloca(_l), _l); _c.dest_start; }) + +#define stk_strconv_to_utf8(s, cs_in) stk_strconv(s, cs_in, CONV_CHARSET_UTF8) +#define stk_strconv_from_utf8(s, cs_out) stk_strconv(s, CONV_CHARSET_UTF8, cs_out) + +/* Internals */ + +uint stk_strconv_init(struct conv_context *c, const byte *s, uint cs_in, uint cs_out); +uint stk_strconv_step(struct conv_context *c, byte *buf, uint len); + +#endif diff --git a/libucw/charset/tocat.c b/libucw/charset/tocat.c new file mode 100644 index 0000000..a5f9da4 --- /dev/null +++ b/libucw/charset/tocat.c @@ -0,0 +1,12 @@ +/* + * The UniCode Library -- Category Table + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include diff --git a/libucw/charset/toligatures.c b/libucw/charset/toligatures.c new file mode 100644 index 0000000..8cb5c0f --- /dev/null +++ b/libucw/charset/toligatures.c @@ -0,0 +1,18 @@ +/* + * The UniCode Library -- Table of Ligatures + * + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include + +const u16 * +Uexpand_lig(uint x) +{ + return _U_lig_hash[x % LIG_HASH_SIZE]; +} diff --git a/libucw/charset/tolower.c b/libucw/charset/tolower.c new file mode 100644 index 0000000..84544d7 --- /dev/null +++ b/libucw/charset/tolower.c @@ -0,0 +1,12 @@ +/* + * The UniCode Library -- Lowercase Table + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include +#include diff --git a/libucw/charset/toupper.c b/libucw/charset/toupper.c new file mode 100644 index 0000000..5e2b91e --- /dev/null +++ b/libucw/charset/toupper.c @@ -0,0 +1,12 @@ +/* + * The UniCode Library -- Uppercase Table + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include diff --git a/libucw/charset/ucw-cs2cs.c b/libucw/charset/ucw-cs2cs.c new file mode 100644 index 0000000..74e092a --- /dev/null +++ b/libucw/charset/ucw-cs2cs.c @@ -0,0 +1,64 @@ +/* + * Simple character set convertor + * + * (c) 1998 Pavel Machek + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU General Public License. + */ + +#include +#include + +#include +#include +#include + +#ifdef TEST +#define BUFSIZE 13 +#else +#define BUFSIZE 4096 +#endif + +int +main(int argc, char **argv) +{ + struct conv_context ctxt; + int ch_from, ch_to, n, flags; + char inbuf[BUFSIZE], outbuf[BUFSIZE]; + + if (argc != 3) + die("ucw-cs2cs in-charset out-charset"); + conv_init(&ctxt); + ch_from = find_charset_by_name(argv[1]); + if (ch_from < 0) + die("Unknown charset %s", argv[1]); + ch_to = find_charset_by_name(argv[2]); + if (ch_to < 0) + die("Unknown charset %s", argv[2]); + + conv_set_charset(&ctxt, ch_from, ch_to); + while ((n = read(0, inbuf, sizeof(inbuf))) > 0) + { + ctxt.source = inbuf; + ctxt.source_end = inbuf + n; + ctxt.dest = ctxt.dest_start = outbuf; + ctxt.dest_end = outbuf + sizeof(outbuf); + do + { + flags = conv_run(&ctxt); + if (flags & (CONV_SOURCE_END | CONV_DEST_END)) + { + int w = write(1, ctxt.dest_start, ctxt.dest - ctxt.dest_start); + if (w < 0) + die("write error: %m"); + ctxt.dest = outbuf; + } + } + while (! 
(flags & CONV_SOURCE_END)); + } + if (n < 0) + die("read error: %m"); + return 0; +} diff --git a/libucw/charset/unaccent.c b/libucw/charset/unaccent.c new file mode 100644 index 0000000..4bb97a5 --- /dev/null +++ b/libucw/charset/unaccent.c @@ -0,0 +1,12 @@ +/* + * The UniCode Library -- Unaccenting Table + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include diff --git a/libucw/charset/unicat.h b/libucw/charset/unicat.h new file mode 100644 index 0000000..78f99eb --- /dev/null +++ b/libucw/charset/unicat.h @@ -0,0 +1,78 @@ +/* + * The UniCode Character Categorizer + * + * (c) 1997--2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _CHARSET_UNICAT_H +#define _CHARSET_UNICAT_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define Uexpand_lig ucw_Uexpand_lig +#define _U_cat ucw__U_cat +#define _U_lower ucw__U_lower +#define _U_unaccent ucw__U_unaccent +#define _U_upper ucw__U_upper +#endif + +extern const byte *_U_cat[]; +extern const u16 *_U_upper[], *_U_lower[], *_U_unaccent[]; + +static inline uint Ucategory(uint x) +{ + if (_U_cat[x >> 8U]) + return _U_cat[x >> 8U][x & 0xff]; + else + return 0; +} + +static inline uint Utoupper(uint x) +{ + uint w = (_U_upper[x >> 8U]) ? _U_upper[x >> 8U][x & 0xff] : 0; + return w ? w : x; +} + +static inline uint Utolower(uint x) +{ + uint w = (_U_lower[x >> 8U]) ? _U_lower[x >> 8U][x & 0xff] : 0; + return w ? w : x; +} + +static inline uint Uunaccent(uint x) +{ + uint w = (_U_unaccent[x >> 8U]) ? _U_unaccent[x >> 8U][x & 0xff] : 0; + return w ? 
w : x; +} + +extern const u16 *Uexpand_lig(uint x); + +enum unicode_char_type { + _U_LETTER = 1, /* Letters */ + _U_UPPER = 2, /* Upper-case letters */ + _U_LOWER = 4, /* Lower-case letters */ + _U_CTRL = 8, /* Control characters */ + _U_DIGIT = 16, /* Digits */ + _U_XDIGIT = 32, /* Hexadecimal digits */ + _U_SPACE = 64, /* White spaces (spaces, tabs, newlines) */ + _U_LIGATURE = 128, /* Compatibility ligature (to be expanded) */ +}; + +#define _U_LUPPER (_U_LETTER | _U_UPPER) +#define _U_LLOWER (_U_LETTER | _U_LOWER) + +#define UCat(x,y) (Ucategory(x) & (y)) + +#define Ualpha(x) UCat(x, _U_LETTER) +#define Uupper(x) UCat(x, _U_UPPER) +#define Ulower(x) UCat(x, _U_LOWER) +#define Udigit(x) UCat(x, _U_DIGIT) +#define Uxdigit(x) UCat(x, (_U_DIGIT | _U_XDIGIT)) +#define Ualnum(x) UCat(x, (_U_LETTER | _U_DIGIT)) +#define Uctrl(x) UCat(x, _U_CTRL) +#define Uprint(x) !Uctrl(x) +#define Uspace(x) UCat(x, _U_SPACE) + +#endif diff --git a/libucw/configure b/libucw/configure new file mode 100755 index 0000000..d3fca74 --- /dev/null +++ b/libucw/configure @@ -0,0 +1,36 @@ +#!/usr/bin/perl +# Configure Script for UCW Libraries +# (c) 2007--2010 Martin Mares + +use warnings; +use strict; + +our $srcdir; +BEGIN { + my $pkgfile = "ucw/wildmatch.c"; + if (!defined ($srcdir = $ENV{"SRCDIR"})) { + if (-f $pkgfile) { + $srcdir="."; + } elsif ($0 =~ m@^(.*)/configure$@ && -f "$1/$pkgfile") { + $srcdir=$1; + } else { + die "Don't know how to find myself. Please set SRCDIR manually.\n"; + } + } +} + +use lib "$srcdir/ucw/perl/"; +use UCW::Configure; + +Init($srcdir, "default.cfg"); +Include "ucw/default.cfg"; +Log "### Configuring UCW Libraries " . Get("UCW_VERSION") . " with configuration " . Get("CONFIG") . 
"\n"; +Include Get("CONFIG"); +require UCW::Configure::Paths; +require UCW::Configure::C; +require UCW::Configure::LibUCW; +require UCW::Configure::Doc; +require UCW::Configure::Perl; +Finish(); + +Log "\nConfigured, run `make' to build everything.\n"; diff --git a/libucw/debug/check-configs b/libucw/debug/check-configs new file mode 100755 index 0000000..e8787bf --- /dev/null +++ b/libucw/debug/check-configs @@ -0,0 +1,65 @@ +#!/bin/bash +# A script for testing compilability of different configurations +# (c) 2004--2010 Martin Mares + +set -e + +TEST=0 +ERR= +CC=${CC:-gcc} +MAKEOPTS=${MAKEOPTS:--j8} + +function die +{ + echo >&3 " $@" + exit 1 +} + +function try +{ + TEST=$(($TEST+1)) + TDIR=tests/$TEST + mkdir $TDIR + echo "### Test $TEST: $@ ###" | tee $TDIR/log + CONFIG="$1" + shift + ARGS="$@ CC=$CC" + + ( + cd $TDIR + exec 3>&2 >>log 2>&1 + case $CONFIG in + *) ../../configure $CONFIG CONFIG_LOCAL $ARGS || die "CANNOT CONFIGURE" + ;; + esac + make $MAKEOPTS || die FAILED + echo >&3 " COMPILATION PASSED" + if [ -z "$SKIP_TESTS" ] ; then + make -k -j1 tests || die "TESTS FAILED" + echo >&3 " TESTS PASSED" + fi + ) || ERR=1 +} + +rm -rf tests +mkdir tests +if [ "$1" == DARWIN ] ; then + # All tests on Darwin need CONFIG_SHARED, due to libucw-charset collision + # only visible with static linking. 
+ FLAGS="CONFIG_SHARED" + try debug/default.cfg $FLAGS CONFIG_UCW_PCRE # `make tests' does not work with non-local builds with shared libs +elif [ -n "$1" ] ; then + try "$@" +else + try default.cfg # default configuration + try default.cfg -CONFIG_DEBUG # with no debugging code + try debug/default.cfg # debugging configuration + try debug/default.cfg -CONFIG_SHARED # statically linked + try debug/default.cfg -CONFIG_UCW_THREADS # non-threaded configuration + try debug/default.cfg -CONFIG_UCW_TLS # threaded, but no TLS support in gcc + try debug/default.cfg -CONFIG_UCW_EPOLL -CONFIG_UCW_MONOTONIC_CLOCK # without epoll and monotonic clock + try debug/default.cfg CONFIG_UCW_POSIX_REGEX # different regex libs + try debug/default.cfg CONFIG_UCW_PCRE +fi + +[ -z "$ERR" ] diff --git a/libucw/debug/default.cfg b/libucw/debug/default.cfg new file mode 100644 index 0000000..ce75314 --- /dev/null +++ b/libucw/debug/default.cfg @@ -0,0 +1,17 @@ +# Configuration used for debugging LibUCW + +Include("default.cfg"); + +Set("CONFIG_LOCAL"); +# UnSet("CONFIG_SHARED"); +Set("CONFIG_DEBUG"); +Set("CONFIG_UCW_DEBUG_TOOLS"); +Set("CONFIG_EXACT_CPU"); +Set("CONFIG_UCW_OBSOLETE_DAEMON_HELPER"); + +Set("CONFIG_IMAGES"); +Set("CONFIG_IMAGES_DUP"); +Set("CONFIG_IMAGES_SIM"); + +Set("CONFIG_CHARSET"); +Set("CONFIG_XML"); diff --git a/libucw/default.cfg b/libucw/default.cfg new file mode 100644 index 0000000..086ab38 --- /dev/null +++ b/libucw/default.cfg @@ -0,0 +1,49 @@ +# Default configuration of UCW libraries +# (see */default.cfg for the description of all options) + +# Do we want shared or static libraries? 
+Set("CONFIG_SHARED"); + +# Include debugging code +Set("CONFIG_DEBUG"); + +# We want the public API +Set("CONFIG_INSTALL_API"); + +# Enable GCC link-time optimizations (experimental) +UnSet("CONFIG_LTO"); + +# LibUCW should support files >2GB and threading +Set("CONFIG_UCW_LARGE_FILES"); +Set("CONFIG_UCW_THREADS" => 1); + +# Libucw extensions +Set("CONFIG_UCW_PERL" => 1); +Set("CONFIG_UCW_PERL_MODULES" => 1); +Set("CONFIG_UCW_SHELL_UTILS" => 1); +Set("CONFIG_UCW_UTILS" => 1); + +# Libucw-images settings +UnSet("CONFIG_IMAGES"); +Set("CONFIG_IMAGES_LIBJPEG"); +Set("CONFIG_IMAGES_LIBPNG"); +UnSet("CONFIG_IMAGES_LIBUNGIF"); +Set("CONFIG_IMAGES_LIBGIF"); +UnSet("CONFIG_IMAGES_LIBMAGICK"); + +# Libucw-charset +Set("CONFIG_CHARSET"); +Set("CONFIG_CHARSET_UTILS"); + +# Libucw-xml +Set("CONFIG_XML"); + +# Libucw-json +Set("CONFIG_JSON"); + +# Compress .deb packages with gzip (instead of the default algorithm) +# Hack for Economia's deployment machinery +UnSet("CONFIG_BUILDDEB_GZIP"); + +# Return success +1; diff --git a/libucw/etc/images b/libucw/etc/images new file mode 100644 index 0000000..b02c4d0 --- /dev/null +++ b/libucw/etc/images @@ -0,0 +1,91 @@ +# Configuration of the image library + +######## General parameters ##################################################### + +ImageLib { + +# Default tracing level (0 to disable) +Trace 0 + +# Limits for image allocation +ImageMaxDim 0xffff # Maximum width/height (at most 64k-1) +ImageMaxBytes 256M # Maximum size in bytes + +} + +#if CONFIG_IMAGES_DUP || CONFIG_IMAGES_SIM +######## Image signatures ####################################################### + +ImageSig { + +# To find similar images, Sherlock uses comparison based on regions. +# First of all, the imagesim analyser extracts various region features. +# Sets of these features are called "image signatures" and they are stored +# in the `H' attribute of image objects. 
+ +# Signatures are later processed by the indexer to build an effective +# search structure finally used by the search server. See Indexer and Search +# sections for more options. + +# Minimum image size to apply segmentation. Smaller images are always +# compared by the simple "average" method (see ImageSig.CompareMethod). +MinWidth 16 +MinHeight 16 + +# List of subdivision thresholds in the first phase of segmentation. +# Lower the values to increase the average number of regions and vice versa. +PreQuantThresholds 6 12 15 20 25 25 30 30 40 40 50 50 60 60 60 + +# Settings for the second phase of segmentation -- usually not so important. +# We use an iterative algorithm to improve the average error from the first phase. +# We stop the process after PostQuantMaxSteps or if we get only PostQuantThreshold +# percent improvement over the previous step. +PostQuantMinSteps 2 +PostQuantMaxSteps 10 +PostQuantThreshold 1 + +# BorderBonus and BorderSize parameters can increase or decrease the weight +# of image pixels near the borders. Weight of all pixels more than BorderSize * MIN(cols, rows) +# pixels far from the edges is 128. Then this value continuously decreases/increases up to 128+BorderSize. +BorderSize 0.4 +BorderBonus -50 + +# Scaling constants for computation of normalized i-th order inertia features (I1, I2, I3). +InertiaScale 2 0.5 0.05 + +# Threshold for detecting textured images (see images/sig-txt.c for details). +# Decrease the threshold if you want fewer detected textures, +# set it to zero to disable the algorithm completely. +TexturedThreshold 0.32 + +# Signature comparison method: +# +# integrated +# based on: James Z. Wang, Jia Li and Gio Wiederhold, +# "SIMPLIcity: Semantics-Sensitive Integrated Matching for Picture Libraries", +# IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 23, no. 9, pp. 947-963, 2001. +# +# fuzzy (unstable and unbalanced parameters) +# based on: Yixin Chen and James Z.
Wang, +# "A Region-Based Fuzzy Feature Matching Approach to Content-Based Image Retrieval", +# IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 24, no. 9, pp. 1252-1267, 2002. +# +# average +# Simple distance of image features averages (ignores segmentation). +# +CompareMethod integrated + +# Array of multiplicative constants in feature vector distance computation +# (L, u, v, LH, HL, HH, I1, I2, I3, X, Y). Each one must be an integer in range 0..15, default is 4. +CompareFeaturesWeights 4 6 6 4 4 4 4 4 4 4 4 + +} + +#endif +#ifdef CONFIG_IMAGES_DUP +######## Duplicate finder ####################################################### + +ImageDup { +# Detection of image duplicates does not work yet. +} +#endif diff --git a/libucw/etc/libucw b/libucw/etc/libucw new file mode 100644 index 0000000..aa7db8b --- /dev/null +++ b/libucw/etc/libucw @@ -0,0 +1,247 @@ +# Configuration of the UCW library and related utilities + +######## Memory Mapped Access to Files ########################################## + +# Whenever you specify 0 for I/O buffer size, memory mapping is used instead. +FBMMap { + +# Map this many bytes at once (needs to be a multiple of CPU page size) +WindowSize 1M + +# When in need to extend a file, grow it by so many bytes (>= page size) +ExtendSize 1M + +} + +######## Direct Streamed I/O on Files ########################################### + +FBDirect { + +# Debug: Cheat by turning off O_DIRECT +#Cheat 1 + +} + +######## Atomic Multi-Threaded I/O on Files ##################################### + +FBAtomic { + +# Enable tracing +#Trace 1 + +} + +######## Parametrized I/O on Files ############################################## + +FBParam { + +Defaults { + +# Access type (std|direct|mmap). +Type std + +# Size of I/O buffer. Something of the order of megabytes for fast disks is recommended for direct I/O. 
+BufSize 64K + +# Optimize for mixed forward/backward reading (standard I/O only) +KeepBackBuf 0 + +# Perform read-ahead (direct I/O only) +ReadAhead 1 + +# Maximum number of write-back requests queued (direct I/O only) +WriteBack 1 + +} + +} + +######## Temporary files ######################################################## + +Tempfiles { + +# By default, we use the system's default temporary directory ($TMPDIR or /tmp), +# but sometimes it is better to store the temporary files in the local tree. +Dir @INSTALL_VAR_PREFIX@tmp + +# Prefix of temporary file names +Prefix temp- + +# By default, we append a random number to Prefix to get a temporary file name. +# If Prefix points to a directory that is not writable by malicious users, +# we can be less careful and use more consistent names of temporary files +# formed by adding "pid(-tid)-counter" instead. +PublicDir 0 + +} + +######## Threads ################################################################ + +Threads { + +# Default thread stack size +DefaultStackSize 64K + +} + +######## Sorter ################################################################# + +Sorter { + +# Trace sorting (1=basic statistics, 2=more stats, 3 and more for debugging) +Trace 2 + +# Trace array sorting (internal sorters) +TraceArray 0 + +# How much memory is the sorter allowed to use +SortBuffer 4M + +# File access used by the sorter (see FBParam section for details) +FileAccess std 256K + +# Use a different file access method for small inputs (less than the specified size) +SmallFileAccess std 64K +SmallInput 64M + +# Min-/Maximum number of bits to use in the external radix-sort (beware, we will open +# 1+2^this files and require a stream buffer for each of them; however, while we are +# doing that, the sort buffer is not allocated). Set both to zero to disable radix-sorting. +MinRadixBits 2 +MaxRadixBits 4 + +# The same for multi-way merging. 
The memory requirements are also the same, +# but please keep in mind that this can create lots of SortBuffer-sized files, +# so it is probably better to keep it disabled if you have a small SortBuffer. +MinMultiwayBits 2 +MaxMultiwayBits 4 + +# If we did not use radix-sorter to the full width, we still might add some more +# bits to the width to get chunks which are even smaller than SortBuffer, because +# it can speed up internal sorting later. However, we also want to avoid small +# files, so we add only a little. +AddRadixBits 2 + +# Number of threads used for sorting (0=disable threading) +Threads 0 + +# Minimum size of input (in bytes) to consider multi-threaded internal sorting +ThreadThreshold 1M + +# Chunks smaller than ThreadThreshold are sorted by a sequential algorithm, but +# if they are at least of the following size, different chunks are sorted in +# parallel. There is a slight space penalty for setting up the parallel process, +# so better avoid setting this number too small. +ThreadChunk 256 + +# Internal radix-sort stops at this size and switches to QuickSort (must be >0) +RadixThreshold 4K + +# Debugging switches (see the source) +Debug 0 + +} + +######## URL processing ######################################################### + +URL { + +# Ignore spaces at the start/end of a URL +IgnoreSpaces 1 + +# Ignore underflows in relative paths (/../ from root) +IgnoreUnderflow 1 + +# Some URL's with many repeated components are filtered out to avoid infinite +# URL's (e.g. http://czech.recoder.cz/win/iso/win/iso/file.html, or +# http://a.com/?a=b&a=b&a=b, ...). +# The URL is split to components divided by any of the specified separators. +# Then the separators are forgotten and the components between them are +# examined. +ComponentSeparators /&? + +# URL is filtered out if there's a sequence of components in a row with at most +# MaxRepeatLength components and the sequence is repeated more than MinRepeatCount +# times. 
Default values are high MinRepeatCount and low MaxRepeatLength, so the +# mechanism is disabled. +MinRepeatCount 4 +MaxRepeatLength 4 + +# Maximum number of occurrences of a single component in the entire URL (possibly interleaved +# by different components). The detector is disabled by default. +MaxOccurences 4 + +} + +######## Logging ################################################################ + +Logging { + +# In this section, you can define various logging streams which can be referred to by other sections. + +# Stream { +# # The name of the stream +# Name test-log +# +# # When it should log the messages to a file, a name of the file should be specified. +# # Escape sequences for current date and time as described in strftime(3) can be used. +# FileName log/test-%Y%m%d +# +# # If you need to log to stderr or another already opened descriptor, you can specify its number. +# FileDesc 2 +# +# # Instead of a file, a syslog facility can be specified. See syslog(3) for an explanation. +# SyslogFacility daemon +# +# # You can request that syslog includes a process ID in each message. Due to inflexibility +# # of the syslog protocol, all syslog streams active at a moment must agree on this setting. +# # (default: 0) +# SyslogPID 1 +# +# # When logging to files, timestamps with microsecond precision can be requested. (default: 0) +# Microseconds 1 +# +# # Messages logged to this stream can be restricted to a subset of severity levels. +# # Available levels are: debug info warn error info_r warn_r error_r fatal. +# # This configuration item is a bitmap with a default of "all", so we need the ":reset" operator. +# Levels:reset info warn error fatal +# +# # Similarly, messages can be restricted to a subset of message types. The types are +# # specific for each program. This configuration item is a list of type names; by default +# # it is empty, which is equivalent to all types being enabled. +# Types:reset default foo +# +# # Should the message types be logged?
They usually do not carry much useful +# # information for the viewer of the log, so they are not included by default, +# # but you might want to see them when tuning the Types setting. (default: 0) +# ShowTypes 1 +# +# # If an error occurs when logging a message to this stream, the program normally +# # logs a special error message to the other streams and continues running. You can +# # however request to exit the program in such cases, so that the log files are +# # guaranteed to be complete. (default: 0) +# ErrorsFatal 1 +# +# # Let stderr of the program point to this file-based log_stream (default: 0) +# StdErrFollows 1 +# +# # Some events are logworthy, but they could happen too frequently and flood the log. +# # You can avoid the flooding by setting up a rate limiter for a specific subset of +# # message types. If more limiters match the type of a message, only the last one applies. +# Limit { +# # A list of message types (default: empty = all types) +# Types default foo +# +# # The maximum allowed sustained rate (messages/second, may be fractional) +# Rate 1 +# +# # Maximum length of a burst temporarily exceeding the rate (default: try to guess) +# Burst 2 +# } +# +# # The messages that have passed the filters and limiters can be forwarded to other +# # log streams. Logging loops are not healthy for your program :) (a list of stream names) +# Substream another-stream +# } + +} diff --git a/libucw/examples/external-ucw-build/Makefile b/libucw/examples/external-ucw-build/Makefile new file mode 100644 index 0000000..5715c25 --- /dev/null +++ b/libucw/examples/external-ucw-build/Makefile @@ -0,0 +1,34 @@ +# Example Makefile for a stand-alone program using the libucw build system +# (c) 2007 Martin Mares +# (c) 2008 Michal Vaner + +# The default target +all: runtree programs + +# Include configuration +s=. +-include obj/config.mk +obj/config.mk: + @echo "You need to run configure first." 
&& false + +# Do not show strange errors if the BUILDSYS is not set +# (it happens if noone called configure as reported above) +ifdef BUILDSYS + +# We will use the libucw build system +include $(BUILDSYS)/Maketop + +# Add the detected flags to all the global flags +CFLAGS+=$(LIBUCW_CFLAGS) +LIBS+=$(LIBUCW_LIBS) + +# Programs we want to compile +PROGS+=$(o)/test + +# And how they are created +$(o)/test: $(o)/test.o $(LIBUCW) + +# And finally the default rules of the build system +include $(BUILDSYS)/Makebottom + +endif diff --git a/libucw/examples/external-ucw-build/configure b/libucw/examples/external-ucw-build/configure new file mode 100755 index 0000000..ec4b279 --- /dev/null +++ b/libucw/examples/external-ucw-build/configure @@ -0,0 +1,46 @@ +#!/usr/bin/perl +# Configure script for the libucw example (inspired by ../external/configure) +# (c) 2008 Michal Vaner + +use warnings; +use strict; + +our($srcdir, $libdir); +BEGIN { + # Find the sources + my $pkgfile = "test.c"; + if (!defined ($srcdir = $ENV{"SRCDIR"})) { + if (-f $pkgfile) { + $srcdir="."; + } elsif ($0 =~ m@^(.*)/configure$@ && -f "$1/$pkgfile") { + $srcdir=$1; + } else { + die "Don't know how to find myself. Please set SRCDIR manually.\n"; + } + } + # Ask pkg-config if libucw is installed and find its configure modules + `pkg-config libucw --atleast-version=3.13`; + !$? or die "Package `libucw' (version 3.13 or newer) not found. Is PKG_CONFIG_PATH set properly?\n"; + $libdir=`pkg-config libucw --variable=perl_modules_dir`; + chomp $libdir; + die "Unable to find the libucw configure system\n" if $? || not defined $libdir; +} +use lib $libdir; +use UCW::Configure; + +Init($srcdir, 'default.cfg'); +Log "### Configuring TestApp ###\n\n"; +Include Get("CONFIG"); +# What should be detected? 
+require UCW::Configure::Build; +require UCW::Configure::Paths; +require UCW::Configure::C; +require UCW::Configure::Pkg; +# You could generate your own documentation, too +# require UCW::Configure::Doc; + +# Get some libraries +UCW::Configure::Pkg::PkgConfig("libucw") or Fail("libUCW is required"); +Finish(); + +Log "\nConfigured, run `make' to build everything.\n"; diff --git a/libucw/examples/external-ucw-build/default.cfg b/libucw/examples/external-ucw-build/default.cfg new file mode 100644 index 0000000..48f5ec2 --- /dev/null +++ b/libucw/examples/external-ucw-build/default.cfg @@ -0,0 +1,5 @@ + +# You can specify default configuration here: +# Set("SOME_SYMBOL"); + +1; diff --git a/libucw/examples/external-ucw-build/test.c b/libucw/examples/external-ucw-build/test.c new file mode 100644 index 0000000..19a2685 --- /dev/null +++ b/libucw/examples/external-ucw-build/test.c @@ -0,0 +1,8 @@ +#include + +int main(void) +{ + log_init("test"); + msg(L_INFO, "Hoooot!"); + return 0; +} diff --git a/libucw/examples/external/Makefile b/libucw/examples/external/Makefile new file mode 100644 index 0000000..0cb26a1 --- /dev/null +++ b/libucw/examples/external/Makefile @@ -0,0 +1,8 @@ +# Example Makefile for a stand-alone program using libucw + +CFLAGS:=$(shell pkg-config --cflags libucw) +LDLIBS:=$(shell pkg-config --libs libucw) + +all: test + +test: test.c diff --git a/libucw/examples/external/test.c b/libucw/examples/external/test.c new file mode 100644 index 0000000..28e45c8 --- /dev/null +++ b/libucw/examples/external/test.c @@ -0,0 +1,8 @@ +#include + +int main(void) +{ + log_init("test"); + msg(L_INFO, "Hoooot!"); + return 0; +} diff --git a/libucw/examples/internal/Makefile b/libucw/examples/internal/Makefile new file mode 100644 index 0000000..817029d --- /dev/null +++ b/libucw/examples/internal/Makefile @@ -0,0 +1,35 @@ +# Example Makefile for a stand-alone program using the libucw build system +# (c) 2007 Martin Mares + +# The default target +all: runtree 
programs + +# Include configuration +s=. +-include obj/config.mk +obj/config.mk: + @echo "You need to run configure first." && false + +BUILDSYS=$(s)/build + +# We will use the libucw build system +include $(BUILDSYS)/Maketop + +# Set up names of common libraries (to avoid forward references in rules) +LIBCHARSET=$(o)/charset/libucw-charset.pc +LIBIMAGES=$(o)/images/libucw-images.pc + +# Include makefiles of libraries we wish to use +include $(s)/ucw/Makefile +include $(s)/charset/Makefile +include $(s)/images/Makefile + +# Programs we want to compile +PROGS+=$(o)/test +$(o)/test: $(o)/test.o $(LIBUCW) $(LIBCHARSET) $(LIBIMAGES) + +# All tests (%-t) get automatically linked with libucw +TESTING_DEPS=$(LIBUCW) + +# And finally the default rules of the build system +include $(BUILDSYS)/Makebottom diff --git a/libucw/examples/internal/configure b/libucw/examples/internal/configure new file mode 100755 index 0000000..d914789 --- /dev/null +++ b/libucw/examples/internal/configure @@ -0,0 +1,34 @@ +#!/usr/bin/perl +# Configure script for the libucw example +# (c) 2007 Martin Mares + +use warnings; +use strict; + +our $srcdir; +BEGIN { + my $pkgfile = "ucw/wildmatch.c"; + if (!defined ($srcdir = $ENV{"SRCDIR"})) { + if (-f $pkgfile) { + $srcdir="."; + } elsif ($0 =~ m@^(.*)/configure$@ && -f "$1/$pkgfile") { + $srcdir=$1; + } else { + die "Don't know how to find myself. 
Please set SRCDIR manually.\n"; + } + } +} + +use lib "$srcdir/ucw/perl/"; +use UCW::Configure; + +Init($srcdir, "default.cfg"); +Include "ucw/default.cfg"; +Log "### Configuring TestApp ###\n\n"; +Include Get("CONFIG"); +require UCW::Configure::Paths; +require UCW::Configure::C; +require UCW::Configure::LibUCW; +Finish(); + +Log "\nConfigured, run `make' to build everything.\n"; diff --git a/libucw/examples/internal/default.cfg b/libucw/examples/internal/default.cfg new file mode 100644 index 0000000..984be67 --- /dev/null +++ b/libucw/examples/internal/default.cfg @@ -0,0 +1,18 @@ +# Default configuration file for our test application + +# Do a local build +Set("CONFIG_LOCAL"); + +# We want to build all libraries shared +Set("CONFIG_SHARED"); + +# Libucw-images settings +Set("CONFIG_IMAGES"); +Set("CONFIG_IMAGES_LIBJPEG"); +Set("CONFIG_IMAGES_LIBPNG"); +Set("CONFIG_IMAGES_LIBUNGIF"); +UnSet("CONFIG_IMAGES_LIBGIF"); +UnSet("CONFIG_IMAGES_LIBMAGICK"); + +# Return success +1; diff --git a/libucw/examples/internal/test.c b/libucw/examples/internal/test.c new file mode 100644 index 0000000..28e45c8 --- /dev/null +++ b/libucw/examples/internal/test.c @@ -0,0 +1,8 @@ +#include + +int main(void) +{ + log_init("test"); + msg(L_INFO, "Hoooot!"); + return 0; +} diff --git a/libucw/examples/openwrt-package/Makefile b/libucw/examples/openwrt-package/Makefile new file mode 100644 index 0000000..f8c76ff --- /dev/null +++ b/libucw/examples/openwrt-package/Makefile @@ -0,0 +1,45 @@ +# Package Makefile for OpenWRT + +include $(TOPDIR)/rules.mk + +PKG_NAME:=libucw +PKG_VERSION:=6.5.9 +PKG_RELEASE:=1 + +PKG_BUILD_DIR:=$(BUILD_DIR)/libucw-$(PKG_VERSION) +PKG_SOURCE:=libucw-$(PKG_VERSION).tar.gz +PKG_SOURCE_SUBDIR=libucw-$(PKG_VERSION) +PKG_SOURCE_PROTO:=git +PKG_SOURCE_URL:=git://git.ucw.cz/libucw.git +# PKG_SOURCE_VERSION:=v$(PKG_VERSION) +PKG_SOURCE_VERSION:=9ef73a67dd942f6e369c2719847d2cc35e920c88 + +include $(INCLUDE_DIR)/package.mk + +define Package/libucw + SECTION:=ucw + 
CATEGORY:=Libraries + TITLE:=The UCW Library + URL:=http://www.ucw.cz/libucw/ + DEPENDS:=+libpthread +librt +endef + +define Package/libucw/description + The UCW library aims to provide a set general purpose tools for programming in + the C language. In other words, to make writing of complex and very efficient + programs in plain C a pleasure. +endef + +define Build/Configure + ( cd $(PKG_BUILD_DIR) && ./configure PREFIX=/ CC=$(TARGET_CC) AR=$(TARGET_AR) OS=Linux -CONFIG_DEBUG -CONFIG_UCW_PERL -CONFIG_UCW_PERL_MODULES -CONFIG_UCW_SHELL_UTILS -CONFIG_UCW_UTILS -CONFIG_CHARSET -CONFIG_JSON -CONFIG_XML ) +endef + +# Sub-makes are invoked through $(MAKE), not a literal `make', so that the +# jobserver (-j) and flags such as -n propagate to the libucw build. +define Build/InstallDev + $(MAKE) -C $(PKG_BUILD_DIR) DESTDIR=$(1) install-libucw-lib install-libucw-api +endef + +define Package/libucw/install + $(MAKE) -C $(PKG_BUILD_DIR) DESTDIR=$(1) install-libucw-lib +endef + +$(eval $(call BuildPackage,libucw)) diff --git a/libucw/images/Makefile b/libucw/images/Makefile new file mode 100644 index 0000000..d418f75 --- /dev/null +++ b/libucw/images/Makefile @@ -0,0 +1,120 @@ +# Makefile for the Image Library (c) 2006 Pavel Charvat + +DIRS+=images + +LIBIMAGES_PROGS=$(o)/images/ucw-image-tool $(o)/images/ucw-color-tool +LIBIMAGES_CONFIGS+=images +LIBIMAGES_MODS=math config context image scale color io-main +LIBIMAGES_INCLUDES=images.h error.h color.h math.h +export LIBIMAGES_LIBS=-lm + +ifdef CONFIG_SHERLOCK +LIBIMAGES_MODS+=object +LIBIMAGES_DEPS=$(LIBSH) +else +LIBIMAGES_DEPS=$(LIBUCW) +endif + +ifdef CONFIG_INSTALL_API +$(o)/images/libucw-images.pc: $(addprefix $(o)/images/libucw-images$(LV),.a .so) +endif + +ifdef CONFIG_IMAGES_DUP +LIBIMAGES_PROGS+=$(o)/images/ucw-image-dup-test +LIBIMAGES_MODS+=dup-init dup-cmp +LIBIMAGES_INCLUDES+=duplicates.h +endif +ifdef CONFIG_IMAGES_SIM +LIBIMAGES_PROGS+=$(o)/images/ucw-image-sim-test +LIBIMAGES_MODS+=sig-cmp +endif +ifneq ($(CONFIG_IMAGES_DUP)$(CONFIG_IMAGES_SIM),) +LIBIMAGES_MODS+=sig-dump sig-init sig-seg sig-txt +LIBIMAGES_INCLUDES+=signature.h +endif + +ifdef
CONFIG_IMAGES_LIBJPEG +LIBIMAGES_MODS+=io-libjpeg +LIBIMAGES_LIBS+=-ljpeg +endif + +ifdef CONFIG_IMAGES_LIBPNG +LIBIMAGES_MODS+=io-libpng +LIBIMAGES_LIBS+=-lpng +endif + +ifdef CONFIG_IMAGES_LIBUNGIF +LIBIMAGES_MODS+=io-libungif +LIBIMAGES_LIBS+=-lungif +else +ifdef CONFIG_IMAGES_LIBGIF +LIBIMAGES_MODS+=io-libungif +LIBIMAGES_LIBS+=-lgif +endif +endif + +ifdef CONFIG_IMAGES_LIBMAGICK +LIBIMAGES_MODS+=io-libmagick +MAGICK_LIBS:=$(shell GraphicsMagick-config --libs) +MAGICK_CPPFLAGS:=$(shell GraphicsMagick-config --cppflags) +LIBIMAGES_LIBS+=$(MAGICK_LIBS) -lpthread +$(addprefix $(o)/images/io-libmagick,.o .oo): CFLAGS+=$(MAGICK_CPPFLAGS) -Wno-redundant-decls -Wno-undef +endif + +PROGS+=$(LIBIMAGES_PROGS) +CONFIGS+=$(LIBIMAGES_CONFIGS) + +$(o)/images/libucw-images$(LV).a: $(addsuffix .o,$(addprefix $(o)/images/,$(LIBIMAGES_MODS))) +$(o)/images/libucw-images$(LV).so: $(addsuffix .oo,$(addprefix $(o)/images/,$(LIBIMAGES_MODS))) $(LIBIMAGES_DEPS) +$(o)/images/libucw-images$(LV).so: SONAME_SUFFIX=.0 +$(o)/images/libucw-images$(LV).so: LIBS+=$(LIBIMAGES_LIBS) +$(o)/images/libucw-images.pc: $(LIBIMAGES_DEPS) + +$(o)/images/ucw-image-tool: $(o)/images/ucw-image-tool.o $(LIBIMAGES) $(LIBUCW) +$(o)/images/ucw-color-tool: $(o)/images/ucw-color-tool.o $(LIBIMAGES) $(LIBUCW) +$(o)/images/ucw-image-dup-test: $(o)/images/ucw-image-dup-test.o $(LIBIMAGES) $(LIBUCW) +$(o)/images/ucw-image-sim-test: $(o)/images/ucw-image-sim-test.o $(LIBIMAGES) $(LIBUCW) + +TESTS+=$(o)/images/image-test.test +$(o)/images/image-test: $(o)/images/image-test.o $(LIBIMAGES) $(LIBUCW) +$(o)/images/image-test: LIBS+=-lpthread +$(o)/images/image-test.test: $(o)/images/image-test + +TESTS+=$(o)/images/color.test +$(o)/images/color-t: $(LIBIMAGES) $(LIBUCW) +$(o)/images/color-t: LIBS+=-lm +$(o)/images/color.test: $(o)/images/color-t + +API_LIBS+=libucw-images +API_INCLUDES+=$(o)/images/.include-stamp +$(o)/images/.include-stamp: $(addprefix $(s)/images/,$(LIBIMAGES_INCLUDES)) +$(o)/images/.include-stamp: 
IDST=images +run/lib/pkgconfig/libucw-images.pc: $(o)/images/libucw-images.pc + +INSTALL_TARGETS+=install-libucw-images-lib +install-libucw-images-lib: + install -d -m 755 $(DESTDIR)$(INSTALL_LIB_DIR) + install -m 644 run/lib/libucw-images$(LV).so.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-images$(LV).so.0.0 + ln -sf libucw-images$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-images$(LV).so.0 +.PHONY: install-libucw-images-lib + +INSTALL_TARGETS+=install-libucw-images-api +install-libucw-images-api: + install -d -m 755 $(addprefix $(DESTDIR),$(INSTALL_INCLUDE_DIR)/images $(INSTALL_LIB_DIR) $(INSTALL_PKGCONFIG_DIR)) + install -m 644 $(addprefix run/include/images/,$(LIBIMAGES_INCLUDES)) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/images + install -m 644 run/lib/pkgconfig/libucw-images.pc $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + ln -sf libucw-images$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-images$(LV).so + install -m 644 run/lib/libucw-images$(LV).a $(DESTDIR)$(INSTALL_LIB_DIR) +.PHONY: install-libucw-images-api + +INSTALL_TARGETS+=install-libucw-images-utils +install-libucw-images-utils: + install -d -m 755 $(DESTDIR)$(INSTALL_BIN_DIR) + install -m 755 $(LIBIMAGES_PROGS) $(DESTDIR)$(INSTALL_BIN_DIR) +.PHONY: install-libucw-images-utils + +INSTALL_TARGETS+=install-libucw-images-config +install-libucw-images-config: + install -d -m 755 $(DESTDIR)$(INSTALL_CONFIG_DIR) + install -m 644 $(addprefix run/$(CONFIG_DIR)/,$(LIBIMAGES_CONFIGS)) $(DESTDIR)$(INSTALL_CONFIG_DIR) +.PHONY: install-libucw-images-config diff --git a/libucw/images/color.c b/libucw/images/color.c new file mode 100644 index 0000000..50c38c6 --- /dev/null +++ b/libucw/images/color.c @@ -0,0 +1,1369 @@ +/* + * Image Library -- Color Spaces + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include + +#include +#include + +uint color_space_channels[COLOR_SPACE_MAX] = { + [COLOR_SPACE_UNKNOWN] = 0, + [COLOR_SPACE_UNKNOWN_1] = 1, + [COLOR_SPACE_UNKNOWN_2] = 2, + [COLOR_SPACE_UNKNOWN_3] = 3, + [COLOR_SPACE_UNKNOWN_4] = 4, + [COLOR_SPACE_GRAYSCALE] = 1, + [COLOR_SPACE_RGB] = 3, + [COLOR_SPACE_XYZ] = 3, + [COLOR_SPACE_LAB] = 3, + [COLOR_SPACE_YCBCR] = 3, + [COLOR_SPACE_CMYK] = 4, + [COLOR_SPACE_YCCK] = 4, +}; + +byte *color_space_name[COLOR_SPACE_MAX] = { + [COLOR_SPACE_UNKNOWN] = "Unknown", + [COLOR_SPACE_UNKNOWN_1] = "1-channel", + [COLOR_SPACE_UNKNOWN_2] = "2-channels", + [COLOR_SPACE_UNKNOWN_3] = "3-channels", + [COLOR_SPACE_UNKNOWN_4] = "4-channels", + [COLOR_SPACE_GRAYSCALE] = "Grayscale", + [COLOR_SPACE_RGB] = "RGB", + [COLOR_SPACE_XYZ] = "XYZ", + [COLOR_SPACE_LAB] = "LAB", + [COLOR_SPACE_YCBCR] = "YCbCr", + [COLOR_SPACE_CMYK] = "CMYK", + [COLOR_SPACE_YCCK] = "YCCK", +}; + +byte * +color_space_id_to_name(uint id) +{ + ASSERT(id < COLOR_SPACE_MAX); + return color_space_name[id]; +} + +uint +color_space_name_to_id(byte *name) +{ + for (uint i = 1; i < COLOR_SPACE_MAX; i++) + if (color_space_name[i] && !strcasecmp(name, color_space_name[i])) + return i; + return 0; +} + +struct color color_black = { .color_space = COLOR_SPACE_GRAYSCALE }; +struct color color_white = { .c = { 255 }, .color_space = COLOR_SPACE_GRAYSCALE }; + +int +color_get(struct color *color, byte *src, uint src_space) +{ + color->color_space = src_space; + memcpy(color->c, src, color_space_channels[src_space]); + return 1; +} + +/* + * Convert color to the color space dest_space and store the resulting channel + * bytes to dest. Returns 1 on success. Pairs of color spaces without a direct + * conversion are retried through an intermediate RGB color; when that is not + * possible either, IMAGE_ERROR_INVALID_PIXEL_FORMAT is reported on ctx and + * 0 is returned. + */ +int +color_put(struct image_context *ctx, struct color *color, byte *dest, uint dest_space) +{ + switch (dest_space) + { + case COLOR_SPACE_GRAYSCALE: + switch (color->color_space) + { + case COLOR_SPACE_GRAYSCALE: + dest[0] = color->c[0]; + return 1; + case COLOR_SPACE_RGB: + dest[0] = rgb_to_gray_func(color->c[0], color->c[1], color->c[2]); + return 1; + } + break; + case
COLOR_SPACE_RGB: + switch (color->color_space) + { + case COLOR_SPACE_GRAYSCALE: + dest[0] = dest[1] = dest[2] = color->c[0]; + return 1; + case COLOR_SPACE_RGB: + dest[0] = color->c[0]; + dest[1] = color->c[1]; + dest[2] = color->c[2]; + return 1; + case COLOR_SPACE_CMYK: + { + double rgb[3], cmyk[4]; + for (uint i = 0; i < 4; i++) + cmyk[i] = color->c[i] * (1.0 / 255); + cmyk_to_rgb_exact(rgb, cmyk); + for (uint i = 0; i < 3; i++) + dest[i] = CLAMP(rgb[i] * 255, 0, 255); + } + return 1; + } + break; + case COLOR_SPACE_CMYK: + switch (color->color_space) + { + case COLOR_SPACE_GRAYSCALE: + dest[0] = dest[1] = dest[2] = 0; + dest[3] = 255 - color->c[0]; + return 1; + case COLOR_SPACE_RGB: + { + double rgb[3], cmyk[4]; + for (uint i = 0; i < 3; i++) + rgb[i] = color->c[i] * (1.0 / 255); + rgb_to_cmyk_exact(cmyk, rgb); + for (uint i = 0; i < 4; i++) + dest[i] = CLAMP(cmyk[i] * 255, 0, 255); + } + return 1; + } + break; + } + if (dest_space != COLOR_SPACE_RGB && color->color_space != COLOR_SPACE_RGB) + { + /* Try to convert the color via RGB. A source which already is RGB is + * excluded: converting it to RGB always succeeds, so the retry below + * would call color_put() with identical arguments and never terminate. */ + struct color rgb; + if (!color_put(ctx, color, rgb.c, COLOR_SPACE_RGB)) + return 0; + rgb.color_space = COLOR_SPACE_RGB; + return color_put(ctx, &rgb, dest, dest_space); + } + /* Report both ends of the unsupported conversion: source, then destination */ + IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Conversion from %s to %s is not supported", + color_space_id_to_name(color->color_space), color_space_id_to_name(dest_space)); + return 0; +} + + +/********************* IMAGE CONVERSION ROUTINES **********************/ + +struct image_conv_options image_conv_defaults = { + .flags = IMAGE_CONV_COPY_ALPHA | IMAGE_CONV_FILL_ALPHA | IMAGE_CONV_APPLY_ALPHA, + .background = { .color_space = COLOR_SPACE_GRAYSCALE } }; + +/* Grayscale <-> RGB */ + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_gray_1_to_rgb_n +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_SEC_COL_STEP 1 +#define IMAGE_WALK_UNROLL 4 +#define IMAGE_WALK_DO_STEP do{ walk_pos[0] = walk_pos[1] = walk_pos[2] = walk_sec_pos[0]; }while(0) +#include
+ +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_rgb_n_to_gray_1 +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_COL_STEP 1 +#define IMAGE_WALK_UNROLL 2 +#define IMAGE_WALK_DO_STEP do{ walk_pos[0] = rgb_to_gray_func(walk_sec_pos[0], walk_sec_pos[1], walk_sec_pos[2]); }while(0) +#include + +/* Grayscale <-> YCbCr */ + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_gray_1_to_ycbcr_n +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_SEC_COL_STEP 1 +#define IMAGE_WALK_UNROLL 4 +#define IMAGE_WALK_DO_STEP do{ walk_pos[0] = walk_sec_pos[0]; walk_pos[1] = walk_pos[2] = 0; }while(0) +#include + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_ycbcr_n_to_gray_1 +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_COL_STEP 1 +#define IMAGE_WALK_UNROLL 4 +#define IMAGE_WALK_DO_STEP do{ walk_pos[0] = walk_sec_pos[0]; }while(0) +#include + +/* YCbCr <-> RGB */ + +static inline void +pixel_conv_ycbcr_to_rgb(byte *dest, byte *src) +{ + /* R = Y + 1.40200 * Cr + * G = Y - 0.34414 * Cb - 0.71414 * Cr + * B = Y + 1.77200 * Cb */ + int y = src[0], cb = src[1] - 128, cr = src[2] - 128; + dest[0] = CLAMP(y + (91881 * cr) / 0x10000, 0, 255); + dest[1] = CLAMP(y - (22553 * cb + 46801 * cr) / 0x10000, 0, 255); + dest[2] = CLAMP(y + (116129 * cb) / 0x10000, 0, 255); +} + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_ycbcr_n_to_rgb_n +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_DO_STEP do{ pixel_conv_ycbcr_to_rgb(walk_pos, walk_sec_pos); }while(0) +#include + +static inline void +pixel_conv_rgb_to_ycbcr(byte *dest, byte *src) +{ + /* Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTER + * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTER */ + uint r = src[0], g = src[1], b = src[2]; + dest[0] = (19595 * r + 38470 * g + 7471 * b) / 0x10000; + dest[1] = (0x800000 + 0x8000 * b - 11058 * r - 21710 * g) / 0x10000; + 
dest[2] = (0x800000 + 0x8000 * r - 27439 * g - 5329 * b) / 0x10000; +} + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_rgb_n_to_ycbcr_n +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_DO_STEP do{ pixel_conv_rgb_to_ycbcr(walk_pos, walk_sec_pos); }while(0) +#include + +/* CMYK <-> RGB */ + +static inline void +pixel_conv_cmyk_to_rgb(byte *dest, byte *src) +{ + uint d = (255 - src[3]) * (0xffffffffU / 255 /255); + dest[0] = d * (255 - src[0]) >> 24; + dest[1] = d * (255 - src[1]) >> 24; + dest[2] = d * (255 - src[2]) >> 24; +} + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_cmyk_4_to_rgb_n +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_SEC_COL_STEP 4 +#define IMAGE_WALK_DO_STEP do{ pixel_conv_cmyk_to_rgb(walk_pos, walk_sec_pos); }while(0) +#include + +static inline void +pixel_conv_rgb_to_cmyk(byte *dest, byte *src) +{ + uint k = MAX(src[0], src[1]); + k = MAX(k, src[2]); + uint d = fast_div_u32_u8(0x7fffffffU, k); /* == 0 for zero K */ + dest[0] = (d * (k - src[0])) >> 23; + dest[1] = (d * (k - src[1])) >> 23; + dest[2] = (d * (k - src[2])) >> 23; + dest[3] = 255 - k; +} + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_rgb_n_to_cmyk_4 +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_COL_STEP 4 +#define IMAGE_WALK_DO_STEP do{ pixel_conv_rgb_to_cmyk(walk_pos, walk_sec_pos); }while(0) +#include + +/* CMYK <-> YCbCr */ + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_cmyk_4_to_ycbcr_n +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_SEC_COL_STEP 4 +#define IMAGE_WALK_DO_STEP do{ pixel_conv_cmyk_to_rgb(walk_pos, walk_sec_pos); pixel_conv_rgb_to_ycbcr(walk_pos, walk_pos); }while(0) +#include + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_ycbcr_n_to_cmyk_4 +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_COL_STEP 4 +#define IMAGE_WALK_DO_STEP do{ pixel_conv_ycbcr_to_rgb(walk_pos, walk_sec_pos); 
pixel_conv_rgb_to_cmyk(walk_pos, walk_pos); }while(0) +#include + +/* YCCK <-> RGB */ + +static inline void +pixel_conv_ycck_to_rgb(byte *dest, byte *src) +{ + int y = src[0], cb = src[1] - 128, cr = src[2] - 128; + uint d = (255 - src[3]) * (0xffffffffU / 255 /255); + dest[0] = (d * CLAMP(y + (91881 * cr) / 0x10000, 0, 255) >> 24); + dest[1] = (d * CLAMP(y - (22553 * cb + 46801 * cr) / 0x10000, 0, 255) >> 24); + dest[2] = (d * CLAMP(y + (116129 * cb) / 0x10000, 0, 255) >> 24); +} + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_ycck_4_to_rgb_n +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_SEC_COL_STEP 4 +#define IMAGE_WALK_DO_STEP do{ pixel_conv_ycck_to_rgb(walk_pos, walk_sec_pos); }while(0) +#include + +static inline void +pixel_conv_rgb_to_ycck(byte *dest, byte *src) +{ + uint k = MAX(src[0], src[1]); + k = MAX(k, src[2]); + uint d = fast_div_u32_u8(0x7fffffffU, k); /* == 0 for zero K */ + uint r = 255 - ((d * (k - src[0])) >> 23); + uint g = 255 - ((d * (k - src[1])) >> 23); + uint b = 255 - ((d * (k - src[2])) >> 23); + dest[0] = (19595 * r + 38470 * g + 7471 * b) / 0x10000; + dest[1] = (0x800000 + 0x8000 * b - 11058 * r - 21710 * g) / 0x10000; + dest[2] = (0x800000 + 0x8000 * r - 27439 * g - 5329 * b) / 0x10000; + dest[3] = 255 - k; +} + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_rgb_n_to_ycck_4 +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_COL_STEP 4 +#define IMAGE_WALK_DO_STEP do{ pixel_conv_rgb_to_ycck(walk_pos, walk_sec_pos); }while(0) +#include + +/* YCCK <-> YCbCr */ + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_ycck_4_to_ycbcr_n +#define IMAGE_WALK_DOUBLE +#define IMAGE_WALK_SEC_COL_STEP 4 +#define IMAGE_WALK_DO_STEP do{ pixel_conv_ycck_to_rgb(walk_pos, walk_sec_pos); pixel_conv_rgb_to_ycbcr(walk_pos, walk_pos); }while(0) +#include + +#define IMAGE_WALK_PREFIX(x) walk_##x +#define IMAGE_WALK_FUNC_NAME image_conv_ycbcr_n_to_ycck_4 +#define 
IMAGE_WALK_DOUBLE +#define IMAGE_WALK_COL_STEP 4 +#define IMAGE_WALK_DO_STEP do{ pixel_conv_ycbcr_to_rgb(walk_pos, walk_sec_pos); pixel_conv_rgb_to_ycck(walk_pos, walk_pos); }while(0) +#include + +/* Main functions */ + +static int +image_conv_color_space(struct image_context *ctx UNUSED, struct image *dest, struct image *src, struct image_conv_options *opt UNUSED) +{ + switch (dest->flags & IMAGE_COLOR_SPACE) + { + case COLOR_SPACE_GRAYSCALE: + switch (src->flags & IMAGE_COLOR_SPACE) + { + case COLOR_SPACE_RGB: + if (dest->pixel_size == 1) + { + image_conv_rgb_n_to_gray_1(dest, src); + return 1; + } + break; + case COLOR_SPACE_YCBCR: + if (dest->pixel_size == 1) + { + image_conv_ycbcr_n_to_gray_1(dest, src); + return 1; + } + break; + } + break; + case COLOR_SPACE_RGB: + switch (src->flags & IMAGE_CHANNELS_FORMAT) + { + case COLOR_SPACE_GRAYSCALE: + if (src->pixel_size == 1) + { + image_conv_gray_1_to_rgb_n(dest, src); + return 1; + } + break; + case COLOR_SPACE_YCBCR: + image_conv_ycbcr_n_to_rgb_n(dest, src); + return 1; + case COLOR_SPACE_CMYK: + if (src->pixel_size == 4) + { + image_conv_cmyk_4_to_rgb_n(dest, src); + return 1; + } + break; + case COLOR_SPACE_YCCK: + if (src->pixel_size == 4) + { + image_conv_ycck_4_to_rgb_n(dest, src); + return 1; + } + break; + } + break; + case COLOR_SPACE_YCBCR: + switch (src->flags & IMAGE_CHANNELS_FORMAT) + { + case COLOR_SPACE_GRAYSCALE: + if (src->pixel_size == 1) + { + image_conv_gray_1_to_ycbcr_n(dest, src); + return 1; + } + break; + case COLOR_SPACE_RGB: + image_conv_rgb_n_to_ycbcr_n(dest, src); + return 1; + case COLOR_SPACE_CMYK: + if (src->pixel_size == 4) + { + image_conv_cmyk_4_to_ycbcr_n(dest, src); + return 1; + } + break; + case COLOR_SPACE_YCCK: + if (src->pixel_size == 4) + { + image_conv_ycck_4_to_ycbcr_n(dest, src); + return 1; + } + break; + } + break; + case COLOR_SPACE_CMYK: + switch (src->flags & IMAGE_CHANNELS_FORMAT) + { + case COLOR_SPACE_RGB: + if (dest->pixel_size == 4) + { + 
image_conv_rgb_n_to_cmyk_4(dest, src); + return 1; + } + break; + case COLOR_SPACE_YCBCR: + if (dest->pixel_size == 4) + { + image_conv_ycbcr_n_to_cmyk_4(dest, src); + return 1; + } + break; + } + break; + case COLOR_SPACE_YCCK: + switch (src->flags & IMAGE_CHANNELS_FORMAT) + { + case COLOR_SPACE_RGB: + if (dest->pixel_size == 4) + { + image_conv_rgb_n_to_ycck_4(dest, src); + return 1; + } + break; + case COLOR_SPACE_YCBCR: + if (dest->pixel_size == 4) + { + image_conv_ycbcr_n_to_ycck_4(dest, src); + return 1; + } + break; + } + break; + } + return 0; +} + +static void +image_conv_copy(struct image *dest, struct image *src) +{ + if (dest->pixels == src->pixels) + return; + else if (dest->pixel_size != src->pixel_size) + { + uint channels = MIN(dest->channels, src->channels); + switch (channels) + { + case 1: + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_DO_STEP do{ walk_pos[0] = walk_sec_pos[0]; }while(0) +# include + } + return; + case 2: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_DO_STEP do{ walk_pos[0] = walk_sec_pos[0]; walk_pos[1] = walk_sec_pos[1]; }while(0) +# include + return; + case 3: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_UNROLL 2 +# define IMAGE_WALK_DO_STEP do{ walk_pos[0] = walk_sec_pos[0]; walk_pos[1] = walk_sec_pos[1]; walk_pos[2] = walk_sec_pos[2]; }while(0) +# include + return; + case 4: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define 
IMAGE_WALK_UNROLL 2 +# define IMAGE_WALK_DO_STEP do{ walk_pos[0] = walk_sec_pos[0]; walk_pos[1] = walk_sec_pos[1]; walk_pos[2] = walk_sec_pos[2]; walk_pos[3] = walk_sec_pos[3]; }while(0) +# include + return; + default: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_DO_STEP do{ for (uint i = 0; i < channels; i++) walk_pos[i] = walk_sec_pos[i]; }while(0) +# include + return; + } + } + else if (dest->row_size != src->row_size || ((dest->flags | src->flags) & IMAGE_GAPS_PROTECTED)) + { + byte *s = src->pixels; + byte *d = dest->pixels; + for (uint row = src->rows; row--; ) + { + memcpy(d, s, src->row_pixels_size); + d += dest->row_size; + s += src->row_size; + } + } + else if (dest->pixels != src->pixels) + memcpy(dest->pixels, src->pixels, src->image_size); +} + +static void +image_conv_fill_alpha(struct image *dest) +{ + switch (dest->channels) + { + case 2: + if (dest->pixel_size == 2) + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_COL_STEP 2 +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_DO_STEP do{ walk_pos[1] = 255; }while(0) +# include + return; + } + break; + case 4: + if (dest->pixel_size == 4) + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_COL_STEP 4 +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_DO_STEP do{ walk_pos[3] = 255; }while(0) +# include + return; + } + break; + } + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_DO_STEP do{ walk_pos[dest->channels - 1] = 255; }while(0) +# include + } +} + +static void +image_conv_copy_alpha(struct image *dest, struct image *src) +{ + if (dest->pixels != src->pixels || dest->channels != src->channels) + { +# define 
IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_DO_STEP do{ walk_pos[dest->channels - 1] = walk_sec_pos[src->channels - 1]; }while(0) +# include + } +} + +static inline uint +image_conv_alpha_func(uint value, uint alpha, uint acoef, uint bcoef) +{ + return ((uint)(acoef + (int)alpha * (int)(value - bcoef)) * (0xffffffffU / 255 / 255)) >> 24; +} + +static int +image_conv_apply_alpha_from(struct image_context *ctx, struct image *dest, struct image *src, struct image_conv_options *opt) +{ + if (!opt->background.color_space) + return 1; + byte background[IMAGE_MAX_CHANNELS]; + if (unlikely(!color_put(ctx, &opt->background, background, dest->flags & IMAGE_COLOR_SPACE))) + return 0; + uint a[IMAGE_MAX_CHANNELS], b[IMAGE_MAX_CHANNELS]; + for (uint i = 0; i < dest->channels; i++) + a[i] = 255 * (b[i] = background[i]); + switch (dest->channels) + { + case 1: + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_UNROLL 2 +# define IMAGE_WALK_DO_STEP do{ \ + walk_pos[0] = image_conv_alpha_func(walk_pos[0], walk_sec_pos[src->channels - 1], a[0], b[0]); }while(0) +# include + } + return 1; + case 3: + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_DO_STEP do{ \ + walk_pos[0] = image_conv_alpha_func(walk_pos[0], walk_sec_pos[src->channels - 1], a[0], b[0]); \ + walk_pos[1] = image_conv_alpha_func(walk_pos[1], walk_sec_pos[src->channels - 1], a[1], b[1]); \ + walk_pos[2] = image_conv_alpha_func(walk_pos[2], walk_sec_pos[src->channels - 1], a[2], b[2]); }while(0) +# include + } + return 1; + } + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define 
IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_DO_STEP do{ for (uint i = 0; i < dest->channels; i++) \ + walk_pos[i] = image_conv_alpha_func(walk_pos[i], walk_sec_pos[src->channels - 1], a[i], b[i]); }while(0) +# include + } + return 1; +} + +static int +image_conv_apply_alpha_to(struct image_context *ctx, struct image *dest, struct image *src, struct image_conv_options *opt) +{ + if (!opt->background.color_space) + { + image_conv_copy(dest, src); + return 1; + } + byte background[IMAGE_MAX_CHANNELS]; + if (unlikely(!color_put(ctx, &opt->background, background, dest->flags & IMAGE_COLOR_SPACE))) + return 0; + uint a[IMAGE_MAX_CHANNELS], b[IMAGE_MAX_CHANNELS]; + for (uint i = 0; i < dest->channels; i++) + a[i] = 255 * (b[i] = background[i]); + switch (dest->channels) + { + case 1: + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_UNROLL 2 +# define IMAGE_WALK_DO_STEP do{ \ + walk_pos[0] = image_conv_alpha_func(walk_sec_pos[0], walk_sec_pos[src->channels - 1], a[0], b[0]); }while(0) +# include + } + return 1; + case 3: + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_DO_STEP do{ \ + walk_pos[0] = image_conv_alpha_func(walk_sec_pos[0], walk_sec_pos[src->channels - 1], a[0], b[0]); \ + walk_pos[1] = image_conv_alpha_func(walk_sec_pos[1], walk_sec_pos[src->channels - 1], a[1], b[1]); \ + walk_pos[2] = image_conv_alpha_func(walk_sec_pos[2], walk_sec_pos[src->channels - 1], a[2], b[2]); }while(0) +# include + } + return 1; + } + { +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_DOUBLE +# define 
IMAGE_WALK_DO_STEP do{ for (uint i = 0; i < dest->channels; i++) \ + walk_pos[i] = image_conv_alpha_func(walk_sec_pos[i], walk_sec_pos[src->channels - 1], a[i], b[i]); }while(0) +# include + } + return 1; +} + +int +image_conv(struct image_context *ctx, struct image *dest, struct image *src, struct image_conv_options *opt) +{ + ASSERT(dest->cols == src->cols && dest->rows == src->rows); + if (!((dest->flags ^ src->flags) & IMAGE_COLOR_SPACE)) + { + if (!(src->flags & IMAGE_ALPHA) || (dest->flags & IMAGE_ALPHA)) + image_conv_copy(dest, src); + else if (unlikely(!image_conv_apply_alpha_to(ctx, dest, src, opt))) + return 0; + } + else + { + if (!(src->flags & IMAGE_ALPHA)) + { + if (unlikely(!image_conv_color_space(ctx, dest, src, opt))) + goto error; + if ((dest->flags & IMAGE_ALPHA) && (opt->flags & IMAGE_CONV_FILL_ALPHA)) + image_conv_fill_alpha(dest); + } + else + { + if (dest->flags & IMAGE_ALPHA) + { + if (dest->channels <= src->channels) + { + if (unlikely(!image_conv_color_space(ctx, dest, src, opt))) + goto error; + if (opt->flags & IMAGE_CONV_COPY_ALPHA) + image_conv_copy_alpha(dest, src); + else if (opt->flags & IMAGE_CONV_FILL_ALPHA) + image_conv_fill_alpha(dest); + } + else + { + if (opt->flags & IMAGE_CONV_COPY_ALPHA) + image_conv_copy_alpha(dest, src); + else + image_conv_fill_alpha(dest); + if (unlikely(!image_conv_color_space(ctx, dest, src, opt))) + goto error; + } + } + else + { + if (dest->channels <= src->channels) + { + if (unlikely(!image_conv_color_space(ctx, dest, src, opt))) + goto error; + if (unlikely(!image_conv_apply_alpha_from(ctx, dest, src, opt))) + return 0; + } + else + { + if (unlikely(!image_conv_apply_alpha_to(ctx, dest, src, opt))) + return 0; + if (unlikely(!image_conv_color_space(ctx, dest, dest, opt))) + goto error; + } + } + } + } + return 1; +error: + IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Image conversion not supported for such pixel formats"); + return 0; +} + +/********************* EXACT CONVERSION ROUTINES 
**********************/ + +/* Reference whites */ +#define COLOR_ILLUMINANT_A 0.44757, 0.40744 +#define COLOR_ILLUMINANT_B 0.34840, 0.35160 +#define COLOR_ILLUMINANT_C 0.31006, 0.31615 +#define COLOR_ILLUMINANT_D50 0.34567, 0.35850 +#define COLOR_ILLUMINANT_D55 0.33242, 0.34743 +#define COLOR_ILLUMINANT_D65 0.31273, 0.32902 +#define COLOR_ILLUMINANT_D75 0.29902, 0.31485 +#define COLOR_ILLUMINANT_9300K 0.28480, 0.29320 +#define COLOR_ILLUMINANT_E (1./3.), (1./3.) +#define COLOR_ILLUMINANT_F2 0.37207, 0.37512 +#define COLOR_ILLUMINANT_F7 0.31285, 0.32918 +#define COLOR_ILLUMINANT_F11 0.38054, 0.37691 + +const double + color_illuminant_d50[2] = {COLOR_ILLUMINANT_D50}, + color_illuminant_d65[2] = {COLOR_ILLUMINANT_D65}, + color_illuminant_e[2] = {COLOR_ILLUMINANT_E}; + +/* RGB profiles (many missing) */ +const struct color_space_info + color_adobe_rgb_info = {"Adobe RGB", {{0.6400, 0.3300}, {0.2100, 0.7100}, {0.1500, 0.0600}, {COLOR_ILLUMINANT_D65}}, {0.45, 0.45, 0, 0, 0}}, + color_apple_rgb_info = {"Apple RGB", {{0.6250, 0.3400}, {0.2800, 0.5950}, {0.1550, 0.0700}, {COLOR_ILLUMINANT_D65}}, {0.56, 0.56, 0, 0, 0}}, + color_cie_rgb_info = {"CIE RGB", {{0.7350, 0.2650}, {0.2740, 0.7170}, {0.1670, 0.0090}, {COLOR_ILLUMINANT_E}}, {0.45, 0.45, 0, 0, 0}}, + color_color_match_rgb_info = {"ColorMatch RGB", {{0.6300, 0.3400}, {0.2950, 0.6050}, {0.1500, 0.0750}, {COLOR_ILLUMINANT_D50}}, {0.56, 0.56, 0, 0, 0}}, + color_srgb_info = {"sRGB", {{0.6400, 0.3300}, {0.3000, 0.6000}, {0.1500, 0.0600}, {COLOR_ILLUMINANT_D65}}, {0.45, 0.42, 0.055, 0.003, 12.92}}; + +#define CLIP(x, min, max) (((x) < (min)) ? (min) : ((x) > (max)) ? 
(max) : (x)) + +static inline void +clip(double a[3]) +{ + a[0] = CLIP(a[0], 0, 1); + a[1] = CLIP(a[1], 0, 1); + a[2] = CLIP(a[2], 0, 1); +} + +static inline void +correct_gamma_simple(double dest[3], double src[3], const struct color_space_gamma_info *info) +{ + dest[0] = pow(src[0], info->simple_gamma); + dest[1] = pow(src[1], info->simple_gamma); + dest[2] = pow(src[2], info->simple_gamma); +} + +static inline void +invert_gamma_simple(double dest[3], double src[3], const struct color_space_gamma_info *info) +{ + dest[0] = pow(src[0], 1 / info->simple_gamma); + dest[1] = pow(src[1], 1 / info->simple_gamma); + dest[2] = pow(src[2], 1 / info->simple_gamma); +} + +static inline void +correct_gamma_detailed(double dest[3], double src[3], const struct color_space_gamma_info *info) +{ + for (uint i = 0; i < 3; i++) + if (src[i] > info->transition) + dest[i] = (1 + info->offset) * pow(src[i], info->detailed_gamma) - info->offset; + else + dest[i] = info->slope * src[i]; +} + +static inline void +invert_gamma_detailed(double dest[3], double src[3], const struct color_space_gamma_info *info) +{ + for (uint i = 0; i < 3; i++) + if (src[i] > info->transition * info->slope) + dest[i] = pow((src[i] + info->offset) / (1 + info->offset), 1 / info->detailed_gamma); + else + dest[i] = src[i] / info->slope; +} + +static inline void +apply_matrix(double dest[3], double src[3], double matrix[9]) +{ + dest[0] = src[0] * matrix[0] + src[1] * matrix[1] + src[2] * matrix[2]; + dest[1] = src[0] * matrix[3] + src[1] * matrix[4] + src[2] * matrix[5]; + dest[2] = src[0] * matrix[6] + src[1] * matrix[7] + src[2] * matrix[8]; +} + +void +color_invert_matrix(double dest[9], double matrix[9]) +{ + double *i = dest, *m = matrix; + double a0 = m[4] * m[8] - m[5] * m[7]; + double a1 = m[3] * m[8] - m[5] * m[6]; + double a2 = m[3] * m[7] - m[4] * m[6]; + double d = 1 / (m[0] * a0 - m[1] * a1 + m[2] * a2); + i[0] = d * a0; + i[3] = -d * a1; + i[6] = d * a2; + i[1] = -d * (m[1] * m[8] - m[2] * 
m[7]); + i[4] = d * (m[0] * m[8] - m[2] * m[6]); + i[7] = -d * (m[0] * m[7] - m[1] * m[6]); + i[2] = d * (m[1] * m[5] - m[2] * m[4]); + i[5] = -d * (m[0] * m[5] - m[2] * m[3]); + i[8] = d * (m[0] * m[4] - m[1] * m[3]); +} + +static void +mul_matrices(double r[9], double a[9], double b[9]) +{ + r[0] = a[0] * b[0] + a[1] * b[3] + a[2] * b[6]; + r[1] = a[0] * b[1] + a[1] * b[4] + a[2] * b[7]; + r[2] = a[0] * b[2] + a[1] * b[5] + a[2] * b[8]; + r[3] = a[3] * b[0] + a[4] * b[3] + a[5] * b[6]; + r[4] = a[3] * b[1] + a[4] * b[4] + a[5] * b[7]; + r[5] = a[3] * b[2] + a[4] * b[5] + a[5] * b[8]; + r[6] = a[6] * b[0] + a[7] * b[3] + a[8] * b[6]; + r[7] = a[6] * b[1] + a[7] * b[4] + a[8] * b[7]; + r[8] = a[6] * b[2] + a[7] * b[5] + a[8] * b[8]; +} + +/* computes conversion matrix from a given color space to CIE XYZ */ +void +color_compute_color_space_to_xyz_matrix(double matrix[9], const struct color_space_chromacity_info *space) +{ + double wX = space->white[0] / space->white[1]; + double wZ = (1 - space->white[0] - space->white[1]) / space->white[1]; + double a[9], b[9]; + a[0] = space->prim1[0]; a[3] = space->prim1[1]; a[6] = 1 - a[0] - a[3]; + a[1] = space->prim2[0]; a[4] = space->prim2[1]; a[7] = 1 - a[1] - a[4]; + a[2] = space->prim3[0]; a[5] = space->prim3[1]; a[8] = 1 - a[2] - a[5]; + color_invert_matrix(b, a); + double ra = wX * b[0] + b[1] + wZ * b[2]; + double rb = wX * b[3] + b[4] + wZ * b[5]; + double rc = wX * b[6] + b[7] + wZ * b[8]; + matrix[0] = a[0] * ra; + matrix[1] = a[1] * rb; + matrix[2] = a[2] * rc; + matrix[3] = a[3] * ra; + matrix[4] = a[4] * rb; + matrix[5] = a[5] * rc; + matrix[6] = a[6] * ra; + matrix[7] = a[7] * rb; + matrix[8] = a[8] * rc; +} + +/* computes matrix to join transformations with different reference whites */ +void +color_compute_bradford_matrix(double matrix[9], const double source[2], const double dest[2]) +{ + /* cone response matrix and its inversion */ + static double r[9] = { + 0.8951, 0.2664, -0.1614, + -0.7502, 1.7135, 0.0367, 
+ 0.0389, -0.0685, 1.0296}; + //static double i[9] = {0.9870, -0.1471, 0.1600, 0.4323, 0.5184, 0.0493, -0.0085, 0.0400, 0.9685}; + double i[9]; + color_invert_matrix(i, r); + double aX = source[0] / source[1]; + double aZ = (1 - source[0] - source[1]) / source[1]; + double bX = dest[0] / dest[1]; + double bZ = (1 - dest[0] - dest[1]) / dest[1]; + double x = (r[0] * bX + r[1] + r[2] * bZ) / (r[0] * aX + r[1] + r[2] * aZ); + double y = (r[3] * bX + r[4] + r[5] * bZ) / (r[3] * aX + r[4] + r[5] * aZ); + double z = (r[6] * bX + r[7] + r[8] * bZ) / (r[6] * aX + r[7] + r[8] * aZ); + double m[9]; + m[0] = i[0] * x; m[1] = i[1] * y; m[2] = i[2] * z; + m[3] = i[3] * x; m[4] = i[4] * y; m[5] = i[5] * z; + m[6] = i[6] * x; m[7] = i[7] * y; m[8] = i[8] * z; + mul_matrices(matrix, m, r); +} + +/* NOTE(review): apply_matrix() multiplies as matrix * vector, which suggests the chained conversion should be xyz_to_b * (bradford *) a_to_xyz; the products below are built in the reverse order -- confirm the intended convention before relying on this matrix */ +void +color_compute_color_spaces_conversion_matrix(double matrix[9], const struct color_space_chromacity_info *src, const struct color_space_chromacity_info *dest) +{ + double a_to_xyz[9], b_to_xyz[9], xyz_to_b[9], bradford[9], m[9]; + color_compute_color_space_to_xyz_matrix(a_to_xyz, src); + color_compute_color_space_to_xyz_matrix(b_to_xyz, dest); + color_invert_matrix(xyz_to_b, b_to_xyz); + if (src->white[0] == dest->white[0] && src->white[1] == dest->white[1]) + mul_matrices(matrix, a_to_xyz, xyz_to_b); + else + { + color_compute_bradford_matrix(bradford, src->white, dest->white); + mul_matrices(m, a_to_xyz, bradford); + mul_matrices(matrix, m, xyz_to_b); + } +} + +/* sRGB to XYZ: undo the sRGB gamma curve, then apply the linear RGB -> XYZ matrix */ +void +srgb_to_xyz_exact(double xyz[3], double srgb[3]) +{ + static double matrix[9] = { + 0.41248031, 0.35756952, 0.18043951, + 0.21268516, 0.71513904, 0.07217580, + 0.01933501, 0.11918984, 0.95031473}; + double srgb_lin[3]; + invert_gamma_detailed(srgb_lin, srgb, &color_srgb_info.gamma); + apply_matrix(xyz, srgb_lin, matrix); +} + +/* XYZ to sRGB */ +void +xyz_to_srgb_exact(double srgb[3], double xyz[3]) +{ + static double matrix[9] = { + 3.24026666, -1.53704957,
-0.49850256, + -0.96928381, 1.87604525, 0.04155678, + 0.05564281, -0.20402363, 1.05721334}; + double srgb_lin[3]; + apply_matrix(srgb_lin, xyz, matrix); + clip(srgb_lin); + correct_gamma_detailed(srgb, srgb_lin, &color_srgb_info.gamma); +} + +/* XYZ to CIE-Luv */ +void +xyz_to_luv_exact(double luv[3], double xyz[3]) +{ + double sum = xyz[0] + 15 * xyz[1] + 3 * xyz[2]; + if (sum < 0.000001) + luv[0] = luv[1] = luv[2] = 0; + else + { + double var_u = 4 * xyz[0] / sum; + double var_v = 9 * xyz[1] / sum; + if (xyz[1] > 0.008856) + luv[0] = 116 * pow(xyz[1], 1 / 3.) - 16; + else + luv[0] = (116 * 7.787) * xyz[1]; + luv[1] = luv[0] * (13 * (var_u - 4 * REF_WHITE_X / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z))); + luv[2] = luv[0] * (13 * (var_v - 9 * REF_WHITE_Y / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z))); + /* intervals [0..100], [-134..220], [-140..122] */ + } +} + +/* CIE-Luv to XYZ (inverse of xyz_to_luv_exact) */ +void +luv_to_xyz_exact(double xyz[3], double luv[3]) +{ + /* Zero lightness is what xyz_to_luv_exact() emits for black; u and v would be divided by 13 * L = 0 below */ + if (luv[0] <= 0) + { + xyz[0] = xyz[1] = xyz[2] = 0; + return; + } + double var_u = luv[1] / (13 * luv[0]) + (4 * REF_WHITE_X / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z)); + double var_v = luv[2] / (13 * luv[0]) + (9 * REF_WHITE_Y / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z)); + double var_y = (luv[0] + 16) / 116; + double pow_y = var_y * var_y * var_y; + if (pow_y > 0.008856) + var_y = pow_y; + else + /* 16. forces floating-point division; the integer expression 16 / 116 evaluates to 0 */ + var_y = (var_y - 16. / 116) / 7.787; + xyz[1] = var_y; + xyz[0] = -(9 * xyz[1] * var_u) / ((var_u - 4) * var_v - var_u * var_v); + xyz[2] = (9 * xyz[1] - 15 * var_v * xyz[1] - var_v * xyz[0]) / (3 * var_v); +} + +/* RGB to CMYK - a very simple version, not too accurate */ +void +rgb_to_cmyk_exact(double cmyk[4], double rgb[3]) +{ + cmyk[0] = 1 - rgb[0]; + cmyk[1] = 1 - rgb[1]; + cmyk[2] = 1 - rgb[2]; + cmyk[3] = MIN(cmyk[0], cmyk[1]); + cmyk[3] = MIN(cmyk[3], cmyk[2]); + if (cmyk[3] > 0.9999) + { + cmyk[3] = 1; + cmyk[0] = cmyk[1] = cmyk[2] = 0; + } + else + { + double d = 1 / (1 - cmyk[3]); + for (uint i = 0; i < 3; i++) + cmyk[i] = d * (cmyk[i] - cmyk[3]); + }
+} + +/* CMYK to RGB (inverse of rgb_to_cmyk_exact) */ +void +cmyk_to_rgb_exact(double rgb[3], double cmyk[4]) +{ + /* The black (K) component is cmyk[3]; rgb_to_cmyk_exact() divided C, M, Y by (1 - K), so multiply it back */ + double d = 1 - cmyk[3]; + for (uint i = 0; i < 3; i++) + rgb[i] = d * (1 - cmyk[i]); +} + +/***************** OPTIMIZED SRGB -> LUV CONVERSION *********************/ + +u16 srgb_to_luv_tab1[256]; +u16 srgb_to_luv_tab2[9 << SRGB_TO_LUV_TAB2_SIZE]; +u32 srgb_to_luv_tab3[20 << SRGB_TO_LUV_TAB3_SIZE]; + +void +srgb_to_luv_init(void) +{ + DBG("Initializing sRGB -> Luv table"); + for (uint i = 0; i < 256; i++) + { + double t = i / 255.; + if (t > 0.04045) + t = pow((t + 0.055) * (1 / 1.055), 2.4); + else + t = t * (1 / 12.92); + srgb_to_luv_tab1[i] = CLAMP(t * 0xfff + 0.5, 0, 0xfff); + } + for (uint i = 0; i < (9 << SRGB_TO_LUV_TAB2_SIZE); i++) + { + double t = i / (double)((9 << SRGB_TO_LUV_TAB2_SIZE) - 1); + if (t > 0.008856) + t = 1.16 * pow(t, 1 / 3.) - 0.16; + else + t = (1.16 * 7.787) * t; + srgb_to_luv_tab2[i] = + CLAMP(t * ((1 << SRGB_TO_LUV_TAB2_SCALE) - 1) + 0.5, + 0, (1 << SRGB_TO_LUV_TAB2_SCALE) - 1); + } + for (uint i = 0; i < (20 << SRGB_TO_LUV_TAB3_SIZE); i++) + { + /* 13U: the shifted constant does not fit into a signed int, so the shift must be done unsigned */ + srgb_to_luv_tab3[i] = i ? (13U << (SRGB_TO_LUV_TAB3_SCALE + SRGB_TO_LUV_TAB3_SIZE)) / i : 0; + } +} + +void +srgb_to_luv_pixels(byte *dest, byte *src, uint count) +{ + while (count--) + { + srgb_to_luv_pixel(dest, src); + dest += 3; + src += 3; + } +} + + +/************************ GRID INTERPOLATION ALGORITHM ************************/ + +struct color_grid_node *srgb_to_luv_grid; +struct color_interpolation_node *color_interpolation_table; + +/* Returns volume of a given tetrahedron multiplied by 6 */ +static inline uint +tetrahedron_volume(uint *v1, uint *v2, uint *v3, uint *v4) +{ + int a[3], b[3], c[3]; + for (uint i = 0; i < 3; i++) + { + a[i] = v2[i] - v1[i]; + b[i] = v3[i] - v1[i]; + c[i] = v4[i] - v1[i]; + } + int result = + a[0] * (b[1] * c[2] - b[2] * c[1]) - + a[1] * (b[0] * c[2] - b[2] * c[0]) + + a[2] * (b[0] * c[1] - b[1] * c[0]); + return (result > 0) ?
result : -result; +} + +static void +interpolate_tetrahedron(struct color_interpolation_node *n, uint *p, const uint *c) +{ + uint v[4][3]; + for (uint i = 0; i < 4; i++) + { + v[i][0] = (c[i] & 0001) ? (1 << COLOR_CONV_OFS) : 0; + v[i][1] = (c[i] & 0010) ? (1 << COLOR_CONV_OFS) : 0; + v[i][2] = (c[i] & 0100) ? (1 << COLOR_CONV_OFS) : 0; + n->ofs[i] = + ((c[i] & 0001) ? 1 : 0) + + ((c[i] & 0010) ? (1 << COLOR_CONV_SIZE) : 0) + + ((c[i] & 0100) ? (1 << (COLOR_CONV_SIZE * 2)) : 0); + } + uint vol = tetrahedron_volume(v[0], v[1], v[2], v[3]); + n->mul[0] = ((tetrahedron_volume(p, v[1], v[2], v[3]) << 8) + (vol >> 1)) / vol; + n->mul[1] = ((tetrahedron_volume(v[0], p, v[2], v[3]) << 8) + (vol >> 1)) / vol; + n->mul[2] = ((tetrahedron_volume(v[0], v[1], p, v[3]) << 8) + (vol >> 1)) / vol; + n->mul[3] = ((tetrahedron_volume(v[0], v[1], v[2], p) << 8) + (vol >> 1)) / vol; + uint j; + for (j = 0; j < 4; j++) + if (n->mul[j]) + break; + for (uint i = 0; i < 4; i++) + if (n->mul[i] == 0) + n->ofs[i] = n->ofs[j]; +} + +/* Builds color_interpolation_table: one node per sub-cell position, each interpolated within one of five tetrahedra of the cell */ +static void +interpolation_table_init(void) +{ + /* Already built by an earlier call (same guard as conv_grid_init); building again would leak the old table */ + if (color_interpolation_table) + return; + DBG("Initializing color interpolation table"); + struct color_interpolation_node *n = color_interpolation_table = + xmalloc(sizeof(struct color_interpolation_node) << (COLOR_CONV_OFS * 3)); + uint p[3]; + for (p[2] = 0; p[2] < (1 << COLOR_CONV_OFS); p[2]++) + for (p[1] = 0; p[1] < (1 << COLOR_CONV_OFS); p[1]++) + for (p[0] = 0; p[0] < (1 << COLOR_CONV_OFS); p[0]++) + { + uint index; + static const uint tetrahedra[5][4] = { + {0000, 0001, 0010, 0100}, + {0110, 0111, 0100, 0010}, + {0101, 0100, 0111, 0001}, + {0011, 0010, 0001, 0111}, + {0111, 0001, 0010, 0100}}; + if (p[0] + p[1] + p[2] <= (1 << COLOR_CONV_OFS)) + index = 0; + else if ((1 << COLOR_CONV_OFS) + p[0] <= p[1] + p[2]) + index = 1; + else if ((1 << COLOR_CONV_OFS) + p[1] <= p[0] + p[2]) + index = 2; + else if ((1 << COLOR_CONV_OFS) + p[2] <= p[0] + p[1]) + index = 3; + else + index = 4; + interpolate_tetrahedron(n, p, tetrahedra[index]); + n++; + } +} +
+typedef void color_conv_func(double dest[3], double src[3]); + +static void +conv_grid_init(struct color_grid_node **grid, color_conv_func func) +{ + if (*grid) + return; + struct color_grid_node *g = *grid = xmalloc((sizeof(struct color_grid_node)) << (COLOR_CONV_SIZE * 3)); + double src[3], dest[3]; + for (uint k = 0; k < (1 << COLOR_CONV_SIZE); k++) + { + src[2] = k * (255 / (double)((1 << COLOR_CONV_SIZE) - 1)); + for (uint j = 0; j < (1 << COLOR_CONV_SIZE); j++) + { + src[1] = j * (255/ (double)((1 << COLOR_CONV_SIZE) - 1)); + for (uint i = 0; i < (1 << COLOR_CONV_SIZE); i++) + { + src[0] = i * (255 / (double)((1 << COLOR_CONV_SIZE) - 1)); + func(dest, src); + g->val[0] = CLAMP(dest[0] + 0.5, 0, 255); + g->val[1] = CLAMP(dest[1] + 0.5, 0, 255); + g->val[2] = CLAMP(dest[2] + 0.5, 0, 255); + g++; + } + } + } +} + +static void +srgb_to_luv_func(double dest[3], double src[3]) +{ + double srgb[3], xyz[3], luv[3]; + srgb[0] = src[0] / 255.; + srgb[1] = src[1] / 255.; + srgb[2] = src[2] / 255.; + srgb_to_xyz_exact(xyz, srgb); + xyz_to_luv_exact(luv, xyz); + dest[0] = luv[0] * 2.55; + dest[1] = luv[1] * (2.55 / 4) + 128; + dest[2] = luv[2] * (2.55 / 4) + 128; +} + +void +color_conv_init(void) +{ + interpolation_table_init(); + conv_grid_init(&srgb_to_luv_grid, srgb_to_luv_func); +} + +void +color_conv_pixels(byte *dest, byte *src, uint count, struct color_grid_node *grid) +{ + while (count--) + { + color_conv_pixel(dest, src, grid); + dest += 3; + src += 3; + } +} + + +/**************************** TESTS *******************************/ + +#ifdef TEST +#include + +static double +conv_error(u32 color, struct color_grid_node *grid, color_conv_func func) +{ + byte src[3], dest[3]; + src[0] = color & 255; + src[1] = (color >> 8) & 255; + src[2] = (color >> 16) & 255; + color_conv_pixel(dest, src, grid); + double src2[3], dest2[3]; + for (uint i = 0; i < 3; i++) + src2[i] = src[i]; + func(dest2, src2); + double err = 0; + for (uint i = 0; i < 3; i++) + err += (dest[i] - 
dest2[i]) * (dest[i] - dest2[i]); + return err; +} + +typedef void test_fn(byte *dest, byte *src); + +static double +func_error(u32 color, test_fn test, color_conv_func func) +{ + byte src[3], dest[3]; + src[0] = color & 255; + src[1] = (color >> 8) & 255; + src[2] = (color >> 16) & 255; + test(dest, src); + double src2[3], dest2[3]; + for (uint i = 0; i < 3; i++) + src2[i] = src[i]; + func(dest2, src2); + double err = 0; + for (uint i = 0; i < 3; i++) + err += (dest[i] - dest2[i]) * (dest[i] - dest2[i]); + return err; +} + +static void +test_grid(byte *name, struct color_grid_node *grid, color_conv_func func) +{ + double max_err = 0, sum_err = 0; + uint count = 100000; + for (uint i = 0; i < count; i++) + { + double err = conv_error(random_max(0x1000000), grid, func); + max_err = MAX(err, max_err); + sum_err += err; + } + DBG("%s: error max=%f avg=%f", name, max_err, sum_err / count); + if (max_err > 12) + die("Too large error in %s conversion", name); +} + +static void +test_func(byte *name, test_fn test, color_conv_func func) +{ + double max_err = 0, sum_err = 0; + uint count = 100000; + for (uint i = 0; i < count; i++) + { + double err = func_error(random_max(0x1000000), test, func); + max_err = MAX(err, max_err); + sum_err += err; + } + DBG("%s: error max=%f avg=%f", name, max_err, sum_err / count); + if (max_err > 12) + die("Too large error in %s conversion", name); +} + +int +main(void) +{ + srgb_to_luv_init(); + test_func("func sRGB -> Luv", srgb_to_luv_pixel, srgb_to_luv_func); + color_conv_init(); + test_grid("grid sRGB -> Luv", srgb_to_luv_grid, srgb_to_luv_func); +#ifdef LOCAL_DEBUG +#define CNT 1000000 +#define TESTS 10 + byte *a = xmalloc(3 * CNT), *b = xmalloc(3 * CNT); + for (uint i = 0; i < 3 * CNT; i++) + a[i] = random_max(256); + timestamp_t timer; + init_timer(&timer); + for (uint i = 0; i < TESTS; i++) + memcpy(b, a, CNT * 3); + DBG("memcpy time=%d", get_timer(&timer)); + init_timer(&timer); + for (uint i = 0; i < TESTS; i++) + 
srgb_to_luv_pixels(b, a, CNT); + DBG("direct time=%d", get_timer(&timer)); + init_timer(&timer); + for (uint i = 0; i < TESTS; i++) + color_conv_pixels(b, a, CNT, srgb_to_luv_grid); + DBG("grid time=%d", get_timer(&timer)); +#endif + return 0; +} +#endif + diff --git a/libucw/images/color.h b/libucw/images/color.h new file mode 100644 index 0000000..71fa061 --- /dev/null +++ b/libucw/images/color.h @@ -0,0 +1,298 @@ +/* + * Image Library -- Color Spaces + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + * + * + * References: + * - A Review of RGB Color Spaces, Danny Pascale (2003) + * - http://www.adobe.com/digitalimag/pdfs/AdobeRGB1998.pdf + * - http://www.tecgraf.puc-rio.br/~mgattass/color/ColorIndex.html + * + * FIXME: + * - fix theoretical problems with rounding errors in srgb_to_luv_pixel() + * - SIMD should help to speed up conversion of large arrays + * - maybe try to generate a long switch in color_conv_pixel() + * with optimized entries instead of access to interpolation table + * - most of multiplications in srgb_to_luv_pixels can be replaced + * with tables lookup... 
tests show almost the same speed for random + * input and approx. 40% gain when input colors fit in the CPU cache + */ + +#ifndef _IMAGES_COLOR_H +#define _IMAGES_COLOR_H + +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define cmyk_to_rgb_exact ucw_cmyk_to_rgb_exact +#define color_adobe_rgb_info ucw_color_adobe_rgb_info +#define color_apple_rgb_info ucw_color_apple_rgb_info +#define color_black ucw_color_black +#define color_cie_rgb_info ucw_color_cie_rgb_info +#define color_color_match_rgb_info ucw_color_color_match_rgb_info +#define color_compute_bradford_matrix ucw_color_compute_bradford_matrix +#define color_compute_color_space_to_xyz_matrix ucw_color_compute_color_space_to_xyz_matrix +#define color_compute_color_spaces_conversion_matrix ucw_color_compute_color_spaces_conversion_matrix +#define color_conv_init ucw_color_conv_init +#define color_conv_pixels ucw_color_conv_pixels +#define color_get ucw_color_get +#define color_illuminant_d50 ucw_color_illuminant_d50 +#define color_illuminant_d65 ucw_color_illuminant_d65 +#define color_illuminant_e ucw_color_illuminant_e +#define color_interpolation_table ucw_color_interpolation_table +#define color_invert_matrix ucw_color_invert_matrix +#define color_put ucw_color_put +#define color_space_channels ucw_color_space_channels +#define color_space_id_to_name ucw_color_space_id_to_name +#define color_space_name ucw_color_space_name +#define color_space_name_to_id ucw_color_space_name_to_id +#define color_srgb_info ucw_color_srgb_info +#define color_white ucw_color_white +#define image_conv ucw_image_conv +#define image_conv_defaults ucw_image_conv_defaults +#define luv_to_xyz_exact ucw_luv_to_xyz_exact +#define rgb_to_cmyk_exact ucw_rgb_to_cmyk_exact +#define srgb_to_luv_grid ucw_srgb_to_luv_grid +#define srgb_to_luv_init ucw_srgb_to_luv_init +#define srgb_to_luv_pixels ucw_srgb_to_luv_pixels +#define srgb_to_luv_tab1 ucw_srgb_to_luv_tab1 +#define srgb_to_luv_tab2 ucw_srgb_to_luv_tab2 +#define srgb_to_luv_tab3
ucw_srgb_to_luv_tab3 +#define srgb_to_xyz_exact ucw_srgb_to_xyz_exact +#define xyz_to_luv_exact ucw_xyz_to_luv_exact +#define xyz_to_srgb_exact ucw_xyz_to_srgb_exact +#endif + +/* Basic color spaces */ +enum { + COLOR_SPACE_UNKNOWN = 0, + COLOR_SPACE_UNKNOWN_1 = 1, /* unknown 1-channel color space */ + COLOR_SPACE_UNKNOWN_2 = 2, /* unknown 2-channels color space */ + COLOR_SPACE_UNKNOWN_3 = 3, /* unknown 3-channels color space */ + COLOR_SPACE_UNKNOWN_4 = 4, /* unknown 4-channels color space */ + COLOR_SPACE_UNKNOWN_MAX = 4, + COLOR_SPACE_GRAYSCALE, + COLOR_SPACE_RGB, + COLOR_SPACE_XYZ, + COLOR_SPACE_LAB, + COLOR_SPACE_LUV, + COLOR_SPACE_YCBCR, + COLOR_SPACE_CMYK, + COLOR_SPACE_YCCK, + COLOR_SPACE_MAX +}; + +extern uint color_space_channels[COLOR_SPACE_MAX]; +extern byte *color_space_name[COLOR_SPACE_MAX]; + +/* Color space ID <-> name conversions */ +byte *color_space_id_to_name(uint id); +uint color_space_name_to_id(byte *name); + +/* Struct color manipulation */ +int color_get(struct color *color, byte *src, uint src_space); +int color_put(struct image_context *ctx, struct color *color, byte *dest, uint dest_space); + +static inline void color_make_gray(struct color *color, uint gray) +{ + color->c[0] = gray; + color->color_space = COLOR_SPACE_GRAYSCALE; +} + +static inline void color_make_rgb(struct color *color, uint r, uint g, uint b) +{ + color->c[0] = r; + color->c[1] = g; + color->c[2] = b; + color->color_space = COLOR_SPACE_RGB; +} + +extern struct color color_black, color_white; + +/* Conversion between various pixel formats */ + +enum { + IMAGE_CONV_FILL_ALPHA = 1, + IMAGE_CONV_COPY_ALPHA = 2, + IMAGE_CONV_APPLY_ALPHA = 4, +}; + +struct image_conv_options { + uint flags; + struct color background; +}; + +extern struct image_conv_options image_conv_defaults; + +int image_conv(struct image_context *ctx, struct image *dest, struct image *src, struct image_conv_options *opt); + +/* Color spaces in the CIE 1931 chromacity diagram */ + +struct 
color_space_chromacity_info { + double prim1[2]; + double prim2[2]; + double prim3[2]; + double white[2]; +}; + +struct color_space_gamma_info { + double simple_gamma; + double detailed_gamma; + double offset; + double transition; + double slope; +}; + +struct color_space_info { + byte *name; + struct color_space_chromacity_info chromacity; + struct color_space_gamma_info gamma; +}; + +extern const double + color_illuminant_d50[2], + color_illuminant_d65[2], + color_illuminant_e[2]; + +extern const struct color_space_info + color_adobe_rgb_info, /* Adobe RGB (1998) */ + color_apple_rgb_info, /* Apple RGB */ + color_cie_rgb_info, /* CIE RGB */ + color_color_match_rgb_info, /* ColorMatch RGB */ + color_srgb_info; /* sRGB */ + +/* These routines do not check numeric errors! */ +void color_compute_color_space_to_xyz_matrix(double matrix[9], const struct color_space_chromacity_info *space); +void color_compute_bradford_matrix(double matrix[9], const double src[2], const double dest[2]); +void color_compute_color_spaces_conversion_matrix(double matrix[9], const struct color_space_chromacity_info *src, const struct color_space_chromacity_info *dest); +void color_invert_matrix(double dest[9], double matrix[9]); + +static inline uint rgb_to_gray_func(uint r, uint g, uint b) +{ + return (r * 19660 + g * 38666 + b * 7210) >> 16; +} + +/* Exact slow conversion routines */ +void srgb_to_xyz_exact(double dest[3], double src[3]); +void xyz_to_srgb_exact(double dest[3], double src[3]); +void xyz_to_luv_exact(double dest[3], double src[3]); +void luv_to_xyz_exact(double dest[3], double src[3]); +void rgb_to_cmyk_exact(double dest[4], double src[3]); +void cmyk_to_rgb_exact(double dest[3], double src[4]); + +/* Reference white */ +#define REF_WHITE_X 0.96422 +#define REF_WHITE_Y 1. 
+#define REF_WHITE_Z 0.82521 + +/* sRGB -> XYZ matrix */ +#define SRGB_XYZ_XR 0.412424 +#define SRGB_XYZ_XG 0.357579 +#define SRGB_XYZ_XB 0.180464 +#define SRGB_XYZ_YR 0.212656 +#define SRGB_XYZ_YG 0.715158 +#define SRGB_XYZ_YB 0.072186 +#define SRGB_XYZ_ZR 0.019332 +#define SRGB_XYZ_ZG 0.119193 +#define SRGB_XYZ_ZB 0.950444 + + +/*********************** OPTIMIZED CONVERSION ROUTINES **********************/ + +/* sRGB -> Luv parameters */ +#define SRGB_TO_LUV_TAB2_SIZE 9 +#define SRGB_TO_LUV_TAB2_SCALE 11 +#define SRGB_TO_LUV_TAB3_SIZE 8 +#define SRGB_TO_LUV_TAB3_SCALE (39 - SRGB_TO_LUV_TAB2_SCALE - SRGB_TO_LUV_TAB3_SIZE) + +extern u16 srgb_to_luv_tab1[256]; +extern u16 srgb_to_luv_tab2[9 << SRGB_TO_LUV_TAB2_SIZE]; +extern u32 srgb_to_luv_tab3[20 << SRGB_TO_LUV_TAB3_SIZE]; + +void srgb_to_luv_init(void); +void srgb_to_luv_pixels(byte *dest, byte *src, uint count); + +/* L covers the interval [0..255]; u and v are centered to 128 and scaled by 1/4 in respect of L */ +static inline void srgb_to_luv_pixel(byte *dest, byte *src) +{ + uint r = srgb_to_luv_tab1[src[0]]; + uint g = srgb_to_luv_tab1[src[1]]; + uint b = srgb_to_luv_tab1[src[2]]; + uint x = + (uint)(4 * SRGB_XYZ_XR * 0xffff) * r + + (uint)(4 * SRGB_XYZ_XG * 0xffff) * g + + (uint)(4 * SRGB_XYZ_XB * 0xffff) * b; + uint y = + (uint)(9 * SRGB_XYZ_YR * 0xffff) * r + + (uint)(9 * SRGB_XYZ_YG * 0xffff) * g + + (uint)(9 * SRGB_XYZ_YB * 0xffff) * b; + uint l = srgb_to_luv_tab2[y >> (28 - SRGB_TO_LUV_TAB2_SIZE)]; + dest[0] = l >> (SRGB_TO_LUV_TAB2_SCALE - 8); + uint sum = + (uint)((SRGB_XYZ_XR + 15 * SRGB_XYZ_YR + 3 * SRGB_XYZ_ZR) * 0x7fff) * r + + (uint)((SRGB_XYZ_XG + 15 * SRGB_XYZ_YG + 3 * SRGB_XYZ_ZG) * 0x7fff) * g + + (uint)((SRGB_XYZ_XB + 15 * SRGB_XYZ_YB + 3 * SRGB_XYZ_ZB) * 0x7fff) * b; + uint s = srgb_to_luv_tab3[sum >> (27 - SRGB_TO_LUV_TAB3_SIZE)]; + int xs = ((u64)x * s) >> 32; + int ys = ((u64)y * s) >> 32; + int xw = ((4 * 13) << (SRGB_TO_LUV_TAB3_SCALE - 4)) * + REF_WHITE_X / (REF_WHITE_X + 15 * 
REF_WHITE_Y + 3 * REF_WHITE_Z); + int yw = ((9 * 13) << (SRGB_TO_LUV_TAB3_SCALE - 4)) * + REF_WHITE_Y / (REF_WHITE_X + 15 * REF_WHITE_Y + 3 * REF_WHITE_Z); + int u = (int)(l) * (xs - xw); + int v = (int)(l) * (ys - yw); + dest[1] = 128 + (u >> (SRGB_TO_LUV_TAB3_SCALE + SRGB_TO_LUV_TAB2_SCALE - 10)); + dest[2] = 128 + (v >> (SRGB_TO_LUV_TAB3_SCALE + SRGB_TO_LUV_TAB2_SCALE - 10)); +} + + +/****************** GENERAL INTERPOLATION IN 3D GRID ********************/ + +#define COLOR_CONV_SIZE 5 /* 128K conversion grid size */ +#define COLOR_CONV_OFS 3 /* 8K interpolation table size */ + +struct color_grid_node { + byte val[4]; +}; + +struct color_interpolation_node { + u16 ofs[4]; + u16 mul[4]; +}; + +extern struct color_grid_node *srgb_to_luv_grid; +extern struct color_interpolation_node *color_interpolation_table; + +void color_conv_init(void); +void color_conv_pixels(byte *dest, byte *src, uint count, struct color_grid_node *grid); + +#define COLOR_CONV_SCALE_CONST (((((1 << COLOR_CONV_SIZE) - 1) << 16) + (1 << (16 - COLOR_CONV_OFS))) / 255) + +static inline void color_conv_pixel(byte *dest, byte *src, struct color_grid_node *grid) +{ + uint s0 = src[0] * COLOR_CONV_SCALE_CONST; + uint s1 = src[1] * COLOR_CONV_SCALE_CONST; + uint s2 = src[2] * COLOR_CONV_SCALE_CONST; + struct color_grid_node *g0, *g1, *g2, *g3, *g = grid + + ((s0 >> 16) + ((s1 >> 16) << COLOR_CONV_SIZE) + ((s2 >> 16) << (2 * COLOR_CONV_SIZE))); + struct color_interpolation_node *n = color_interpolation_table + + (((s0 & (0x10000 - (0x10000 >> COLOR_CONV_OFS))) >> (16 - COLOR_CONV_OFS)) + + ((s1 & (0x10000 - (0x10000 >> COLOR_CONV_OFS))) >> (16 - 2 * COLOR_CONV_OFS)) + + ((s2 & (0x10000 - (0x10000 >> COLOR_CONV_OFS))) >> (16 - 3 * COLOR_CONV_OFS))); + g0 = g + n->ofs[0]; + g1 = g + n->ofs[1]; + g2 = g + n->ofs[2]; + g3 = g + n->ofs[3]; + dest[0] = (g0->val[0] * n->mul[0] + g1->val[0] * n->mul[1] + + g2->val[0] * n->mul[2] + g3->val[0] * n->mul[3] + 128) >> 8; + dest[1] = (g0->val[1] * n->mul[0] + 
g1->val[1] * n->mul[1] + + g2->val[1] * n->mul[2] + g3->val[1] * n->mul[3] + 128) >> 8; + dest[2] = (g0->val[2] * n->mul[0] + g1->val[2] * n->mul[1] + + g2->val[2] * n->mul[2] + g3->val[2] * n->mul[3] + 128) >> 8; +} + +#endif diff --git a/libucw/images/color.t b/libucw/images/color.t new file mode 100644 index 0000000..510a320 --- /dev/null +++ b/libucw/images/color.t @@ -0,0 +1,3 @@ +# Tests for color conversion module + +Run: ../obj/images/color-t diff --git a/libucw/images/config.c b/libucw/images/config.c new file mode 100644 index 0000000..286097e --- /dev/null +++ b/libucw/images/config.c @@ -0,0 +1,76 @@ +/* + * Image Library -- Configuration + * + * (c) 2006 Pavel Charvat + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#if defined(CONFIG_IMAGES_SIM) || defined(CONFIG_IMAGES_DUP) +#include +#endif + +#include + +/* ImageLib section */ +uint image_trace; +uint image_max_dim = 0xffff; +uint image_max_bytes = ~0U; + +#if defined(CONFIG_IMAGES_SIM) || defined(CONFIG_IMAGES_DUP) +/* ImageSig section */ +uint image_sig_min_width; +uint image_sig_min_height; +uint *image_sig_prequant_thresholds; +uint image_sig_postquant_min_steps; +uint image_sig_postquant_max_steps; +uint image_sig_postquant_threshold; +double image_sig_border_size; +int image_sig_border_bonus; +double image_sig_inertia_scale[3]; +double image_sig_textured_threshold; +int image_sig_compare_method; +uint image_sig_cmp_features_weights[IMAGE_REG_F + IMAGE_REG_H]; +#endif + +static struct cf_section image_lib_config = { + CF_ITEMS{ + CF_UINT("Trace", &image_trace), + CF_UINT("ImageMaxDim", &image_max_dim), + CF_UINT("ImageMaxBytes", &image_max_bytes), + CF_END + } +}; + +#if defined(CONFIG_IMAGES_SIM) || defined(CONFIG_IMAGES_DUP) +static struct cf_section image_sig_config = { + CF_ITEMS{ + CF_UINT("MinWidth", &image_sig_min_width), + CF_UINT("MinHeight", &image_sig_min_height), + CF_UINT_DYN("PreQuantThresholds", &image_sig_prequant_thresholds, CF_ANY_NUM), + 
CF_UINT("PostQuantMinSteps", &image_sig_postquant_min_steps), + CF_UINT("PostQuantMaxSteps", &image_sig_postquant_max_steps), + CF_UINT("PostQuantThreshold", &image_sig_postquant_threshold), + CF_DOUBLE("BorderSize", &image_sig_border_size), + CF_INT("BorderBonus", &image_sig_border_bonus), + CF_DOUBLE_ARY("InertiaScale", image_sig_inertia_scale, 3), + CF_DOUBLE("TexturedThreshold", &image_sig_textured_threshold), + CF_LOOKUP("CompareMethod", &image_sig_compare_method, ((const char * const []){"integrated", "fuzzy", "average", NULL})), + CF_UINT_ARY("CompareFeaturesWeights", image_sig_cmp_features_weights, IMAGE_REG_F + IMAGE_REG_H), + CF_END + } +}; +#endif + +static void CONSTRUCTOR +images_init_config(void) +{ + cf_declare_section("ImageLib", &image_lib_config, 0); +#if defined(CONFIG_IMAGES_SIM) || defined(CONFIG_IMAGES_DUP) + cf_declare_section("ImageSig", &image_sig_config, 0); +#endif +} diff --git a/libucw/images/context.c b/libucw/images/context.c new file mode 100644 index 0000000..82ff165 --- /dev/null +++ b/libucw/images/context.c @@ -0,0 +1,61 @@ +/* + * Image Library -- Image contexts + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include + +#include + +void +image_context_init(struct image_context *ctx) +{ + bzero(ctx, sizeof(*ctx)); + bb_init(&ctx->msg_buf); + ctx->tracing_level = image_trace; + ctx->msg_callback = image_context_msg_default; +} + +void +image_context_cleanup(struct image_context *ctx) +{ + IMAGE_TRACE(ctx, 10, "Destroying image thread"); + bb_done(&ctx->msg_buf); +} + +void +image_context_msg_default(struct image_context *ctx) +{ + msg(ctx->msg_code >> 24, "%s", ctx->msg); +} + +void +image_context_msg_silent(struct image_context *ctx UNUSED) +{ +} + +void +image_context_msg(struct image_context *ctx, uint code, char *msg, ...) 
+{ + va_list args; + va_start(args, msg); + image_context_vmsg(ctx, code, msg, args); + va_end(args); +} + +void +image_context_vmsg(struct image_context *ctx, uint code, char *msg, va_list args) +{ + ctx->msg_code = code; + ctx->msg = bb_vprintf(&ctx->msg_buf, msg, args); + ctx->msg_callback(ctx); +} diff --git a/libucw/images/dup-cmp.c b/libucw/images/dup-cmp.c new file mode 100644 index 0000000..e0a2812 --- /dev/null +++ b/libucw/images/dup-cmp.c @@ -0,0 +1,302 @@ +/* + * Image Library -- Duplicates Comparison + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include + +#include + +static inline uint +err (int a, int b) +{ + a -= b; + return a * a; +} + +static inline u64 +err_sum(byte *pos1, byte *pos2, uint count) +{ + uint e64 = 0; + while (count--) + { + uint e = err(*pos1++, *pos2++); + e += err(*pos1++, *pos2++); + e += err(*pos1++, *pos2++); + e64 += e; + } + return e64; +} + +static inline u64 +err_sum_transformed(byte *pos1, byte *pos2, uint cols, uint rows, int row_step_1, int col_step_2, int row_step_2) +{ + DBG("err_sum_transformed(pos1=%p pos2=%p cols=%u rows=%u row_step_1=%d col_step_2=%d row_step_2=%d)", + pos1, pos2, cols, rows, row_step_1, col_step_2, row_step_2); + u64 e64 = 0; + for (uint j = rows; j--; ) + { + byte *p1 = pos1; + byte *p2 = pos2; + uint e = 0; + for (uint i = cols; i--; ) + { + e += err(p1[0], p2[0]); + e += err(p1[1], p2[1]); + e += err(p1[2], p2[2]); + p1 += 3; + p2 += col_step_2; + } + pos1 += row_step_1; + pos2 += row_step_2; + e64 += e; + } + return e64; +} + +static inline int +aspect_ratio_test(struct image_dup_context *ctx, uint cols1, uint rows1, uint cols2, uint rows2) +{ + DBG("aspect_ratio_test(cols1=%u rows1=%u cols2=%u rows2=%u)", cols1, rows1, cols2, rows2); + uint r1 = cols1 * rows2; + uint r2 = rows1 * cols2; + return + r1 <= ((r2 * 
ctx->ratio_threshold) >> 7) && + r2 <= ((r1 * ctx->ratio_threshold) >> 7); +} + +static inline int +average_compare(struct image_dup_context *ctx, struct image_dup *dup1, struct image_dup *dup2) +{ + byte *block1 = image_dup_block(dup1, 0, 0); + byte *block2 = image_dup_block(dup2, 0, 0); + uint e = + err(block1[0], block2[0]) + + err(block1[1], block2[1]) + + err(block1[2], block2[2]); + return e <= ctx->error_threshold; +} + +static int +blocks_compare(struct image_dup_context *ctx, struct image_dup *dup1, struct image_dup *dup2, uint tab_col, uint tab_row, uint trans) +{ + DBG("blocks_compare(tab_col=%d tab_row=%d trans=%d)", tab_col, tab_row, trans); + ctx->sum_pixels += 1 << (tab_col + tab_row); + byte *block1 = image_dup_block(dup1, tab_col, tab_row); + byte *block2; + int col_step, row_step; + if (trans < 4) + block2 = image_dup_block(dup2, tab_col, tab_row); + else + block2 = image_dup_block(dup2, tab_row, tab_col); + switch (trans) + { + case 0: ; + uint err = (err_sum(block1, block2, 1 << (tab_col + tab_row)) >> (tab_col + tab_row)); + DBG("average error=%d", err); + ctx->error = err; + return err <= ctx->error_threshold; + case 1: + col_step = -3; + row_step = (3 << tab_col); + block2 += row_step - 3; + break; + case 2: + col_step = 3; + row_step = -(3 << tab_col); + block2 += (3 << (tab_col + tab_row)) + row_step; + break; + case 3: + col_step = -3; + row_step = -(3 << tab_col); + block2 += (3 << (tab_col + tab_row)) - 3; + break; + case 4: + col_step = (3 << tab_row); + row_step = 3; + break; + case 5: + col_step = -(3 << tab_row); + row_step = 3; + block2 += (3 << (tab_col + tab_row)) + col_step; + break; + case 6: + col_step = (3 << tab_row); + row_step = -3; + block2 += col_step - 3; + break; + case 7: + col_step = -(3 << tab_row); + row_step = -3; + block2 += (3 << (tab_col + tab_row)) - 3; + break; + default: + ASSERT(0); + } + uint err = (err_sum_transformed(block1, block2, (1 << tab_col), (1 << tab_row), (3 << tab_col), col_step, row_step) >> 
(tab_col + tab_row)); + DBG("average error=%d", err); + ctx->error = err; + return err <= ctx->error_threshold; +} + +static int +same_size_compare(struct image_dup_context *ctx, struct image_dup *dup1, struct image_dup *dup2, uint trans) +{ + struct image *img1 = &dup1->image; + struct image *img2 = &dup2->image; + if (!img1->pixels || !img2->pixels) + return 1; + ctx->sum_pixels += img1->cols * img1->rows; + byte *block1 = img1->pixels; + byte *block2 = img2->pixels; + int col_step, row_step; + DBG("same_size_compare(trans=%d)", trans); + switch (trans) + { + case 0: ; + col_step = 3; + row_step = img2->row_size; + break; + case 1: + col_step = -3; + row_step = img2->row_size; + block2 += 3 * (img2->cols - 1); + break; + case 2: + col_step = 3; + row_step = -img2->row_size; + block2 += img2->row_size * (img2->rows - 1); + break; + case 3: + col_step = -3; + row_step = -img2->row_size; + block2 += img2->row_size * (img2->rows - 1) + 3 * (img2->cols - 1); + break; + case 4: + col_step = img2->row_size; + row_step = 3; + break; + case 5: + col_step = -img2->row_size; + row_step = 3; + block2 += img2->row_size * (img2->rows - 1); + break; + case 6: + col_step = img2->row_size; + row_step = -3; + block2 += 3 * (img2->cols - 1); + break; + case 7: + col_step = -img2->row_size; + row_step = -3; + block2 += img2->row_size * (img2->rows - 1) + 3 * (img2->cols - 1); + break; + default: + ASSERT(0); + } + uint err = (err_sum_transformed(block1, block2, img1->cols, img1->rows, img1->row_size, col_step, row_step) / ((u64)img1->cols * img1->rows)); + DBG("average error=%d", err); + ctx->error = err; + return err <= ctx->error_threshold; +} + +uint +image_dup_compare(struct image_dup_context *ctx, struct image_dup *dup1, struct image_dup *dup2) +{ + DBG("image_dup_compare(%p, %p)", dup1, dup2); + if (!average_compare(ctx, dup1, dup2)) + return 0; + struct image *img1 = &dup1->image; + struct image *img2 = &dup2->image; + uint flags = ctx->flags; + if (flags & IMAGE_DUP_SCALE) + 
{ + DBG("Scale support"); + if (!aspect_ratio_test(ctx, img1->cols, img1->rows, img2->cols, img2->rows)) + flags &= ~0x0f; + if (!aspect_ratio_test(ctx, img1->cols, img1->rows, img2->rows, img2->cols)) + flags &= ~0xf0; + } + else + { + DBG("No scale support"); + if (!(img1->cols == img2->cols && img1->rows == img2->rows)) + flags &= ~0x0f; + if (!(img1->cols == img2->rows && img1->rows == img2->cols)) + flags &= ~0xf0; + } + if (!(flags & 0xff)) + return 0; + uint result = 0; + if (flags & 0x0f) + { + uint cols = MIN(dup1->tab_cols, dup2->tab_cols); + uint rows = MIN(dup1->tab_rows, dup2->tab_rows); + for (uint t = 0; t < 4; t++) + if (flags & (1 << t)) + { + DBG("Testing trans %d", t); + uint i = MAX(cols, rows), depth = 1; + while (i--) + { + depth++; + uint col = MAX(0, (int)(cols - i)); + uint row = MAX(0, (int)(rows - i)); + if (!blocks_compare(ctx, dup1, dup2, col, row, t)) + break; + if (!i && + (img1->cols != img2->cols || img1->rows != img2->rows || + same_size_compare(ctx, dup1, dup2, t))) + { + result |= 1 << t; + if (!(flags & IMAGE_DUP_WANT_ALL)) + return result; + else + break; + } + } + ctx->sum_depth += depth; + } + } + if (flags & 0xf0) + { + uint cols = MIN(dup1->tab_cols, dup2->tab_rows); + uint rows = MIN(dup1->tab_rows, dup2->tab_cols); + for (uint t = 4; t < 8; t++) + if (flags & (1 << t)) + { + DBG("Testing trans %d", t); + uint i = MAX(cols, rows), depth = 1; + while (i--) + { + depth++; + uint col = MAX(0, (int)(cols - i)); + uint row = MAX(0, (int)(rows - i)); + if (!blocks_compare(ctx, dup1, dup2, col, row, t)) + break; + if (!i && + (img1->cols != img2->rows || img1->rows != img2->cols || + same_size_compare(ctx, dup1, dup2, t)) ) + { + result |= 1 << t; + if (!(flags & IMAGE_DUP_WANT_ALL)) + return result; + else + break; + } + } + ctx->sum_depth += depth; + } + } + return result; +} diff --git a/libucw/images/dup-init.c b/libucw/images/dup-init.c new file mode 100644 index 0000000..c4c5f4d --- /dev/null +++ b/libucw/images/dup-init.c 
@@ -0,0 +1,153 @@ +/* + * Image Library -- Duplicates Comparison + * + * (c) 2006--2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include + +#include + +void +image_dup_context_init(struct image_context *ic, struct image_dup_context *ctx) +{ + *ctx = (struct image_dup_context) { + .ic = ic, + .flags = IMAGE_DUP_TRANS_ID, + .ratio_threshold = 140, + .error_threshold = 100, + .qtree_limit = 8, + }; +} + +void +image_dup_context_cleanup(struct image_dup_context *ctx UNUSED) +{ +} + +static inline struct image * +image_dup_subimage(struct image_context *ctx, struct image_dup *dup, struct image *block, uint tab_col, uint tab_row) +{ + return image_init_matrix(ctx, block, image_dup_block(dup, tab_col, tab_row), + 1 << tab_col, 1 << tab_row, 3 << tab_col, COLOR_SPACE_RGB); +} + +static inline void +pixels_average(byte *dest, byte *src1, byte *src2) +{ + dest[0] = ((uint)src1[0] + (uint)src2[0]) >> 1; + dest[1] = ((uint)src1[1] + (uint)src2[1]) >> 1; + dest[2] = ((uint)src1[2] + (uint)src2[2]) >> 1; +} + +uint +image_dup_estimate_size(uint cols, uint rows, uint same_size_compare, uint qtree_limit) +{ + uint tab_cols, tab_rows; + for (tab_cols = 0; (uint)(2 << tab_cols) < cols && tab_cols < qtree_limit; tab_cols++); + for (tab_rows = 0; (uint)(2 << tab_rows) < rows && tab_rows < qtree_limit; tab_rows++); + uint size = sizeof(struct image_dup) + (12 << (tab_cols + tab_rows)) + 2 * CPU_STRUCT_ALIGN; + if (same_size_compare) + size += cols * rows * 3 + CPU_STRUCT_ALIGN; + return ALIGN_TO(size, CPU_STRUCT_ALIGN); +} + +uint +image_dup_new(struct image_dup_context *ctx, struct image *img, void *buffer, uint same_size_compare) +{ + DBG("image_dup_init()"); + ASSERT(!((uintptr_t)buffer & (CPU_STRUCT_ALIGN - 1))); + void *ptr = buffer; + + /* Allocate the structure */ + struct image_dup *dup = ptr; + ptr 
+= ALIGN_TO(sizeof(*dup), CPU_STRUCT_ALIGN); + bzero(dup, sizeof(*dup)); + + ASSERT((img->flags & IMAGE_PIXEL_FORMAT) == COLOR_SPACE_RGB); + + /* Clone image */ + if (same_size_compare) + { + if (!image_init_matrix(ctx->ic, &dup->image, ptr, img->cols, img->rows, img->cols * 3, COLOR_SPACE_RGB)) + return 0; + uint size = img->rows * img->cols * 3; + ptr += ALIGN_TO(size, CPU_STRUCT_ALIGN); + byte *s = img->pixels; + byte *d = dup->image.pixels; + for (uint row = img->rows; row--; ) + { + memcpy(d, s, img->row_pixels_size); + d += dup->image.row_size; + s += img->row_size; + } + } + else + { + dup->image.cols = img->cols; + dup->image.rows = img->rows; + } + + for (dup->tab_cols = 0; (uint)(2 << dup->tab_cols) < img->cols && dup->tab_cols < ctx->qtree_limit; dup->tab_cols++); + for (dup->tab_rows = 0; (uint)(2 << dup->tab_rows) < img->rows && dup->tab_rows < ctx->qtree_limit; dup->tab_rows++); + dup->tab_row_size = 6 << dup->tab_cols; + dup->tab_pixels = ptr; + uint size = 12 << (dup->tab_cols + dup->tab_rows); + ptr += ALIGN_TO(size, CPU_STRUCT_ALIGN); + + /* Scale original image to right bottom block */ + { + struct image block; + if (!image_dup_subimage(ctx->ic, dup, &block, dup->tab_cols, dup->tab_rows)) + return 0; + if (!image_scale(ctx->ic, &block, img)) + return 0; + } + + /* Complete bottom row */ + for (uint i = dup->tab_cols; i--; ) + { + byte *d = image_dup_block(dup, i, dup->tab_rows); + byte *s = image_dup_block(dup, i + 1, dup->tab_rows); + for (uint y = 0; y < (uint)(1 << dup->tab_rows); y++) + for (uint x = 0; x < (uint)(1 << i); x++) + { + pixels_average(d, s, s + 3); + d += 3; + s += 6; + } + } + + /* Complete remaining blocks */ + for (uint i = 0; i <= dup->tab_cols; i++) + { + uint line_size = (3 << i); + for (uint j = dup->tab_rows; j--; ) + { + byte *d = image_dup_block(dup, i, j); + byte *s = image_dup_block(dup, i, j + 1); + for (uint y = 0; y < (uint)(1 << j); y++) + { + for (uint x = 0; x < (uint)(1 << i); x++) + { + pixels_average(d, s, s 
+ line_size); + d += 3; + s += 3; + } + s += line_size; + } + } + } + + return ptr - buffer; +} diff --git a/libucw/images/duplicates.h b/libucw/images/duplicates.h new file mode 100644 index 0000000..d824c8c --- /dev/null +++ b/libucw/images/duplicates.h @@ -0,0 +1,65 @@ +#ifndef _IMAGES_DUPLICATES_H +#define _IMAGES_DUPLICATES_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define image_dup_compare ucw_image_dup_compare +#define image_dup_context_cleanup ucw_image_dup_context_cleanup +#define image_dup_context_init ucw_image_dup_context_init +#define image_dup_estimate_size ucw_image_dup_estimate_size +#define image_dup_new ucw_image_dup_new +#endif + +enum image_dup_flags { + IMAGE_DUP_TRANS_ID = 0x0001, + IMAGE_DUP_FLIP_X = 0x0002, + IMAGE_DUP_FLIP_Y = 0x0004, + IMAGE_DUP_ROT_180 = 0x0008, + IMAGE_DUP_FLIP_BACK = 0x0010, + IMAGE_DUP_ROT_CCW = 0x0020, + IMAGE_DUP_ROT_CW = 0x0040, + IMAGE_DUP_FLIP_SLASH = 0x0080, + IMAGE_DUP_TRANS_ALL = 0x00ff, + IMAGE_DUP_SCALE = 0x0100, + IMAGE_DUP_WANT_ALL = 0x0200, +}; + +struct image_dup_context { + struct image_context *ic; + uint flags; + uint ratio_threshold; + uint error_threshold; + uint qtree_limit; + u64 sum_depth; + u64 sum_pixels; + uint error; +}; + +struct image_dup { + struct image image; + byte *tab_pixels; + u32 tab_cols; + u32 tab_rows; + u32 tab_row_size; + u32 tab_size; +}; + +/* dup-init.c */ + +void image_dup_context_init(struct image_context *ic, struct image_dup_context *ctx); +void image_dup_context_cleanup(struct image_dup_context *ctx); + +uint image_dup_estimate_size(uint cols, uint rows, uint same_size_compare, uint qtree_limit); +uint image_dup_new(struct image_dup_context *ctx, struct image *image, void *buffer, uint same_size_compare); + +/* dup-cmp.c */ + +uint image_dup_compare(struct image_dup_context *ctx, struct image_dup *dup1, struct image_dup *dup2); + +/* internals */ + +static inline byte *image_dup_block(struct image_dup *dup, uint tab_col, uint tab_row) +{ + return dup->tab_pixels + 
(dup->tab_row_size << tab_row) + (3 << (tab_row + tab_col)); +} + +#endif diff --git a/libucw/images/error.h b/libucw/images/error.h new file mode 100644 index 0000000..77760b2 --- /dev/null +++ b/libucw/images/error.h @@ -0,0 +1,39 @@ +#ifndef _IMAGES_ERROR_H +#define _IMAGES_ERROR_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define image_trace ucw_image_trace +#endif + +extern uint image_trace; /* ImageLib.Trace */ + +/* Error codes */ + +enum image_msg_code { + IMAGE_MSG_TYPE = 0xff000000, + IMAGE_MSG_TRACE = (L_DEBUG << 24), + IMAGE_MSG_WARN = (L_WARN << 24), + IMAGE_MSG_ERROR = (L_ERROR << 24), + IMAGE_TRACE_LEVEL = 0x0000ffff, + IMAGE_WARN_TYPE = 0x0000ffff, + IMAGE_WARN_SUBTYPE = 0x00ff0000, + IMAGE_ERROR_TYPE = 0x0000ffff, + IMAGE_ERROR_SUBTYPE = 0x00ff0000, + IMAGE_ERROR_NOT_IMPLEMENTED = 1, + IMAGE_ERROR_INVALID_DIMENSIONS = 2, + IMAGE_ERROR_INVALID_FILE_FORMAT = 3, + IMAGE_ERROR_INVALID_PIXEL_FORMAT = 4, + IMAGE_ERROR_READ_FAILED = 5, + IMAGE_ERROR_WRITE_FAILED = 6, +}; + +/* Useful macros */ + +#define IMAGE_WARN(ctx, type, msg...) image_context_msg((ctx), IMAGE_MSG_WARN | (type), msg) +#define IMAGE_ERROR(ctx, type, msg...) image_context_msg((ctx), IMAGE_MSG_ERROR | (type), msg) + +#define IMAGE_TRACE(ctx, level, msg...) do { \ + struct image_context *_ctx = (ctx); uint _level = (level); \ + if (_level < _ctx->tracing_level) image_context_msg(_ctx, IMAGE_MSG_TRACE | _level, msg); } while (0) + +#endif diff --git a/libucw/images/image-test.c b/libucw/images/image-test.c new file mode 100644 index 0000000..8098def --- /dev/null +++ b/libucw/images/image-test.c @@ -0,0 +1,235 @@ +/* + * Image Library -- Simple automatic tests + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */
+
+#undef LOCAL_DEBUG
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+static uint want_image_iface; /* incremented in main() for each "image-iface" argument */
+static uint want_threads; /* incremented in main() for each "threads" argument */
+/* Fail via ASSERT(0) when the expression evaluates to false. */
+#define TRY(x) do { if (!(x)) ASSERT(0); } while (0)
+/* Exercise image_new(), image_clone() and image_init_subimage() and assert on the returned pointers, pixel sizes and alignment. */
+static void
+test_image_iface(void)
+{
+  struct mempool *pool;
+  struct image_context ctx;
+  struct image *i1, *i2;
+  struct image s1;
+
+  pool = mp_new(1024);
+  image_context_init(&ctx);
+
+  /* Image allocation */
+  i1 = image_new(&ctx, 731, 327, COLOR_SPACE_RGB, NULL);
+  ASSERT(i1);
+  ASSERT(i1->pixel_size == 3);
+  image_destroy(i1);
+
+  /* Test invalid image size (messages are silenced while failures are expected) */
+  ctx.msg_callback = image_context_msg_silent;
+  i1 = image_new(&ctx, 2214, 0, COLOR_SPACE_RGB, NULL);
+  ASSERT(!i1);
+  i1 = image_new(&ctx, 0xffffff, 0xffffff, COLOR_SPACE_RGB, NULL);
+  ASSERT(!i1);
+  ctx.msg_callback = image_context_msg_default;
+
+  /* Various image allocation parameters */
+  i1 = image_new(&ctx, 370, 100, COLOR_SPACE_GRAYSCALE, pool);
+  ASSERT(i1);
+  ASSERT(i1->pixel_size == 1);
+  image_destroy(i1);
+  mp_flush(pool);
+
+  i1 = image_new(&ctx, 373, 101, COLOR_SPACE_RGB | IMAGE_ALIGNED, NULL);
+  ASSERT(i1);
+  ASSERT(i1->pixel_size == 4); /* aligned RGB is expected to occupy 4 bytes per pixel */
+  ASSERT(IMAGE_SSE_ALIGN_SIZE >= 16);
+  ASSERT(!(i1->row_size & (IMAGE_SSE_ALIGN_SIZE - 1)));
+  ASSERT(!((uintptr_t)i1->pixels & (IMAGE_SSE_ALIGN_SIZE - 1)));
+  image_destroy(i1);
+
+  i1 = image_new(&ctx, 283, 329, COLOR_SPACE_RGB, NULL);
+  ASSERT(i1);
+  ASSERT(i1->pixel_size == 3);
+
+  /* Image structures cloning */
+  i2 = image_clone(&ctx, i1, COLOR_SPACE_RGB, NULL);
+  ASSERT(i2);
+  ASSERT(i2->pixel_size == 3);
+  image_destroy(i2);
+
+  i2 = image_clone(&ctx, i1, COLOR_SPACE_RGB | IMAGE_PIXELS_ALIGNED, NULL);
+  ASSERT(i2);
+  ASSERT(i2->pixel_size == 4);
+  image_destroy(i2);
+
+  /* Subimages */
+  i2 = image_init_subimage(&ctx, &s1, i1, 29, 39, 283 - 29, 100);
+  ASSERT(i2);
+  image_destroy(&s1);
+
+  image_destroy(i1);
+
+  image_context_cleanup(&ctx);
+  mp_delete(pool);
+}
+
+#ifdef CONFIG_UCW_THREADS
+
+#define 
TEST_THREADS_COUNT 4 + +static void * +test_threads_thread(void *param UNUSED) +{ + DBG("Starting thread"); + struct image_context ctx; + struct image_io io; + image_context_init(&ctx); + TRY(image_io_init(&ctx, &io)); + + for (uint num = 0; num < 200; num++) + { + int r0 = random_max(100); + + /* realloc context */ + if ((r0 -= 2) < 0) + { + image_io_cleanup(&io); + image_context_cleanup(&ctx); + image_context_init(&ctx); + TRY(image_io_init(&ctx, &io)); + } + + /* realloc I/O */ + else if ((r0 -= 2) < 0) + { + image_io_cleanup(&io); + TRY(image_io_init(&ctx, &io)); + } + + /* encode and decode random image */ + else + { + struct image *img; + + TRY(img = image_new(&ctx, 10 + random_max(140), 10 + random_max(140), COLOR_SPACE_RGB, NULL)); + image_clear(&ctx, img); + +#if defined(CONFIG_IMAGES_LIBJPEG) || defined(CONFIG_IMAGES_LIBPNG) || defined(CONFIG_IMAGES_LIBMAGICK) + + struct fastbuf *wfb = fbmem_create(10000); + struct fastbuf *rfb; + uint format = 0; + while (!format) + { + switch (random_max(3)) + { + case 0: +#if defined(CONFIG_IMAGES_LIBJPEG) || defined(CONFIG_IMAGES_LIBMAGICK) + format = IMAGE_FORMAT_JPEG; +#endif + break; + case 1: +#if defined(CONFIG_IMAGES_LIBPNG) || defined(CONFIG_IMAGES_LIBMAGICK) + format = IMAGE_FORMAT_PNG; +#endif + break; + case 2: +#if defined(CONFIG_IMAGES_LIBMAGICK) + format = IMAGE_FORMAT_GIF; +#endif + break; + default: + ASSERT(0); + } + } + + io.format = format; + io.fastbuf = wfb; + io.image = img; + TRY(image_io_write(&io)); + image_io_reset(&io); + + rfb = fbmem_clone_read(wfb); + io.format = format; + io.fastbuf = rfb; + TRY(image_io_read(&io, 0)); + image_io_reset(&io); + + bclose(rfb); + bclose(wfb); + +#endif + image_destroy(img); + } + } + + image_io_cleanup(&io); + image_context_cleanup(&ctx); + DBG("Stopping thread"); + return NULL; +} + +#endif + +static void +test_threads(void) +{ +#ifdef CONFIG_UCW_THREADS + pthread_t threads[TEST_THREADS_COUNT - 1]; + pthread_attr_t attr; + if (pthread_attr_init(&attr) < 0 
|| + pthread_attr_setstacksize(&attr, ucwlib_thread_stack_size) < 0) + ASSERT(0); + for (uint i = 0; i < TEST_THREADS_COUNT - 1; i++) + { + if (pthread_create(threads + i, &attr, test_threads_thread, NULL) < 0) + die("Unable to create thread: %m"); + } + test_threads_thread(NULL); + for (uint i = 0; i < TEST_THREADS_COUNT - 1; i++) + if (pthread_join(threads[i], NULL) < 0) + die("Cannot join thread: %m"); +#else + msg(L_WARN, "Disabled CONFIG_UCW_THREADS, threaded tests skipped"); +#endif +} + +int +main(int argc, char **argv) +{ + for (int i = 1; i < argc; i++) + if (!strcmp(argv[i], "image-iface")) + want_image_iface++; + else if (!strcmp(argv[i], "threads")) + want_threads++; + else + die("Invalid parameter"); + + random_gen_seed(); + + if (want_image_iface) + test_image_iface(); + if (want_threads) + test_threads(); + + return 0; +} + diff --git a/libucw/images/image-walk.h b/libucw/images/image-walk.h new file mode 100644 index 0000000..1cdbbf2 --- /dev/null +++ b/libucw/images/image-walk.h @@ -0,0 +1,174 @@ +/* + * Image Library -- Pixels iteration + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef IMAGE_WALK_PREFIX +# error Undefined IMAGE_WALK_PREFIX +#endif + +#define P(x) IMAGE_WALK_PREFIX(x) + +#if !defined(IMAGE_WALK_UNROLL) +# define IMAGE_WALK_UNROLL 1 +#elif IMAGE_WALK_UNROLL != 1 && IMAGE_WALK_UNROLL != 2 && IMAGE_WALK_UNROLL != 4 +# error IMAGE_WALK_UNROLL must be 1, 2 or 4 +#endif + +#ifndef IMAGE_WALK_IMAGE +# define IMAGE_WALK_IMAGE P(img) +#endif +#ifndef IMAGE_WALK_PIXELS +# define IMAGE_WALK_PIXELS (IMAGE_WALK_IMAGE->pixels) +#endif +#ifndef IMAGE_WALK_COLS +# define IMAGE_WALK_COLS (IMAGE_WALK_IMAGE->cols) +#endif +#ifndef IMAGE_WALK_ROWS +# define IMAGE_WALK_ROWS (IMAGE_WALK_IMAGE->rows) +#endif +#ifndef IMAGE_WALK_COL_STEP +# define IMAGE_WALK_COL_STEP (IMAGE_WALK_IMAGE->pixel_size) +#endif +#ifndef IMAGE_WALK_ROW_STEP +# define IMAGE_WALK_ROW_STEP (IMAGE_WALK_IMAGE->row_size) +#endif + +#ifdef IMAGE_WALK_DOUBLE +# ifndef IMAGE_WALK_SEC_IMAGE +# define IMAGE_WALK_SEC_IMAGE P(sec_img) +# endif +# ifndef IMAGE_WALK_SEC_PIXELS +# define IMAGE_WALK_SEC_PIXELS (IMAGE_WALK_SEC_IMAGE->pixels) +# endif +# ifndef IMAGE_WALK_SEC_COLS +# define IMAGE_WALK_SEC_COLS (IMAGE_WALK_SEC_IMAGE->cols) +# endif +# ifndef IMAGE_WALK_SEC_ROWS +# define IMAGE_WALK_SEC_ROWS (IMAGE_WALK_SEC_IMAGE->rows) +# endif +# ifndef IMAGE_WALK_SEC_COL_STEP +# define IMAGE_WALK_SEC_COL_STEP (IMAGE_WALK_SEC_IMAGE->pixel_size) +# endif +# ifndef IMAGE_WALK_SEC_ROW_STEP +# define IMAGE_WALK_SEC_ROW_STEP (IMAGE_WALK_SEC_IMAGE->row_size) +# endif +# define IMAGE_WALK__STEP IMAGE_WALK_DO_STEP; P(pos) += P(col_step); P(sec_pos) += P(sec_col_step) +#else +# define IMAGE_WALK__STEP IMAGE_WALK_DO_STEP; P(pos) += P(col_step) +#endif + +#ifndef IMAGE_WALK_DO_START +# define IMAGE_WALK_DO_START +#endif + +#ifndef IMAGE_WALK_DO_END +# define IMAGE_WALK_DO_END +#endif + +#ifndef IMAGE_WALK_DO_ROW_START +# define IMAGE_WALK_DO_ROW_START +#endif + +#ifndef IMAGE_WALK_DO_ROW_END +# define IMAGE_WALK_DO_ROW_END +#endif + +#ifndef IMAGE_WALK_DO_STEP +# define IMAGE_WALK_DO_STEP 
+#endif + +#ifndef IMAGE_WALK_INLINE +static void +#ifdef IMAGE_WALK_FUNC_NAME +IMAGE_WALK_FUNC_NAME +#else +P(walk) +#endif + (struct image *P(img) +# ifdef IMAGE_WALK_DOUBLE + , struct image *P(sec_img) +# endif +# ifdef IMAGE_WALK_EXTRA_ARGS + , IMAGE_WALK_EXTRA_ARGS +# endif + ) +#endif +{ + uint P(cols) = IMAGE_WALK_COLS; + uint P(rows) = IMAGE_WALK_ROWS; +# if IMAGE_WALK_UNROLL > 1 + uint P(cols_unroll_block_count) = P(cols) / IMAGE_WALK_UNROLL; + uint P(cols_unroll_end_count) = P(cols) % IMAGE_WALK_UNROLL; +# endif + byte *P(pos) = IMAGE_WALK_PIXELS, *P(row_start) = P(pos); + int P(col_step) = IMAGE_WALK_COL_STEP; + int P(row_step) = IMAGE_WALK_ROW_STEP; +# ifdef IMAGE_WALK_DOUBLE + byte *P(sec_pos) = IMAGE_WALK_SEC_PIXELS, *P(sec_row_start) = P(sec_pos); + int P(sec_col_step) = IMAGE_WALK_SEC_COL_STEP; + int P(sec_row_step) = IMAGE_WALK_SEC_ROW_STEP; +# endif + IMAGE_WALK_DO_START; + while (P(rows)--) + { + IMAGE_WALK_DO_ROW_START; +# if IMAGE_WALK_UNROLL == 1 + for (uint P(_i) = P(cols); P(_i)--; ) +# else + for (uint P(_i) = P(cols_unroll_block_count); P(_i)--; ) +# endif + { +# if IMAGE_WALK_UNROLL >= 4 + IMAGE_WALK__STEP; + IMAGE_WALK__STEP; +# endif +# if IMAGE_WALK_UNROLL >= 2 + IMAGE_WALK__STEP; +# endif + IMAGE_WALK__STEP; + } +# if IMAGE_WALK_UNROLL > 1 + for (uint P(_i) = P(cols_unroll_end_count); P(_i)--; ) + { + IMAGE_WALK__STEP; + } +# endif + IMAGE_WALK_DO_ROW_END; + P(pos) = (P(row_start) += P(row_step)); +# ifdef IMAGE_WALK_DOUBLE + P(sec_pos) = (P(sec_row_start) += P(sec_row_step)); +# endif + } + IMAGE_WALK_DO_END; +} + +#undef IMAGE_WALK_PREFIX +#undef IMAGE_WALK_FUNC_NAME +#undef IMAGE_WALK_INLINE +#undef IMAGE_WALK_UNROLL +#undef IMAGE_WALK_DOUBLE +#undef IMAGE_WALK_EXTRA_ARGS +#undef IMAGE_WALK_IMAGE +#undef IMAGE_WALK_PIXELS +#undef IMAGE_WALK_COLS +#undef IMAGE_WALK_ROWS +#undef IMAGE_WALK_COL_STEP +#undef IMAGE_WALK_ROW_STEP +#undef IMAGE_WALK_SEC_IMAGE +#undef IMAGE_WALK_SEC_PIXELS +#undef IMAGE_WALK_SEC_COLS +#undef 
IMAGE_WALK_SEC_ROWS +#undef IMAGE_WALK_SEC_COL_STEP +#undef IMAGE_WALK_SEC_ROW_STEP +#undef IMAGE_WALK_DO_START +#undef IMAGE_WALK_DO_END +#undef IMAGE_WALK_DO_ROW_START +#undef IMAGE_WALK_DO_ROW_END +#undef IMAGE_WALK_DO_STEP +#undef IMAGE_WALK__STEP +#undef P diff --git a/libucw/images/image.c b/libucw/images/image.c new file mode 100644 index 0000000..8c0779b --- /dev/null +++ b/libucw/images/image.c @@ -0,0 +1,227 @@ +/* + * Image Library -- Basic image manipulation + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include + +#include + +static inline uint +flags_to_pixel_size(uint flags) +{ + uint pixel_size = color_space_channels[flags & IMAGE_COLOR_SPACE]; + if (flags & IMAGE_ALPHA) + pixel_size++; + return pixel_size; +} + +struct image * +image_new(struct image_context *ctx, uint cols, uint rows, uint flags, struct mempool *pool) +{ + DBG("image_new(cols=%u rows=%u flags=0x%x pool=%p)", cols, rows, flags, pool); + flags &= IMAGE_NEW_FLAGS; + if (unlikely(!image_dimensions_valid(cols, rows))) + { + IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_DIMENSIONS, "Invalid image dimensions (%ux%u)", cols, rows); + return NULL; + } + struct image *img; + uint channels, pixel_size, row_pixels_size, row_size, align; + pixel_size = channels = flags_to_pixel_size(flags); + if (!channels || channels > 4) + { + IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Invalid number of color channels (%u)", channels); + return NULL; + } + switch (channels) + { + case 1: + case 2: + case 4: + flags |= IMAGE_PIXELS_ALIGNED; + break; + case 3: + if (flags & IMAGE_PIXELS_ALIGNED) + pixel_size = 4; + break; + default: + ASSERT(0); + } + if (flags & IMAGE_SSE_ALIGNED) + align = IMAGE_SSE_ALIGN_SIZE; + else if (flags & IMAGE_PIXELS_ALIGNED) + align = pixel_size; + else + align = 1; + row_pixels_size = cols * pixel_size; 
+ row_size = ALIGN_TO(row_pixels_size, align); + u64 image_size_64 = (u64)row_size * rows; + u64 bytes_64 = image_size_64 + (sizeof(struct image) + IMAGE_SSE_ALIGN_SIZE - 1 + sizeof(uint)); + if (unlikely(bytes_64 > image_max_bytes)) + { + IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_DIMENSIONS, "Image does not fit in memory"); + return NULL; + } + if (pool) + img = mp_alloc(pool, bytes_64); + else + { + img = xmalloc(bytes_64); + flags |= IMAGE_NEED_DESTROY; + } + bzero(img, sizeof(struct image)); + byte *p = (byte *)img + sizeof(struct image); + img->pixels = ALIGN_PTR(p, IMAGE_SSE_ALIGN_SIZE); + img->flags = flags; + img->channels = channels; + img->pixel_size = pixel_size; + img->cols = cols; + img->rows = rows; + img->row_size = row_size; + img->row_pixels_size = row_pixels_size; + img->image_size = image_size_64; + DBG("img=%p flags=0x%x pixel_size=%u row_size=%u image_size=%u pixels=%p", + img, img->flags, img->pixel_size, img->row_size, img->image_size, img->pixels); + return img; +} + +struct image * +image_clone(struct image_context *ctx, struct image *src, uint flags, struct mempool *pool) +{ + DBG("image_clone(src=%p flags=0x%x pool=%p)", src, src->flags, pool); + struct image *img; + flags &= IMAGE_NEW_FLAGS & ~IMAGE_CHANNELS_FORMAT; + flags |= src->flags & IMAGE_CHANNELS_FORMAT; + if (!(img = image_new(ctx, src->cols, src->rows, flags, pool))) + return NULL; + ASSERT(src->channels == img->channels); + if (img->image_size) + { + if (src->pixel_size != img->pixel_size) /* conversion between aligned and unaligned RGB */ + { + ASSERT(src->channels == 3); +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE img +# define IMAGE_WALK_SEC_IMAGE src +# define IMAGE_WALK_DOUBLE +# define IMAGE_WALK_DO_STEP do{ walk_pos[0] = walk_sec_pos[0]; walk_pos[1] = walk_sec_pos[1]; walk_pos[2] = walk_sec_pos[2]; }while(0) +# include + } + else if (src->row_size != img->row_size || ((img->flags | src->flags) & IMAGE_GAPS_PROTECTED)) + { + 
byte *s = src->pixels; + byte *d = img->pixels; + for (uint row = src->rows; row--; ) + { + memcpy(d, s, src->row_pixels_size); + d += img->row_size; + s += src->row_size; + } + } + else + memcpy(img->pixels, src->pixels, img->image_size); + } + return img; +} + +void +image_destroy(struct image *img) +{ + DBG("image_destroy(img=%p)", img); + if (img->flags & IMAGE_NEED_DESTROY) + xfree(img); +} + +void +image_clear(struct image_context *ctx UNUSED, struct image *img) +{ + DBG("image_clear(img=%p)", img); + if (img->image_size) + if (img->flags & IMAGE_GAPS_PROTECTED) + { + byte *p = img->pixels; + uint bytes = img->cols * img->pixel_size; + for (uint row = img->rows; row--; p += img->row_size) + bzero(p, bytes); + } + else + bzero(img->pixels, img->image_size); +} + +struct image * +image_init_matrix(struct image_context *ctx, struct image *img, byte *pixels, uint cols, uint rows, uint row_size, uint flags) +{ + DBG("image_init_matrix(img=%p pixels=%p cols=%u rows=%u row_size=%u flags=0x%x)", img, pixels, cols, rows, row_size, flags); + if (unlikely(!image_dimensions_valid(cols, rows))) + { + IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_DIMENSIONS, "Invalid image dimensions (%ux%u)", cols, rows); + return NULL; + } + img->pixels = pixels; + img->cols = cols; + img->rows = rows; + img->pixel_size = img->channels = flags_to_pixel_size(flags); + img->row_size = row_size; + img->row_pixels_size = cols * img->pixel_size; + img->image_size = rows * row_size; + img->flags = flags & (IMAGE_NEW_FLAGS | IMAGE_GAPS_PROTECTED); + return img; +} + +struct image * +image_init_subimage(struct image_context *ctx UNUSED, struct image *img, struct image *src, uint left, uint top, uint cols, uint rows) +{ + DBG("image_init_subimage(img=%p src=%p left=%u top=%u cols=%u rows=%u)", img, src, left, top, cols, rows); + ASSERT(left + cols <= src->cols && top + rows <= src->rows); + img->pixels = src->pixels + left * src->pixel_size + top * src->row_size; + img->cols = cols; + img->rows = rows; + 
img->pixel_size = img->channels = src->pixel_size; + img->row_size = src->row_size; + img->row_pixels_size = cols * src->pixel_size; + img->image_size = src->row_size * rows; + img->flags = src->flags & IMAGE_NEW_FLAGS; + img->flags |= IMAGE_GAPS_PROTECTED; + return img; +} + +byte * +image_channels_format_to_name(uint format, byte *buf) +{ + byte *cs_name = color_space_id_to_name(format & IMAGE_COLOR_SPACE); + uint l = strlen(cs_name); + memcpy(buf, cs_name, l + 1); + if (format & IMAGE_ALPHA) + strcpy(buf + l, "+Alpha"); + return buf; +} +/* Inverse of image_channels_format_to_name(): parses a color space name with an optional case-insensitive "+alpha" suffix; returns the color space id (OR-ed with IMAGE_ALPHA when the suffix is present), or 0 when neither lookup succeeds. */ +uint +image_name_to_channels_format(byte *name) +{ + uint i; + if (i = color_space_name_to_id(name)) + return i; + uint l = strlen(name); + if (l > 6 && !strcasecmp(name + l - 6, "+alpha")) /* the "+alpha" suffix is 6 characters long */ + { + byte buf[l + 1]; + memcpy(buf, name, l - 6); + buf[l - 6] = 0; + if (i = color_space_name_to_id(buf)) /* look up the name with the suffix stripped */ + return i | IMAGE_ALPHA; + } + return 0; +} diff --git a/libucw/images/images.h b/libucw/images/images.h new file mode 100644 index 0000000..f59b825 --- /dev/null +++ b/libucw/images/images.h @@ -0,0 +1,200 @@ +/* + * Image Library -- Main header file + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _IMAGES_IMAGES_H +#define _IMAGES_IMAGES_H + +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define image_channels_format_to_name ucw_image_channels_format_to_name +#define image_clear ucw_image_clear +#define image_clone ucw_image_clone +#define image_context_cleanup ucw_image_context_cleanup +#define image_context_init ucw_image_context_init +#define image_context_msg ucw_image_context_msg +#define image_context_msg_default ucw_image_context_msg_default +#define image_context_msg_silent ucw_image_context_msg_silent +#define image_context_vmsg ucw_image_context_vmsg +#define image_destroy ucw_image_destroy +#define image_dimensions_fit_to_box ucw_image_dimensions_fit_to_box +#define image_extension_to_format ucw_image_extension_to_format +#define image_file_name_to_format ucw_image_file_name_to_format +#define image_format_to_extension ucw_image_format_to_extension +#define image_init_matrix ucw_image_init_matrix +#define image_init_subimage ucw_image_init_subimage +#define image_io_cleanup ucw_image_io_cleanup +#define image_io_init ucw_image_io_init +#define image_io_read ucw_image_io_read +#define image_io_read_data ucw_image_io_read_data +#define image_io_read_header ucw_image_io_read_header +#define image_io_reset ucw_image_io_reset +#define image_io_write ucw_image_io_write +#define image_max_bytes ucw_image_max_bytes +#define image_max_dim ucw_image_max_dim +#define image_name_to_channels_format ucw_image_name_to_channels_format +#define image_new ucw_image_new +#define image_scale ucw_image_scale +#endif + +struct mempool; +struct fastbuf; + + +/* context.c + * - contexts with error/message handling + * - imagelib is thread-safe as long as threads work in different contexts */ + +struct image_context { + byte *msg; /* last message */ + uint msg_code; /* last message code (see images/error.h for details) */ + bb_t msg_buf; /* message buffer */ + void (*msg_callback)(struct image_context *ctx); /* called for each message (in msg_{str,code}) */ + 
uint tracing_level; /* tracing level (zero to disable) */ +}; + +/* initialization/cleanup */ +void image_context_init(struct image_context *ctx); +void image_context_cleanup(struct image_context *ctx); + +/* message handling, see images/error.h for useful macros */ +void image_context_msg(struct image_context *ctx, uint code, char *msg, ...); +void image_context_vmsg(struct image_context *ctx, uint code, char *msg, va_list args); + +/* default callback, displays messages with standard libucw's log() routine */ +void image_context_msg_default(struct image_context *ctx); + +/* empty callback */ +void image_context_msg_silent(struct image_context *ctx); + + +/* image.c + * - basic manipulation with images + * - image structure is not directly connected to a single context + * but manipulation routines are (user must synchronize the access himself)! */ + +extern uint image_max_dim; /* ImageLib.ImageMaxDim */ +extern uint image_max_bytes; /* ImageLib.ImageMaxBytes */ + +/* SSE aligning size, see IMAGE_SSE_ALIGNED */ +#define IMAGE_SSE_ALIGN_SIZE 16 + +enum image_flag { + IMAGE_COLOR_SPACE = 0xf, /* mask for enum color_space */ + IMAGE_ALPHA = 0x10, /* alpha channel */ + IMAGE_PIXELS_ALIGNED = 0x20, /* align pixel size to the nearest power of two */ + IMAGE_SSE_ALIGNED = 0x40, /* align scanlines to multiples of 16 bytes (both start and size) */ + IMAGE_NEED_DESTROY = 0x80, /* image is allocated with xmalloc */ + IMAGE_GAPS_PROTECTED = 0x100, /* cannot access gaps between rows */ + IMAGE_CHANNELS_FORMAT = IMAGE_COLOR_SPACE | IMAGE_ALPHA, + IMAGE_PIXEL_FORMAT = IMAGE_CHANNELS_FORMAT | IMAGE_PIXELS_ALIGNED, + IMAGE_ALIGNED = IMAGE_PIXELS_ALIGNED | IMAGE_SSE_ALIGNED, + IMAGE_NEW_FLAGS = IMAGE_PIXEL_FORMAT | IMAGE_SSE_ALIGNED, + IMAGE_INTERNAL_FLAGS = IMAGE_NEED_DESTROY | IMAGE_GAPS_PROTECTED, +}; + +#define IMAGE_MAX_CHANNELS 4 +#define IMAGE_CHANNELS_FORMAT_MAX_SIZE 128 +byte *image_channels_format_to_name(uint format, byte *buf); +uint image_name_to_channels_format(byte 
*name); + +struct color { + byte c[IMAGE_MAX_CHANNELS]; + byte color_space; +}; + +struct image { + byte *pixels; /* aligned top left pixel, there are at least sizeof(uint) + unused bytes after the buffer (possible optimizations) */ + uint cols; /* number of columns */ + uint rows; /* number of rows */ + uint channels; /* number of color channels including the alpha channel */ + uint pixel_size; /* size of pixel in bytes (1, 2, 3 or 4) */ + uint row_size; /* scanline size in bytes */ + uint row_pixels_size; /* scanline size in bytes excluding rows gaps */ + uint image_size; /* rows * row_size */ + uint flags; /* enum image_flag */ +}; + +struct image *image_new(struct image_context *ctx, uint cols, uint rows, uint flags, struct mempool *pool); +struct image *image_clone(struct image_context *ctx, struct image *src, uint flags, struct mempool *pool); +void image_destroy(struct image *img); +void image_clear(struct image_context *ctx, struct image *img); +struct image *image_init_matrix(struct image_context *ctx, struct image *img, byte *pixels, uint cols, uint rows, uint row_size, uint flags); +struct image *image_init_subimage(struct image_context *ctx, struct image *img, struct image *src, uint left, uint top, uint cols, uint rows); + +static inline int image_dimensions_valid(uint cols, uint rows) +{ + return cols && rows && cols <= image_max_dim && rows <= image_max_dim; +} +/* scale.c */ + +int image_scale(struct image_context *ctx, struct image *dest, struct image *src); +void image_dimensions_fit_to_box(uint *cols, uint *rows, uint max_cols, uint max_rows, uint upsample); + +/* image-io.c */ + +enum image_format { + IMAGE_FORMAT_UNDEFINED, + IMAGE_FORMAT_JPEG, + IMAGE_FORMAT_PNG, + IMAGE_FORMAT_GIF, + IMAGE_FORMAT_MAX +}; + +struct image_io { + /* R - read_header input */ + /* H - read_header output */ + /* I - read_data input */ + /* O - read_data output */ + /* W - write input */ + + struct image *image; /* [ OW] - image data */ + enum image_format format; 
/* [R W] - file format (IMAGE_FORMAT_x) */ + struct fastbuf *fastbuf; /* [R W] - source/destination stream */ + struct mempool *pool; /* [ I ] - parameter to image_new */ + uint cols; /* [ HI ] - number of columns, parameter to image_new */ + uint rows; /* [ HI ] - number of rows, parameter to image_new */ + uint flags; /* [ HI ] - see enum image_io_flags */ + uint jpeg_quality; /* [ W] - JPEG compression quality (1..100) */ + uint number_of_colors; /* [ H ] - number of image colors */ + struct color background_color; /* [ HI ] - background color, zero if undefined */ + uint exif_size; /* [ H W] - EXIF size in bytes (zero if not present) */ + byte *exif_data; /* [ H W] - EXIF data */ + + /* internals */ + struct image_context *context; + struct mempool *internal_pool; + void *read_data; + void (*read_cancel)(struct image_io *io); +}; + +enum image_io_flags { + IMAGE_IO_IMAGE_FLAGS = 0xffff, /* [ HI ] - mask of parameters to image new, read_header fills IMAGE_CHANNELS_FORMAT */ + IMAGE_IO_NEED_DESTROY = 0x10000, /* [ O ] - enables automatic call of image_destroy */ + IMAGE_IO_HAS_PALETTE = 0x20000, /* [ H ] - true for image with indexed colors */ + IMAGE_IO_USE_BACKGROUND = 0x40000, /* [ I ] - merge transparent pixels with background_color */ + IMAGE_IO_WANT_EXIF = 0x80000, /* [R ] - read EXIF data if present */ +}; + +int image_io_init(struct image_context *ctx, struct image_io *io); +void image_io_cleanup(struct image_io *io); +void image_io_reset(struct image_io *io); + +int image_io_read_header(struct image_io *io); +struct image *image_io_read_data(struct image_io *io, int ref); +struct image *image_io_read(struct image_io *io, int ref); + +int image_io_write(struct image_io *io); + +byte *image_format_to_extension(enum image_format format); +enum image_format image_extension_to_format(byte *extension); +enum image_format image_file_name_to_format(byte *file_name); + +#endif diff --git a/libucw/images/io-libjpeg.c b/libucw/images/io-libjpeg.c new file mode 
100644 index 0000000..cc2f206 --- /dev/null +++ b/libucw/images/io-libjpeg.c @@ -0,0 +1,605 @@ +/* + * Image Library -- libjpeg + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct libjpeg_err { + struct jpeg_error_mgr pub; + jmp_buf setjmp_buf; + struct image_io *io; +}; + +struct libjpeg_read_internals { + struct jpeg_decompress_struct cinfo; + struct jpeg_source_mgr src; + struct libjpeg_err err; + struct fastbuf *fastbuf; + byte *fastbuf_pos; +}; + +struct libjpeg_write_internals { + struct jpeg_compress_struct cinfo; + struct jpeg_destination_mgr dest; + struct libjpeg_err err; + struct fastbuf *fastbuf; + byte *fastbuf_pos; +}; + +static void NONRET +libjpeg_read_error_exit(j_common_ptr cinfo) +{ + DBG("libjpeg_error_exit()"); + struct libjpeg_err *e = (struct libjpeg_err *)cinfo->err; + byte buf[JMSG_LENGTH_MAX]; + e->pub.format_message(cinfo, buf); + IMAGE_ERROR(e->io->context, IMAGE_ERROR_READ_FAILED, "libjpeg: %s", buf); + longjmp(e->setjmp_buf, 1); +} + +static void NONRET +libjpeg_write_error_exit(j_common_ptr cinfo) +{ + DBG("libjpeg_error_exit()"); + struct libjpeg_err *e = (struct libjpeg_err *)cinfo->err; + byte buf[JMSG_LENGTH_MAX]; + e->pub.format_message(cinfo, buf); + IMAGE_ERROR(e->io->context, IMAGE_ERROR_WRITE_FAILED, "libjpeg: %s", buf); + longjmp(e->setjmp_buf, 1); +} + +static void +libjpeg_emit_message(j_common_ptr cinfo UNUSED, int msg_level UNUSED) +{ +#ifdef LOCAL_DEBUG + byte buf[JMSG_LENGTH_MAX]; + cinfo->err->format_message(cinfo, buf); + DBG("libjpeg_emit_message(): [%d] %s", msg_level, buf); +#endif +#if 0 + // Terminate on warning? 
+ if (unlikely(msg_level == -1)) + { + struct libjpeg_err *e = (struct libjpeg_err *)cinfo->err; + byte buf[JMSG_LENGTH_MAX]; + cinfo->err->format_message(cinfo, buf); + IMAGE_ERROR(e->io->context, 0, "libjpeg: %s", buf); + longjmp(e->setjmp_buf, 1); + } +#endif +} + +static inline uint +libjpeg_fastbuf_read_prepare(struct libjpeg_read_internals *i) +{ + DBG("libjpeg_fb_read_prepare()"); + byte *start; + uint len = bdirect_read_prepare(i->fastbuf, &start); + DBG("readed %u bytes at %p", len, start); + if (!len) + { + // XXX: maybe only generate a warning and return EOI markers to recover from such errors (also in skip_input_data) + IMAGE_ERROR(i->err.io->context, IMAGE_ERROR_READ_FAILED, "Incomplete JPEG file"); + longjmp(i->err.setjmp_buf, 1); + } + i->fastbuf_pos = start + len; + i->src.next_input_byte = start; + i->src.bytes_in_buffer = len; + return len; +} + +static inline void +libjpeg_fastbuf_read_commit(struct libjpeg_read_internals *i) +{ + DBG("libjpeg_fb_read_commit()"); + bdirect_read_commit(i->fastbuf, i->fastbuf_pos); +} + +static void +libjpeg_init_source(j_decompress_ptr cinfo) +{ + DBG("libjpeg_init_source()"); + libjpeg_fastbuf_read_prepare((struct libjpeg_read_internals *)cinfo); +} + +static void +libjpeg_term_source(j_decompress_ptr cinfo UNUSED) +{ + DBG("libjpeg_term_source()"); + //libjpeg_fastbuf_read_commit((struct libjpeg_read_internals *)cinfo); +} + +static boolean +libjpeg_fill_input_buffer(j_decompress_ptr cinfo) +{ + DBG("libjpeg_fill_input_buffer()"); + struct libjpeg_read_internals *i = (struct libjpeg_read_internals *)cinfo; + libjpeg_fastbuf_read_commit(i); + libjpeg_fastbuf_read_prepare(i); + return 1; +} + +static void +libjpeg_skip_input_data(j_decompress_ptr cinfo, long num_bytes) +{ + DBG("libjpeg_skip_input_data(num_bytes=%d)", (int)num_bytes); + if (num_bytes > 0) + { + struct libjpeg_read_internals *i = (struct libjpeg_read_internals *)cinfo; + if ((unsigned long)num_bytes <= i->src.bytes_in_buffer) + { + 
i->src.next_input_byte += num_bytes; + i->src.bytes_in_buffer -= num_bytes; + } + else + { + num_bytes -= i->src.bytes_in_buffer; + libjpeg_fastbuf_read_commit(i); + if (!bskip(i->fastbuf, num_bytes)) + { + IMAGE_ERROR(i->err.io->context, IMAGE_ERROR_READ_FAILED, "Incomplete JPEG file"); + longjmp(i->err.setjmp_buf, 1); + } + libjpeg_fastbuf_read_prepare(i); + } + } +} + +static inline void +libjpeg_fastbuf_write_prepare(struct libjpeg_write_internals *i) +{ + byte *start; + uint len = bdirect_write_prepare(i->fastbuf, &start); + i->fastbuf_pos = start + len; + i->dest.next_output_byte = start; + i->dest.free_in_buffer = len; + if (!len) + { + IMAGE_ERROR(i->err.io->context, IMAGE_ERROR_WRITE_FAILED, "Unexpected end of stream"); + longjmp(i->err.setjmp_buf, 1); + } +} + +static void +libjpeg_init_destination(j_compress_ptr cinfo) +{ + DBG("libjpeg_init_destination()"); + libjpeg_fastbuf_write_prepare((struct libjpeg_write_internals *)cinfo); +} + +static void +libjpeg_term_destination(j_compress_ptr cinfo) +{ + DBG("libjpeg_term_destination()"); + struct libjpeg_write_internals *i = (struct libjpeg_write_internals *)cinfo; + bdirect_write_commit(i->fastbuf, (byte *)i->dest.next_output_byte); +} + +static boolean +libjpeg_empty_output_buffer(j_compress_ptr cinfo) +{ + DBG("libjpeg_empty_output_buffer()"); + struct libjpeg_write_internals *i = (struct libjpeg_write_internals *)cinfo; + bdirect_write_commit(i->fastbuf, i->fastbuf_pos); + libjpeg_fastbuf_write_prepare(i); + return TRUE; +} + +static inline uint +libjpeg_read_byte(struct libjpeg_read_internals *i) +{ + DBG("libjpeg_read_byte()"); + if (!i->src.bytes_in_buffer) + if (!libjpeg_fill_input_buffer(&i->cinfo)) + ERREXIT(&i->cinfo, JERR_CANT_SUSPEND); + i->src.bytes_in_buffer--; + return *i->src.next_input_byte++; +} + +static inline void +libjpeg_read_buf(struct libjpeg_read_internals *i, byte *buf, uint len) +{ + DBG("libjpeg_read_buf(len=%u)", len); + while (len) + { + if (!i->src.bytes_in_buffer) + if 
(!libjpeg_fill_input_buffer(&i->cinfo)) + ERREXIT(&i->cinfo, JERR_CANT_SUSPEND); + uint buf_size = i->src.bytes_in_buffer; + uint read_size = MIN(buf_size, len); + memcpy(buf, i->src.next_input_byte, read_size); + i->src.bytes_in_buffer -= read_size; + i->src.next_input_byte += read_size; + len -= read_size; + } +} + +static byte libjpeg_exif_header[6] = { 'E', 'x', 'i', 'f', 0, 0 }; + +static boolean +libjpeg_app1_preprocessor(j_decompress_ptr cinfo) +{ + struct libjpeg_read_internals *i = (struct libjpeg_read_internals *)cinfo; + struct image_io *io = i->err.io; + uint len = libjpeg_read_byte(i) << 8; + len += libjpeg_read_byte(i); + DBG("Found APP1 marker, len=%u", len); + if (len < 2) + return TRUE; + len -= 2; + if (len < 7 /*|| io->exif_size*/) + { + libjpeg_skip_input_data(cinfo, len); + return TRUE; + } + byte header[6]; + libjpeg_read_buf(i, header, 6); + if (memcmp(header, libjpeg_exif_header, 6)) + { + libjpeg_skip_input_data(cinfo, len - 6); + return TRUE; + } + io->exif_size = len; + io->exif_data = mp_alloc(io->internal_pool, len); + memcpy(io->exif_data, header, 6); + libjpeg_read_buf(i, io->exif_data + 6, len - 6); + DBG("Parsed EXIF of length %u", len); + return TRUE; +} + +static void +libjpeg_read_cancel(struct image_io *io) +{ + DBG("libjpeg_read_cancel()"); + struct libjpeg_read_internals *i = io->read_data; + jpeg_destroy_decompress(&i->cinfo); +} + +int +libjpeg_read_header(struct image_io *io) +{ + DBG("libjpeg_read_header()"); + struct libjpeg_read_internals *i = io->read_data = mp_alloc(io->internal_pool, sizeof(*i)); + i->fastbuf = io->fastbuf; + + /* Create libjpeg read structure */ + DBG("Creating libjpeg read structure"); + i->cinfo.err = jpeg_std_error(&i->err.pub); + i->err.pub.error_exit = libjpeg_read_error_exit; + i->err.pub.emit_message = libjpeg_emit_message; + i->err.io = io; + if (setjmp(i->err.setjmp_buf)) + { + DBG("Libjpeg failed to read the image, longjump saved us"); + jpeg_destroy_decompress(&i->cinfo); + return 0; + } + 
jpeg_create_decompress(&i->cinfo); + + /* Initialize source manager */ + i->cinfo.src = &i->src; + i->src.init_source = libjpeg_init_source; + i->src.fill_input_buffer = libjpeg_fill_input_buffer; + i->src.skip_input_data = libjpeg_skip_input_data; + i->src.resync_to_restart = jpeg_resync_to_restart; + i->src.term_source = libjpeg_term_source; + + if (io->flags & IMAGE_IO_WANT_EXIF) + jpeg_set_marker_processor(&i->cinfo, JPEG_APP0 + 1, libjpeg_app1_preprocessor); + + /* Read JPEG header and setup decompression options */ + DBG("Reading image header"); + jpeg_read_header(&i->cinfo, TRUE); + switch (i->cinfo.jpeg_color_space) + { + case JCS_GRAYSCALE: + io->flags = COLOR_SPACE_GRAYSCALE; + break; + case JCS_RGB: + io->flags = COLOR_SPACE_RGB; + break; + case JCS_YCbCr: + io->flags = COLOR_SPACE_YCBCR; + break; + case JCS_CMYK: + io->flags = COLOR_SPACE_CMYK; + break; + case JCS_YCCK: + io->flags = COLOR_SPACE_YCCK; + break; + default: + if (unlikely(i->cinfo.num_components < 1 || i->cinfo.num_components > 4)) + { + jpeg_destroy_decompress(&i->cinfo); + IMAGE_ERROR(io->context, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Invalid color space."); + return 0; + } + io->flags = COLOR_SPACE_UNKNOWN + i->cinfo.num_components; + break; + } + if (unlikely(i->cinfo.num_components != (int)color_space_channels[io->flags])) + { + jpeg_destroy_decompress(&i->cinfo); + IMAGE_ERROR(io->context, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Invalid number of color channels."); + return 0; + } + io->cols = i->cinfo.image_width; + io->rows = i->cinfo.image_height; + io->number_of_colors = (i->cinfo.num_components < 4) ? 
(1U << (i->cinfo.num_components * 8)) : 0xffffffff; + io->read_cancel = libjpeg_read_cancel; + return 1; +} + +int +libjpeg_read_data(struct image_io *io) +{ + DBG("libjpeg_read_data()"); + + struct libjpeg_read_internals *i = io->read_data; + uint read_flags = io->flags; + + /* Select color space */ + switch (i->cinfo.jpeg_color_space) + { + case JCS_GRAYSCALE: /* decode grayscale sources as single-channel grayscale, matching what libjpeg_read_header() reports for them */ + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_GRAYSCALE; + i->cinfo.out_color_space = JCS_GRAYSCALE; + break; + case JCS_YCbCr: + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_YCBCR; + i->cinfo.out_color_space = JCS_YCbCr; + break; + case JCS_CMYK: + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_CMYK; + i->cinfo.out_color_space = JCS_CMYK; + break; + case JCS_YCCK: + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_YCCK; + i->cinfo.out_color_space = JCS_YCCK; + break; + default: + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_RGB; + i->cinfo.out_color_space = JCS_RGB; + break; + } + + /* Prepare the image */ + struct image_io_read_data_internals rdi; + if (io->cols <= (i->cinfo.image_width >> 3) && io->rows <= (i->cinfo.image_height >> 3)) + { + DBG("Scaling to 1/8"); + i->cinfo.scale_num = 1; + i->cinfo.scale_denom = 8; + } + else if (io->cols <= (i->cinfo.image_width >> 2) && io->rows <= (i->cinfo.image_height >> 2)) + { + DBG("Scaling to 1/4"); + i->cinfo.scale_num = 1; + i->cinfo.scale_denom = 4; + } + else if (io->cols <= (i->cinfo.image_width >> 1) && io->rows <= (i->cinfo.image_height >> 1)) + { + DBG("Scaling to 1/2"); + i->cinfo.scale_num = 1; + i->cinfo.scale_denom = 2; + } + jpeg_calc_output_dimensions(&i->cinfo); + DBG("Output dimensions %ux%u", (uint)i->cinfo.output_width, (uint)i->cinfo.output_height); + if (unlikely(!image_io_read_data_prepare(&rdi, io, i->cinfo.output_width, i->cinfo.output_height, 
read_flags))) + { + jpeg_destroy_decompress(&i->cinfo); + return 0; + } + + /* Setup fallback */ + if (setjmp(i->err.setjmp_buf)) + { + DBG("Libjpeg failed to read the image, longjump saved us"); + jpeg_destroy_decompress(&i->cinfo); + image_io_read_data_break(&rdi, io); + return 0; + } + + /* Decompress the image */ + struct image *img = rdi.image; + jpeg_start_decompress(&i->cinfo); + if ((int)img->pixel_size == i->cinfo.output_components) + { + byte *pixels = img->pixels; + for (uint r = img->rows; r--; ) + { + jpeg_read_scanlines(&i->cinfo, (JSAMPLE **)&pixels, 1); + pixels += img->row_size; + } + } + else + { + switch (img->pixel_size) + { + case 2: /* Grayscale -> Grayscale+Alpha */ + { + ASSERT(i->cinfo.output_components == 1); + byte buf[img->cols], *src; +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE img +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 2 +# define IMAGE_WALK_DO_ROW_START do{ src = buf; jpeg_read_scanlines(&i->cinfo, (JSAMPLE **)&src, 1); }while(0) +# define IMAGE_WALK_DO_STEP do{ walk_pos[0] = *src++; walk_pos[1] = 255; }while(0) +# include + } + break; + case 4: /* * -> *+Alpha or aligned * */ + { + ASSERT(i->cinfo.output_components == 3); + byte buf[img->cols * 3], *src; +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE img +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 4 +# define IMAGE_WALK_DO_ROW_START do{ src = buf; jpeg_read_scanlines(&i->cinfo, (JSAMPLE **)&src, 1); }while(0) +# define IMAGE_WALK_DO_STEP do{ *(u32 *)walk_pos = *(u32 *)src; walk_pos[3] = 255; src += 3; }while(0) +# include + } + break; + default: + ASSERT(0); + } + + } + + ASSERT(i->cinfo.output_scanline == i->cinfo.output_height); + + /* Destroy libjpeg object */ + jpeg_finish_decompress(&i->cinfo); + jpeg_destroy_decompress(&i->cinfo); + + /* Finish the image */ + return image_io_read_data_finish(&rdi, io); +} + +int +libjpeg_write(struct image_io 
*io) +{ + DBG("libjpeg_write()"); + struct libjpeg_write_internals i; + i.fastbuf = io->fastbuf; + + /* Create libjpeg write structure */ + DBG("Creating libjpeg write structure"); + i.cinfo.err = jpeg_std_error(&i.err.pub); + i.err.pub.error_exit = libjpeg_write_error_exit; + i.err.pub.emit_message = libjpeg_emit_message; + i.err.io = io; + if (setjmp(i.err.setjmp_buf)) + { + DBG("Libjpeg failed to write the image, longjump saved us"); + jpeg_destroy_compress(&i.cinfo); + return 0; + } + jpeg_create_compress(&i.cinfo); + + /* Initialize destination manager */ + i.cinfo.dest = &i.dest; + i.dest.init_destination = libjpeg_init_destination; + i.dest.term_destination = libjpeg_term_destination; + i.dest.empty_output_buffer = libjpeg_empty_output_buffer; + + /* Set output parameters */ + struct image *img = io->image; + i.cinfo.image_width = img->cols; + i.cinfo.image_height = img->rows; + switch (img->flags & IMAGE_COLOR_SPACE) + { + case COLOR_SPACE_GRAYSCALE: + i.cinfo.in_color_space = JCS_GRAYSCALE; + break; + case COLOR_SPACE_RGB: + i.cinfo.in_color_space = JCS_RGB; + break; + case COLOR_SPACE_YCBCR: + i.cinfo.in_color_space = JCS_YCbCr; + break; + case COLOR_SPACE_CMYK: + i.cinfo.in_color_space = JCS_CMYK; + break; + case COLOR_SPACE_YCCK: + i.cinfo.in_color_space = JCS_YCCK; + break; + default: + jpeg_destroy_compress(&i.cinfo); + IMAGE_ERROR(io->context, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Unsupported pixel format."); + return 0; + } + i.cinfo.input_components = color_space_channels[img->flags & IMAGE_COLOR_SPACE]; + jpeg_set_defaults(&i.cinfo); + jpeg_set_colorspace(&i.cinfo, i.cinfo.in_color_space); + if (io->jpeg_quality) + jpeg_set_quality(&i.cinfo, MIN(io->jpeg_quality, 100), 1); + if (io->exif_size) + { + /* According to the Exif specification, the Exif APP1 marker has to follow immediately after the SOI, + * just as the JFIF specification requires the same for the JFIF APP0 marker! + * Therefore a JPEG file cannot legally be both Exif and JFIF. 
*/ + i.cinfo.write_JFIF_header = FALSE; + i.cinfo.write_Adobe_marker = FALSE; + } + + /* Compress the image */ + jpeg_start_compress(&i.cinfo, TRUE); + if (io->exif_size) + { + DBG("Writing EXIF"); + jpeg_write_marker(&i.cinfo, JPEG_APP0 + 1, io->exif_data, io->exif_size); + } + if ((int)img->pixel_size == i.cinfo.input_components) + { + byte *pixels = img->pixels; + for (uint r = img->rows; r--; ) + { + jpeg_write_scanlines(&i.cinfo, (JSAMPLE **)&pixels, 1); + pixels += img->row_size; + } + } + else + { + switch (img->pixel_size) + { + case 2: /* Grayscale+Alpha -> Grayscale */ + { + ASSERT(i.cinfo.input_components == 1); + byte buf[img->cols], *dest = buf; +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE img +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 2 +# define IMAGE_WALK_DO_ROW_END do{ dest = buf; jpeg_write_scanlines(&i.cinfo, (JSAMPLE **)&dest, 1); }while(0) +# define IMAGE_WALK_DO_STEP do{ *dest++ = walk_pos[0]; }while(0) +# include + break; + } + case 4: /* *+Alpha or aligned * -> * */ + { + ASSERT(i.cinfo.input_components == 3); + byte buf[img->cols * 3], *dest = buf; +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE img +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 4 +# define IMAGE_WALK_DO_ROW_END do{ dest = buf; jpeg_write_scanlines(&i.cinfo, (JSAMPLE **)&dest, 1); }while(0) +# define IMAGE_WALK_DO_STEP do{ *dest++ = walk_pos[0]; *dest++ = walk_pos[1]; *dest++ = walk_pos[2]; }while(0) +# include + break; + } + default: + ASSERT(0); + } + } + ASSERT(i.cinfo.next_scanline == i.cinfo.image_height); + jpeg_finish_compress(&i.cinfo); + jpeg_destroy_compress(&i.cinfo); + return 1; +} diff --git a/libucw/images/io-libmagick.c b/libucw/images/io-libmagick.c new file mode 100644 index 0000000..3636163 --- /dev/null +++ b/libucw/images/io-libmagick.c @@ -0,0 +1,421 @@ +/* + * Image Library -- GraphicsMagick (slow fallback library) + * + * (c) 
2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define MAX_FILE_SIZE (1 << 30) +#define QUANTUM_SCALE (QuantumDepth - 8) +#define QUANTUM_TO_BYTE(x) ((uint)(x) >> QUANTUM_SCALE) +#define BYTE_TO_QUANTUM(x) ((uint)(x) << QUANTUM_SCALE) +#define ALPHA_TO_BYTE(x) (255 - QUANTUM_TO_BYTE(x)) +#define BYTE_TO_ALPHA(x) (BYTE_TO_QUANTUM(255 - (x))) + +static pthread_mutex_t libmagick_mutex = PTHREAD_MUTEX_INITIALIZER; +static uint libmagick_counter; + +struct magick_read_data { + ExceptionInfo exception; + ImageInfo *info; + Image *image; +}; + +int +libmagick_init(struct image_io *io UNUSED) +{ + pthread_mutex_lock(&libmagick_mutex); + if (!libmagick_counter++) + InitializeMagick(NULL); + pthread_mutex_unlock(&libmagick_mutex); + return 1; +} + +void +libmagick_cleanup(struct image_io *io UNUSED) +{ + pthread_mutex_lock(&libmagick_mutex); + if (!--libmagick_counter) + DestroyMagick(); + pthread_mutex_unlock(&libmagick_mutex); +} + +static void +libmagick_destroy_read_data(struct magick_read_data *rd) +{ + if (rd->image) + DestroyImage(rd->image); + DestroyImageInfo(rd->info); + DestroyExceptionInfo(&rd->exception); +} + +static void +libmagick_read_cancel(struct image_io *io) +{ + DBG("libmagick_read_cancel()"); + + struct magick_read_data *rd = io->read_data; + libmagick_destroy_read_data(rd); +} + +int +libmagick_read_header(struct image_io *io) +{ + DBG("libmagick_read_header()"); + + /* Read entire stream */ + ucw_off_t file_size = bfilesize(io->fastbuf) - btell(io->fastbuf); + if (unlikely(file_size > MAX_FILE_SIZE)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Too long stream."); + return 0; + } + uint buf_size = file_size; + byte *buf = xmalloc(buf_size); + breadb(io->fastbuf, buf, buf_size); + + /* 
Allocate read structure */ + struct magick_read_data *rd = io->read_data = mp_alloc_zero(io->internal_pool, sizeof(*rd)); + + /* Initialize GraphicsMagick */ + GetExceptionInfo(&rd->exception); + rd->info = CloneImageInfo(NULL); + rd->info->subrange = 1; + + /* Read the image */ + rd->image = BlobToImage(rd->info, buf, buf_size, &rd->exception); + xfree(buf); + if (unlikely(!rd->image)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "GraphicsMagick failed to read the image."); + goto err; + } + if (unlikely(rd->image->columns > image_max_dim || rd->image->rows > image_max_dim)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_INVALID_DIMENSIONS, "Image too large."); + goto err; + } + + /* Fill image parameters */ + io->cols = rd->image->columns; + io->rows = rd->image->rows; + switch (rd->image->colorspace) + { + case GRAYColorspace: + io->flags = COLOR_SPACE_GRAYSCALE; + break; + default: + io->flags = COLOR_SPACE_RGB; + break; + } + if (rd->image->matte) + io->flags |= IMAGE_ALPHA; + io->number_of_colors = rd->image->colors; + if (rd->image->storage_class == PseudoClass && rd->image->compression != JPEGCompression) + io->flags |= IMAGE_IO_HAS_PALETTE; + + io->read_cancel = libmagick_read_cancel; + return 1; + +err: + libmagick_destroy_read_data(rd); + return 0; +} + +static inline byte +libmagick_pixel_to_gray(PixelPacket *pixel) +{ + return rgb_to_gray_func(pixel->red, pixel->green, pixel->blue) >> QUANTUM_SCALE; +} + +int +libmagick_read_data(struct image_io *io) +{ + DBG("libmagick_read_data()"); + + struct magick_read_data *rd = io->read_data; + + /* Quantize image */ + switch (rd->image->colorspace) + { + case RGBColorspace: + case GRAYColorspace: + break; + default: ; + QuantizeInfo quantize; + GetQuantizeInfo(&quantize); + quantize.colorspace = RGBColorspace; + QuantizeImage(&quantize, rd->image); + break; + } + + /* Prepare the image */ + struct image_io_read_data_internals rdi; + uint read_flags = io->flags; + uint cs = read_flags & IMAGE_COLOR_SPACE; 
+ if (cs != COLOR_SPACE_GRAYSCALE && cs != COLOR_SPACE_RGB) + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_PIXEL_FORMAT) | COLOR_SPACE_RGB; + if ((read_flags & IMAGE_IO_USE_BACKGROUND) && !(read_flags & IMAGE_ALPHA)) + read_flags = (read_flags & IMAGE_CHANNELS_FORMAT) | IMAGE_ALPHA; + if (unlikely(!image_io_read_data_prepare(&rdi, io, rd->image->columns, rd->image->rows, read_flags))) + { + libmagick_destroy_read_data(rd); + return 0; + } + + /* Acquire pixels */ + PixelPacket *src = (PixelPacket *)AcquireImagePixels(rd->image, 0, 0, rd->image->columns, rd->image->rows, &rd->exception); + if (unlikely(!src)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Cannot acquire image pixels."); + libmagick_destroy_read_data(rd); + image_io_read_data_break(&rdi, io); + return 0; + } + + /* Convert pixels */ + switch (rdi.image->pixel_size) + { + case 1: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE (rdi.image) +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 1 +# define IMAGE_WALK_DO_STEP do{ \ + walk_pos[0] = libmagick_pixel_to_gray(src); \ + src++; }while(0) +# include + break; + + case 2: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE (rdi.image) +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 2 +# define IMAGE_WALK_DO_STEP do{ \ + walk_pos[0] = libmagick_pixel_to_gray(src); \ + walk_pos[1] = ALPHA_TO_BYTE(src->opacity); \ + src++; }while(0) +# include + break; + + case 3: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE (rdi.image) +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 3 +# define IMAGE_WALK_DO_STEP do{ \ + walk_pos[0] = QUANTUM_TO_BYTE(src->red); \ + walk_pos[1] = QUANTUM_TO_BYTE(src->green); \ + walk_pos[2] = QUANTUM_TO_BYTE(src->blue); \ + src++; }while(0) +# include + break; + + case 4: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define 
IMAGE_WALK_IMAGE (rdi.image) +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 4 +# define IMAGE_WALK_DO_STEP do{ \ + walk_pos[0] = QUANTUM_TO_BYTE(src->red); \ + walk_pos[1] = QUANTUM_TO_BYTE(src->green); \ + walk_pos[2] = QUANTUM_TO_BYTE(src->blue); \ + walk_pos[3] = ALPHA_TO_BYTE(src->opacity); \ + src++; }while(0) +# include + break; + + default: + ASSERT(0); + } + + /* Free GraphicsMagick structures */ + libmagick_destroy_read_data(rd); + + /* Finish the image */ + return image_io_read_data_finish(&rdi, io); +} + +int +libmagick_write(struct image_io *io) +{ + DBG("libmagick_write()"); + + /* Initialize GraphicsMagick */ + int result = 0; + ExceptionInfo exception; + ImageInfo *info; + GetExceptionInfo(&exception); + info = CloneImageInfo(NULL); + + /* Setup image parameters and allocate the image*/ + struct image *img = io->image; + switch (img->flags & IMAGE_COLOR_SPACE) + { + case COLOR_SPACE_GRAYSCALE: + info->colorspace = GRAYColorspace; + break; + case COLOR_SPACE_RGB: + info->colorspace = RGBColorspace; + break; + default: + IMAGE_ERROR(io->context, IMAGE_ERROR_WRITE_FAILED, "Unsupported color space."); + goto err; + } + switch (io->format) + { + case IMAGE_FORMAT_JPEG: + strcpy(info->magick, "JPEG"); + if (io->jpeg_quality) + info->quality = MIN(io->jpeg_quality, 100); + break; + case IMAGE_FORMAT_PNG: + strcpy(info->magick, "PNG"); + break; + case IMAGE_FORMAT_GIF: + strcpy(info->magick, "GIF"); + break; + default: + ASSERT(0); + } + Image *image = AllocateImage(info); + if (unlikely(!image)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_WRITE_FAILED, "GraphicsMagick failed to allocate the image."); + goto err; + } + image->columns = img->cols; + image->rows = img->rows; + + /* Get pixels */ + PixelPacket *pixels = SetImagePixels(image, 0, 0, img->cols, img->rows), *dest = pixels; + if (unlikely(!pixels)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_WRITE_FAILED, "Cannot get GraphicsMagick pixels."); + goto err2; + } + + /* Convert pixels */ + 
switch (img->pixel_size) + { + case 1: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE img +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 1 +# define IMAGE_WALK_DO_STEP do{ \ + dest->red = BYTE_TO_QUANTUM(walk_pos[0]); \ + dest->green = BYTE_TO_QUANTUM(walk_pos[0]); \ + dest->blue = BYTE_TO_QUANTUM(walk_pos[0]); \ + dest->opacity = 0; \ + dest++; }while(0) +# include + break; + + case 2: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE img +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 2 +# define IMAGE_WALK_DO_STEP do{ \ + dest->red = BYTE_TO_QUANTUM(walk_pos[0]); \ + dest->green = BYTE_TO_QUANTUM(walk_pos[0]); \ + dest->blue = BYTE_TO_QUANTUM(walk_pos[0]); \ + dest->opacity = BYTE_TO_ALPHA(walk_pos[1]); \ + dest++; }while(0) +# include + break; + + case 3: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE img +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 3 +# define IMAGE_WALK_DO_STEP do{ \ + dest->red = BYTE_TO_QUANTUM(walk_pos[0]); \ + dest->green = BYTE_TO_QUANTUM(walk_pos[1]); \ + dest->blue = BYTE_TO_QUANTUM(walk_pos[2]); \ + dest->opacity = 0; \ + dest++; }while(0) +# include + break; + + case 4: +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE img +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 4 +# define IMAGE_WALK_DO_STEP do{ \ + dest->red = BYTE_TO_QUANTUM(walk_pos[0]); \ + dest->green = BYTE_TO_QUANTUM(walk_pos[1]); \ + dest->blue = BYTE_TO_QUANTUM(walk_pos[2]); \ + dest->opacity = BYTE_TO_ALPHA(walk_pos[3]); \ + dest++; }while(0) +# include + break; + + default: + ASSERT(0); + } + + /* Store pixels */ + if (unlikely(!SyncImagePixels(image))) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_WRITE_FAILED, "Cannot sync GraphicsMagick pixels."); + goto err2; + } + + /* Write image */ + size_t buf_len = 0; + void *buf = 
ImageToBlob(info, image, &buf_len, &exception); + if (unlikely(!buf)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_WRITE_FAILED, "GraphicsMagick failed to compress the image."); + goto err2; + } + if (unlikely(buf_len > MAX_FILE_SIZE)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_WRITE_FAILED, "Image too large."); + goto err3; + } + + /* Write to stream */ + bwrite(io->fastbuf, buf, buf_len); + /* Success */ + result = 1; +err3: /* The blob returned by ImageToBlob() is ours to release, on success and on the size-limit failure alike */ + MagickFree(buf); +err2: + DestroyImage(image); +err: + DestroyImageInfo(info); + DestroyExceptionInfo(&exception); + return result; +} diff --git a/libucw/images/io-libpng.c b/libucw/images/io-libpng.c new file mode 100644 index 0000000..e39c8b7 --- /dev/null +++ b/libucw/images/io-libpng.c @@ -0,0 +1,382 @@ +/* + * Image Library -- libpng + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct libpng_read_data { + png_structp png_ptr; + png_infop info_ptr; + png_infop end_ptr; + png_uint_32 cols; + png_uint_32 rows; + int bit_depth; + int color_type; +}; + +static png_voidp +libpng_malloc(png_structp png_ptr, png_size_t size) +{ + DBG("libpng_malloc(size=%u)", (uint)size); + return mp_alloc(png_get_mem_ptr(png_ptr), size); +} + +static void +libpng_free(png_structp png_ptr UNUSED, png_voidp ptr UNUSED) +{ + DBG("libpng_free()"); +} + +static void NONRET +libpng_read_error(png_structp png_ptr, png_const_charp msg) +{ + DBG("libpng_read_error()"); + IMAGE_ERROR(png_get_error_ptr(png_ptr), IMAGE_ERROR_READ_FAILED, "%s", msg); + longjmp(png_jmpbuf(png_ptr), 1); +} + +static void NONRET +libpng_write_error(png_structp png_ptr, png_const_charp msg) +{ + DBG("libpng_write_error()"); + IMAGE_ERROR(png_get_error_ptr(png_ptr), IMAGE_ERROR_WRITE_FAILED, "%s", msg); + longjmp(png_jmpbuf(png_ptr), 1); +} + +static void +libpng_warning(png_structp
png_ptr UNUSED, png_const_charp msg UNUSED) +{ + DBG("libpng_warning(): %s", (byte *)msg); +} + +static void +libpng_read_fn(png_structp png_ptr, png_bytep data, png_size_t length) +{ + DBG("libpng_read_fn(len=%u)", (uint)length); + if (unlikely(bread((struct fastbuf *)png_get_io_ptr(png_ptr), (byte *)data, length) < length)) + png_error(png_ptr, "Incomplete data"); +} + +static void +libpng_write_fn(png_structp png_ptr, png_bytep data, png_size_t length) +{ + DBG("libpng_write_fn(len=%u)", (uint)length); + bwrite((struct fastbuf *)png_get_io_ptr(png_ptr), (byte *)data, length); +} + +static void +libpng_flush_fn(png_structp png_ptr UNUSED) +{ + DBG("libpng_flush_fn()"); +} + +static void +libpng_read_cancel(struct image_io *io) +{ + DBG("libpng_read_cancel()"); + + struct libpng_read_data *rd = io->read_data; + png_destroy_read_struct(&rd->png_ptr, &rd->info_ptr, &rd->end_ptr); +} + +int +libpng_read_header(struct image_io *io) +{ + DBG("libpng_read_header()"); + + /* Create libpng structures */ + struct libpng_read_data *rd = io->read_data = mp_alloc(io->internal_pool, sizeof(*rd)); + rd->png_ptr = png_create_read_struct_2(PNG_LIBPNG_VER_STRING, + io->context, libpng_read_error, libpng_warning, + io->internal_pool, libpng_malloc, libpng_free); + if (unlikely(!rd->png_ptr)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Cannot create libpng read structure."); + return 0; + } + rd->info_ptr = png_create_info_struct(rd->png_ptr); + if (unlikely(!rd->info_ptr)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Cannot create libpng info structure."); + png_destroy_read_struct(&rd->png_ptr, NULL, NULL); + return 0; + } + rd->end_ptr = png_create_info_struct(rd->png_ptr); + if (unlikely(!rd->end_ptr)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Cannot create libpng info structure."); + png_destroy_read_struct(&rd->png_ptr, &rd->info_ptr, NULL); + return 0; + } + + /* Setup libpng longjump */ + if (unlikely(setjmp(png_jmpbuf(rd->png_ptr)))) 
+ { + DBG("Libpng failed to read the image, longjump saved us"); + png_destroy_read_struct(&rd->png_ptr, &rd->info_ptr, &rd->end_ptr); + return 0; + } + + /* Setup libpng IO */ + png_set_read_fn(rd->png_ptr, io->fastbuf, libpng_read_fn); + png_set_user_limits(rd->png_ptr, image_max_dim, image_max_dim); + + /* Read header */ + png_read_info(rd->png_ptr, rd->info_ptr); + png_get_IHDR(rd->png_ptr, rd->info_ptr, &rd->cols, &rd->rows, &rd->bit_depth, &rd->color_type, NULL, NULL, NULL); + + /* Fill image_io values */ + io->cols = rd->cols; + io->rows = rd->rows; + switch (rd->color_type) + { + case PNG_COLOR_TYPE_GRAY: + io->flags = COLOR_SPACE_GRAYSCALE; + io->number_of_colors = 1 << 8; + break; + case PNG_COLOR_TYPE_GRAY_ALPHA: + io->flags = COLOR_SPACE_GRAYSCALE | IMAGE_ALPHA; + io->number_of_colors = 1 << 8; + break; + case PNG_COLOR_TYPE_RGB: + io->flags = COLOR_SPACE_RGB; + io->number_of_colors = 1 << 24; + break; + case PNG_COLOR_TYPE_RGB_ALPHA: + io->number_of_colors = 1 << 24; + io->flags = COLOR_SPACE_RGB | IMAGE_ALPHA; + break; + case PNG_COLOR_TYPE_PALETTE: + io->flags = COLOR_SPACE_RGB | IMAGE_ALPHA | IMAGE_IO_HAS_PALETTE; + int num_palette; png_colorp palette; /* png_get_PLTE() reports nothing unless it is given a place to store the palette pointer */ + if (png_get_PLTE(rd->png_ptr, rd->info_ptr, &palette, &num_palette)) + io->number_of_colors = num_palette; + else + io->number_of_colors = 1 << rd->bit_depth; + break; + default: + png_destroy_read_struct(&rd->png_ptr, &rd->info_ptr, &rd->end_ptr); + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Unknown color type"); + return 0; /* the read structures were just destroyed -- never fall through to the success path and install read_cancel */ + } + + /* Success */ + io->read_cancel = libpng_read_cancel; + return 1; +} + +int +libpng_read_data(struct image_io *io) +{ + DBG("libpng_read_data()"); + + struct libpng_read_data *rd = io->read_data; + + struct image_io_read_data_internals rdi; + rdi.image = NULL; + + if (setjmp(png_jmpbuf(rd->png_ptr))) + { + DBG("Libpng failed to read the image, longjump saved us"); + png_destroy_read_struct(&rd->png_ptr, &rd->info_ptr, &rd->end_ptr); + if (rdi.image) + image_io_read_data_break(&rdi, io); +
return 0; + } + + uint read_flags = io->flags; + + /* Apply transformations */ + if (rd->bit_depth == 16) + png_set_strip_16(rd->png_ptr); + switch (rd->color_type) + { + case PNG_COLOR_TYPE_PALETTE: + if ((read_flags & IMAGE_COLOR_SPACE) == COLOR_SPACE_GRAYSCALE) + { + png_set_palette_to_rgb(rd->png_ptr); + png_set_rgb_to_gray_fixed(rd->png_ptr, 1, 21267, 71514); + } + else + { + png_set_palette_to_rgb(rd->png_ptr); + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_RGB; + } + if (!(read_flags & IMAGE_ALPHA)) + { + if (io->flags & IMAGE_IO_USE_BACKGROUND) + { + png_set_add_alpha(rd->png_ptr, 255, PNG_FILLER_AFTER); + read_flags = (read_flags & IMAGE_CHANNELS_FORMAT) | IMAGE_ALPHA; + } + else if ((read_flags & IMAGE_PIXEL_FORMAT) == (COLOR_SPACE_RGB | IMAGE_PIXELS_ALIGNED)) + png_set_add_alpha(rd->png_ptr, 255, PNG_FILLER_AFTER); + else + png_set_strip_alpha(rd->png_ptr); + } + else + png_set_add_alpha(rd->png_ptr, 255, PNG_FILLER_AFTER); + break; + case PNG_COLOR_TYPE_GRAY: + if ((read_flags & IMAGE_COLOR_SPACE) != COLOR_SPACE_GRAYSCALE) + { + png_set_gray_to_rgb(rd->png_ptr); + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_RGB; + } + if (read_flags & IMAGE_ALPHA) + png_set_add_alpha(rd->png_ptr, 255, PNG_FILLER_AFTER); + break; + case PNG_COLOR_TYPE_GRAY_ALPHA: + if ((read_flags & IMAGE_COLOR_SPACE) != COLOR_SPACE_GRAYSCALE) + { + png_set_gray_to_rgb(rd->png_ptr); + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_RGB; + } + if (!(read_flags & IMAGE_ALPHA)) + { + if (io->flags & IMAGE_IO_USE_BACKGROUND) + read_flags = (read_flags & IMAGE_CHANNELS_FORMAT) | IMAGE_ALPHA; + else + png_set_strip_alpha(rd->png_ptr); + } + break; + case PNG_COLOR_TYPE_RGB: + if ((read_flags & IMAGE_COLOR_SPACE) == COLOR_SPACE_GRAYSCALE) + png_set_rgb_to_gray_fixed(rd->png_ptr, 1, 21267, 71514); + else + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | 
COLOR_SPACE_RGB; + if ((read_flags & IMAGE_ALPHA) || (read_flags & IMAGE_PIXEL_FORMAT) == (COLOR_SPACE_RGB | IMAGE_PIXELS_ALIGNED)) + png_set_add_alpha(rd->png_ptr, 255, PNG_FILLER_AFTER); + break; + case PNG_COLOR_TYPE_RGB_ALPHA: + if ((read_flags & IMAGE_COLOR_SPACE) == COLOR_SPACE_GRAYSCALE) + png_set_rgb_to_gray_fixed(rd->png_ptr, 1, 21267, 71514); + else + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_RGB; + if (!(read_flags & IMAGE_ALPHA)) + if (io->flags & IMAGE_IO_USE_BACKGROUND) + read_flags = (read_flags & IMAGE_CHANNELS_FORMAT) | IMAGE_ALPHA; + else if ((read_flags & IMAGE_PIXEL_FORMAT) != (COLOR_SPACE_RGB | IMAGE_PIXELS_ALIGNED)) + png_set_strip_alpha(rd->png_ptr); + break; + default: + ASSERT(0); + } + png_read_update_info(rd->png_ptr, rd->info_ptr); + + /* Prepare the image */ + if (unlikely(!image_io_read_data_prepare(&rdi, io, rd->cols, rd->rows, read_flags))) + { + png_destroy_read_struct(&rd->png_ptr, &rd->info_ptr, &rd->end_ptr); + return 0; + } + /* Read image data */ + DBG("Reading image data"); + struct image *img = rdi.image; + byte *pixels = img->pixels; + png_bytep rows[img->rows]; + for (uint r = 0; r < img->rows; r++, pixels += img->row_size) + rows[r] = (png_bytep)pixels; + png_read_image(rd->png_ptr, rows); + png_read_end(rd->png_ptr, rd->end_ptr); + + /* Destroy libpng read structure */ + png_destroy_read_struct(&rd->png_ptr, &rd->info_ptr, &rd->end_ptr); + + /* Finish the image */ + return image_io_read_data_finish(&rdi, io); +} + +int +libpng_write(struct image_io *io) +{ + DBG("libpng_write()"); + + /* Create libpng structures */ + png_structp png_ptr = png_create_write_struct_2(PNG_LIBPNG_VER_STRING, + io->context, libpng_write_error, libpng_warning, + io->internal_pool, libpng_malloc, libpng_free); + if (unlikely(!png_ptr)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_WRITE_FAILED, "Cannot create libpng write structure."); + return 0; + } + png_infop info_ptr = png_create_info_struct(png_ptr); 
+ if (unlikely(!info_ptr)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_WRITE_FAILED, "Cannot create libpng info structure."); + png_destroy_write_struct(&png_ptr, NULL); + return 0; + } + + /* Setup libpng longjump */ + if (unlikely(setjmp(png_jmpbuf(png_ptr)))) + { + DBG("Libpng failed to write the image, longjump saved us."); + png_destroy_write_struct(&png_ptr, &info_ptr); + return 0; + } + + /* Setup libpng IO */ + png_set_write_fn(png_ptr, io->fastbuf, libpng_write_fn, libpng_flush_fn); + + /* Setup PNG parameters */ + struct image *img = io->image; + switch (img->flags & IMAGE_PIXEL_FORMAT) + { + case COLOR_SPACE_GRAYSCALE | IMAGE_PIXELS_ALIGNED: + png_set_IHDR(png_ptr, info_ptr, img->cols, img->rows, 8, PNG_COLOR_TYPE_GRAY, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + break; + case COLOR_SPACE_GRAYSCALE | IMAGE_ALPHA | IMAGE_PIXELS_ALIGNED: + png_set_IHDR(png_ptr, info_ptr, img->cols, img->rows, 8, PNG_COLOR_TYPE_GRAY_ALPHA, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + break; + case COLOR_SPACE_RGB: + png_set_IHDR(png_ptr, info_ptr, img->cols, img->rows, 8, PNG_COLOR_TYPE_RGB, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + break; + case COLOR_SPACE_RGB | IMAGE_ALPHA | IMAGE_PIXELS_ALIGNED: + png_set_IHDR(png_ptr, info_ptr, img->cols, img->rows, 8, PNG_COLOR_TYPE_RGB_ALPHA, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + break; + case COLOR_SPACE_RGB | IMAGE_PIXELS_ALIGNED: + png_set_IHDR(png_ptr, info_ptr, img->cols, img->rows, 8, PNG_COLOR_TYPE_RGB, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + png_set_filler(png_ptr, 0, PNG_FILLER_AFTER); + break; + default: + IMAGE_ERROR(io->context, IMAGE_ERROR_WRITE_FAILED, "Libpng does not support this pixel format (0x%x)", img->flags & IMAGE_PIXEL_FORMAT); + png_destroy_write_struct(&png_ptr, &info_ptr); + return 0; + } + png_write_info(png_ptr, 
info_ptr); + + /* Write pixels */ + byte *pixels = img->pixels; + png_bytep rows[img->rows]; + for (uint r = 0; r < img->rows; r++, pixels += img->row_size) + rows[r] = (png_bytep)pixels; + png_write_image(png_ptr, rows); + png_write_end(png_ptr, info_ptr); + + /* Free libpng structure */ + png_destroy_write_struct(&png_ptr, &info_ptr); + return 1; +} diff --git a/libucw/images/io-libungif.c b/libucw/images/io-libungif.c new file mode 100644 index 0000000..f444fc5 --- /dev/null +++ b/libucw/images/io-libungif.c @@ -0,0 +1,306 @@ +/* + * Image Library -- libungif + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include +#include + +#include + +// API of gif_lib has changed recenly +#ifndef GIFLIB_MAJOR +#define GIFLIB_MAJOR 0 +#endif +#if GIFLIB_MAJOR > 5 || GIFLIB_MAJOR == 5 && GIFLIB_MINOR >= 1 +static int dgif_error_code; // Scratch pad only, no locking needed +#define DGIF_OPEN(_userptr, _readfunc) DGifOpen(_userptr, _readfunc, &dgif_error_code) +#define DGIF_CLOSE_FILE(_gif) DGifCloseFile(_gif, &dgif_error_code) +#else +#define DGIF_OPEN(_userptr, _readfunc) DGifOpen(_userptr, _readfunc) +#define DGIF_CLOSE_FILE(_gif) DGifCloseFile(_gif) +#endif + +struct libungif_read_data { + GifFileType *gif; + int transparent_index; +}; + +static int +libungif_read_func(GifFileType *gif, GifByteType *ptr, int len) +{ + DBG("libungif_read_func(len=%d)", len); + return bread((struct fastbuf *)gif->UserData, (byte *)ptr, len); +} + +static void +libungif_read_cancel(struct image_io *io) +{ + DBG("libungif_read_cancel()"); + + struct libungif_read_data *rd = io->read_data; + DGIF_CLOSE_FILE(rd->gif); +} + +int +libungif_read_header(struct image_io *io) +{ + DBG("libungif_read_header()"); + + /* Create libungif structure */ + GifFileType *gif; + if (unlikely(!(gif = DGIF_OPEN(io->fastbuf, 
libungif_read_func)))) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Cannot create libungif structure."); + return 0; + } + + struct libungif_read_data *rd = io->read_data = mp_alloc(io->internal_pool, sizeof(*rd)); + rd->gif = gif; + + DBG("executing DGifSlurp()"); + if (unlikely(DGifSlurp(gif) != GIF_OK)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Gif read failed."); + DGIF_CLOSE_FILE(gif); + return 0; + } + + DBG("ImageCount=%d ColorResolution=%d SBackGroundColor=%d SColorMap=%p", gif->ImageCount, gif->SColorResolution, gif->SBackGroundColor, gif->SColorMap); + if (unlikely(!gif->ImageCount)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "There are no images in gif file."); + DGIF_CLOSE_FILE(gif); + return 0; + } + + /* Read image parameters */ + SavedImage *image = gif->SavedImages; + if (unlikely(image->ImageDesc.Width <= 0 || image->ImageDesc.Height <= 0 || + image->ImageDesc.Width > (int)image_max_dim || image->ImageDesc.Height > (int)image_max_dim)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_INVALID_DIMENSIONS, "Invalid gif dimensions."); + DGIF_CLOSE_FILE(gif); + return 0; + } + ColorMapObject *color_map = image->ImageDesc.ColorMap ? 
: gif->SColorMap; + if (unlikely(!color_map)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Missing palette."); + DGIF_CLOSE_FILE(gif); + return 0; + } + io->cols = image->ImageDesc.Width; + io->rows = image->ImageDesc.Height; + if (unlikely((io->number_of_colors = color_map->ColorCount) > 256)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Too many gif colors."); + DGIF_CLOSE_FILE(gif); + return 0; + } + io->flags = COLOR_SPACE_RGB | IMAGE_IO_HAS_PALETTE; + + /* Search extension blocks */ + rd->transparent_index = -1; + for (int i = 0; i < image->ExtensionBlockCount; i++) + { + ExtensionBlock *e = image->ExtensionBlocks + i; + if (e->Function == 0xF9) + { + DBG("Found graphics control extension"); + if (unlikely(e->ByteCount != 4)) + { + IMAGE_ERROR(io->context, IMAGE_ERROR_READ_FAILED, "Invalid graphics control extension."); + DGIF_CLOSE_FILE(gif); + return 0; + } + byte *b = e->Bytes; + /* transparent color present */ + if (b[0] & 1) + { + rd->transparent_index = b[3]; + io->flags |= IMAGE_ALPHA; + if (gif->SColorMap && gif->SBackGroundColor >= 0 && gif->SBackGroundColor < gif->SColorMap->ColorCount) /* the background index comes from the file and refers to the global color table: validate it before use */ + { + GifColorType *background = gif->SColorMap->Colors + gif->SBackGroundColor; + color_make_rgb(&io->background_color, background->Red, background->Green, background->Blue); + } + } + /* We've got everything we need :-) */ + break; + } + else + DBG("Found unknown extension: type=%d size=%d", e->Function, e->ByteCount); + } + + /* Success */ + io->read_cancel = libungif_read_cancel; + return 1; +} + +int +libungif_read_data(struct image_io *io) +{ + DBG("libungif_read_data()"); + + struct libungif_read_data *rd = io->read_data; + GifFileType *gif = rd->gif; + SavedImage *image = gif->SavedImages; + + /* Prepare image */ + struct image_io_read_data_internals rdi; + uint read_flags = io->flags; + uint cs = read_flags & IMAGE_COLOR_SPACE; + if (cs != COLOR_SPACE_GRAYSCALE && cs != COLOR_SPACE_RGB) + read_flags = (read_flags & ~IMAGE_COLOR_SPACE & IMAGE_CHANNELS_FORMAT) | COLOR_SPACE_RGB; + if
(unlikely(!image_io_read_data_prepare(&rdi, io, image->ImageDesc.Width, image->ImageDesc.Height, read_flags))) + { + DGIF_CLOSE_FILE(gif); + return 0; + } + + /* Get pixels and palette */ + byte *pixels = (byte *)image->RasterBits; + ColorMapObject *color_map = image->ImageDesc.ColorMap ? : gif->SColorMap; + GifColorType *palette = color_map->Colors; + byte *img_end = rdi.image->pixels + rdi.image->image_size; + + /* Handle deinterlacing */ + uint dein_step, dein_next; + if (image->ImageDesc.Interlace) + { + DBG("Deinterlaced image"); + dein_step = dein_next = rdi.image->row_size << 3; + } + else + dein_step = dein_next = rdi.image->row_size; + + /* Convert pixels */ + switch (rdi.image->pixel_size) + { + case 1: + { + byte pal[256], *pal_pos = pal, *pal_end = pal + 256; + for (uint i = 0; i < (uint)color_map->ColorCount; i++, pal_pos++, palette++) + *pal_pos = rgb_to_gray_func(palette->Red, palette->Green, palette->Blue); + if (pal_pos != pal_end) + bzero(pal_pos, pal_end - pal_pos); + if (rd->transparent_index >= 0 && (io->flags & IMAGE_IO_USE_BACKGROUND)) + if (!color_put(io->context, &io->background_color, pal + rd->transparent_index, COLOR_SPACE_GRAYSCALE)) + { + DGIF_CLOSE_FILE(gif); + return 0; + } +# define DO_ROW_END do{ \ + walk_row_start += dein_step; \ + while (walk_row_start >= img_end) \ + { uint n = dein_next >> 1; walk_row_start = rdi.image->pixels + n, dein_step = dein_next; dein_next = n; } \ + }while(0) +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE (rdi.image) +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 1 +# define IMAGE_WALK_ROW_STEP 0 +# define IMAGE_WALK_DO_STEP do{ *walk_pos = pal[*pixels++]; }while(0) +# define IMAGE_WALK_DO_ROW_END DO_ROW_END +# include + break; + } + case 2: + { + byte pal[256 * 2], *pal_pos = pal, *pal_end = pal + 256 * 2; + for (uint i = 0; i < (uint)color_map->ColorCount; i++, pal_pos += 2, palette++) + { + pal_pos[0] = rgb_to_gray_func(palette->Red, 
palette->Green, palette->Blue); + pal_pos[1] = 255; + } + if (pal_pos != pal_end) + bzero(pal_pos, pal_end - pal_pos); + if (rd->transparent_index >= 0) + pal[rd->transparent_index * 2 + 1] = 0; +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE (rdi.image) +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 2 +# define IMAGE_WALK_ROW_STEP 0 +# define IMAGE_WALK_DO_STEP do{ *(u16 *)walk_pos = ((u16 *)pal)[*pixels++]; }while(0) +# define IMAGE_WALK_DO_ROW_END DO_ROW_END +# include + break; + } + case 3: + { + byte pal[256 * 4], *pal_pos = pal, *pal_end = pal + 256 * 4; + for (uint i = 0; i < (uint)color_map->ColorCount; i++, pal_pos += 4, palette++) + { + pal_pos[0] = palette->Red; + pal_pos[1] = palette->Green; + pal_pos[2] = palette->Blue; + } + if (pal_pos != pal_end) + bzero(pal_pos, pal_end - pal_pos); + if (rd->transparent_index >= 0 && (io->flags & IMAGE_IO_USE_BACKGROUND)) + if (!color_put(io->context, &io->background_color, pal + 4 * rd->transparent_index, COLOR_SPACE_RGB)) + { + DGIF_CLOSE_FILE(gif); + return 0; + } +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_IMAGE (rdi.image) +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 3 +# define IMAGE_WALK_ROW_STEP 0 +# define IMAGE_WALK_DO_STEP do{ byte *p = pal + 4 * (*pixels++); walk_pos[0] = p[0]; walk_pos[1] = p[1]; walk_pos[2] = p[2]; }while(0) +# define IMAGE_WALK_DO_ROW_END DO_ROW_END +# include + break; + } + case 4: + { + byte pal[256 * 4], *pal_pos = pal, *pal_end = pal + 256 * 4; + for (uint i = 0; i < (uint)color_map->ColorCount; i++, pal_pos += 4, palette++) + { + pal_pos[0] = palette->Red; + pal_pos[1] = palette->Green; + pal_pos[2] = palette->Blue; + pal_pos[3] = 255; + } + if (pal_pos != pal_end) + bzero(pal_pos, pal_end - pal_pos); + if (rd->transparent_index >= 0) + pal[rd->transparent_index * 4 + 3] = 0; +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define 
IMAGE_WALK_IMAGE (rdi.image) +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_COL_STEP 4 +# define IMAGE_WALK_ROW_STEP 0 +# define IMAGE_WALK_DO_STEP do{ *(u32 *)walk_pos = ((u32 *)pal)[*pixels++]; }while(0) +# define IMAGE_WALK_DO_ROW_END DO_ROW_END +# include + break; + } + default: + ASSERT(0); + } + + /* Destroy libungif structure */ + DGIF_CLOSE_FILE(gif); + + /* Finish image */ + return image_io_read_data_finish(&rdi, io); +} diff --git a/libucw/images/io-main.c b/libucw/images/io-main.c new file mode 100644 index 0000000..2244cf8 --- /dev/null +++ b/libucw/images/io-main.c @@ -0,0 +1,378 @@ +/* + * Image Library -- Image compression/decompression interface + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include + +#include + +int +image_io_init(struct image_context *ctx, struct image_io *io) +{ + DBG("image_io_init()"); + bzero(io, sizeof(*io)); + io->context = ctx; +#ifdef CONFIG_IMAGES_LIBJPEG + if (!libjpeg_init(io)) + goto libjpeg_failed; +#endif +#ifdef CONFIG_IMAGES_LIBPNG + if (!libpng_init(io)) + goto libpng_failed; +#endif +#ifdef CONFIG_IMAGES_LIBUNGIF + if (!libungif_init(io)) + goto libungif_failed; +#endif +#ifdef CONFIG_IMAGES_LIBMAGICK + if (!libmagick_init(io)) + goto libmagick_failed; +#endif + io->internal_pool = mp_new(1024); + return 1; +#ifdef CONFIG_IMAGES_LIBMAGICK + libmagick_cleanup(io); +libmagick_failed: +#endif +#ifdef CONFIG_IMAGES_LIBUNGIF + libungif_cleanup(io); +libungif_failed: +#endif +#ifdef CONFIG_IMAGES_LIBPNG + libpng_cleanup(io); +libpng_failed: +#endif +#ifdef CONFIG_IMAGES_LIBJPEG + libjpeg_cleanup(io); +libjpeg_failed: +#endif + return 0; +} + +static inline void +image_io_read_cancel(struct image_io *io) +{ + if (io->read_cancel) + { + io->read_cancel(io); + io->read_cancel = NULL; + } +} + +static inline void 
+image_io_image_destroy(struct image_io *io) +{ + if (io->image && (io->flags & IMAGE_IO_NEED_DESTROY)) + { + image_destroy(io->image); + io->flags &= ~IMAGE_IO_NEED_DESTROY; + io->image = NULL; + } +} + +void +image_io_cleanup(struct image_io *io) +{ + DBG("image_io_cleanup()"); + image_io_read_cancel(io); + image_io_image_destroy(io); +#ifdef CONFIG_IMAGES_LIBMAGICK + libmagick_cleanup(io); +#endif +#ifdef CONFIG_IMAGES_LIBUNGIF + libungif_cleanup(io); +#endif +#ifdef CONFIG_IMAGES_LIBPNG + libpng_cleanup(io); +#endif +#ifdef CONFIG_IMAGES_LIBJPEG + libjpeg_cleanup(io); +#endif + mp_delete(io->internal_pool); +} + +void +image_io_reset(struct image_io *io) +{ + DBG("image_io_reset()"); + image_io_read_cancel(io); + image_io_image_destroy(io); + struct mempool *pool = io->internal_pool; + struct image_context *ctx = io->context; + mp_flush(pool); + bzero(io, sizeof(*io)); + io->internal_pool = pool; + io->context = ctx; +} + +int +image_io_read_header(struct image_io *io) +{ + DBG("image_io_read_header()"); + image_io_read_cancel(io); + image_io_image_destroy(io); + switch (io->format) { + case IMAGE_FORMAT_JPEG: +#if defined(CONFIG_IMAGES_LIBJPEG) + return libjpeg_read_header(io); +#elif defined(CONFIG_IMAGES_LIBMAGICK) + return libmagick_read_header(io); +#endif + break; + + case IMAGE_FORMAT_PNG: +#if defined(CONFIG_IMAGES_LIBPNG) + return libpng_read_header(io); +#elif defined(CONFIG_IMAGES_LIBMAGICK) + return libmagick_read_header(io); +#endif + break; + + case IMAGE_FORMAT_GIF: +#if defined(CONFIG_IMAGES_LIBUNGIF) || defined(CONFIG_IMAGES_LIBGIF) + return libungif_read_header(io); +#elif defined(CONFIG_IMAGES_LIBMAGICK) + return libmagick_read_header(io); +#endif + break; + + case IMAGE_FORMAT_UNDEFINED: +#if defined (CONFIG_IMAGES_LIBMAGICK) + return libmagick_read_header(io); +#endif + break; + + default: + ASSERT(0); + } + IMAGE_ERROR(io->context, IMAGE_ERROR_INVALID_FILE_FORMAT, "Image format not supported."); + return 0; +} + +struct image * 
+image_io_read_data(struct image_io *io, int ref) +{ + DBG("image_io_read_data()"); + ASSERT(io->read_cancel); + io->read_cancel = NULL; + int result; + switch (io->format) { + case IMAGE_FORMAT_JPEG: +#if defined(CONFIG_IMAGES_LIBJPEG) + result = libjpeg_read_data(io); +#elif defined(CONFIG_IMAGES_LIBMAGICK) + result = libmagick_read_data(io); +#else + ASSERT(0); +#endif + break; + + case IMAGE_FORMAT_PNG: +#if defined(CONFIG_IMAGES_LIBPNG) + result = libpng_read_data(io); +#elif defined(CONFIG_IMAGES_LIBMAGICK) + result = libmagick_read_data(io); +#else + ASSERT(0); +#endif + break; + + case IMAGE_FORMAT_GIF: +#if defined(CONFIG_IMAGES_LIBUNGIF) || defined(CONFIG_IMAGES_LIBGIF) + result = libungif_read_data(io); +#elif defined(CONFIG_IMAGES_LIBMAGICK) + result = libmagick_read_data(io); +#else + ASSERT(0); +#endif + break; + + case IMAGE_FORMAT_UNDEFINED: +#if defined(CONFIG_IMAGES_LIBMAGICK) + result = libmagick_read_data(io); +#else + ASSERT(0); +#endif + break; + + default: + ASSERT(0); + } + if (result) + { + if (!ref) + io->flags |= IMAGE_IO_NEED_DESTROY; + else + io->flags &= ~IMAGE_IO_NEED_DESTROY; + return io->image; + } + else + return NULL; +} + +struct image * +image_io_read(struct image_io *io, int ref) +{ + if (!image_io_read_header(io)) + return NULL; + return image_io_read_data(io, ref); +} + +int +image_io_write(struct image_io *io) +{ + DBG("image_io_write()"); + image_io_read_cancel(io); + switch (io->format) { + case IMAGE_FORMAT_JPEG: +#if defined(CONFIG_IMAGES_LIBJPEG) + return libjpeg_write(io); +#elif defined(CONFIG_IMAGES_LIBMAGICK) + return libmagick_write(io); +#endif + break; + + case IMAGE_FORMAT_PNG: +#if defined(CONFIG_IMAGES_LIBPNG) + return libpng_write(io); +#elif defined(CONFIG_IMAGES_LIBMAGICK) + return libmagick_write(io); +#endif + break; + + case IMAGE_FORMAT_GIF: +#if defined(CONFIG_IMAGES_LIBMAGICK) + return libmagick_write(io); +#endif + break; + + default: + break; + } + IMAGE_ERROR(io->context, 
IMAGE_ERROR_INVALID_FILE_FORMAT, "Output format not supported."); + return 0; +} + +byte * +image_format_to_extension(enum image_format format) +{ + switch (format) + { + case IMAGE_FORMAT_JPEG: + return "jpg"; + case IMAGE_FORMAT_PNG: + return "png"; + case IMAGE_FORMAT_GIF: + return "gif"; + default: + return NULL; + } +} + +enum image_format +image_extension_to_format(byte *extension) +{ + if (!strcasecmp(extension, "jpg")) + return IMAGE_FORMAT_JPEG; + if (!strcasecmp(extension, "jpeg")) + return IMAGE_FORMAT_JPEG; + if (!strcasecmp(extension, "png")) + return IMAGE_FORMAT_PNG; + if (!strcasecmp(extension, "gif")) + return IMAGE_FORMAT_GIF; + return IMAGE_FORMAT_UNDEFINED; +} + +enum image_format +image_file_name_to_format(byte *file_name) +{ + byte *extension = strrchr(file_name, '.'); + return extension ? image_extension_to_format(extension + 1) : IMAGE_FORMAT_UNDEFINED; +} + +struct image * +image_io_read_data_prepare(struct image_io_read_data_internals *rdi, struct image_io *io, uint cols, uint rows, uint flags) +{ + DBG("image_io_read_data_prepare()"); + if (rdi->need_transformations = io->cols != cols || io->rows != rows || + ((io->flags ^ flags) & IMAGE_NEW_FLAGS)) + return rdi->image = image_new(io->context, cols, rows, flags & IMAGE_IO_IMAGE_FLAGS, NULL); + else + return rdi->image = image_new(io->context, io->cols, io->rows, io->flags & IMAGE_IO_IMAGE_FLAGS, io->pool); +} + +int +image_io_read_data_finish(struct image_io_read_data_internals *rdi, struct image_io *io) +{ + DBG("image_io_read_data_finish()"); + if (rdi->need_transformations) + { + /* Scale the image */ + if (io->cols != rdi->image->cols || io->rows != rdi->image->rows) + { + DBG("Scaling image"); + uint flags = rdi->image->flags; + if (!(rdi->need_transformations = ((io->flags ^ rdi->image->flags) & (IMAGE_NEW_FLAGS & ~IMAGE_PIXELS_ALIGNED)))) + flags = io->flags; + struct image *img = image_new(io->context, io->cols, io->rows, flags, rdi->need_transformations ? 
NULL : io->pool); + if (unlikely(!img)) + { + image_destroy(rdi->image); + return 0; + } + if (unlikely(!image_scale(io->context, img, rdi->image))) + { + image_destroy(rdi->image); + image_destroy(img); + return 0; + } + image_destroy(rdi->image); + rdi->image = img; + } + + /* Convert pixel format */ + if (io->flags != rdi->image->flags) + { + struct image *img = image_new(io->context, io->cols, io->rows, io->flags, io->pool); + if (unlikely(!img)) + { + image_destroy(rdi->image); + return 0; + } + struct image_conv_options opt = image_conv_defaults; + opt.background = io->background_color; + if (unlikely(!image_conv(io->context, img, rdi->image, &opt))) + { + image_destroy(rdi->image); + image_destroy(img); + return 0; + } + image_destroy(rdi->image); + rdi->image = img; + } + } + + /* Success */ + io->image = rdi->image; + return 1; +} + +void +image_io_read_data_break(struct image_io_read_data_internals *rdi, struct image_io *io UNUSED) +{ + DBG("image_io_read_data_break()"); + if (rdi->image) + image_destroy(rdi->image); +} diff --git a/libucw/images/io-main.h b/libucw/images/io-main.h new file mode 100644 index 0000000..a048b67 --- /dev/null +++ b/libucw/images/io-main.h @@ -0,0 +1,55 @@ +#ifndef _IMAGES_IO_MAIN_H +#define _IMAGES_IO_MAIN_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define image_io_read_data_break ucw_image_io_read_data_break +#define image_io_read_data_finish ucw_image_io_read_data_finish +#define image_io_read_data_prepare ucw_image_io_read_data_prepare +#define libjpeg_read_data ucw_libjpeg_read_data +#define libjpeg_read_header ucw_libjpeg_read_header +#define libjpeg_write ucw_libjpeg_write +#define libmagick_cleanup ucw_libmagick_cleanup +#define libmagick_init ucw_libmagick_init +#define libmagick_read_data ucw_libmagick_read_data +#define libmagick_read_header ucw_libmagick_read_header +#define libmagick_write ucw_libmagick_write +#define libpng_read_data ucw_libpng_read_data +#define libpng_read_header ucw_libpng_read_header +#define 
libpng_write ucw_libpng_write +#define libungif_read_data ucw_libungif_read_data +#define libungif_read_header ucw_libungif_read_header +#endif + +static inline int libjpeg_init(struct image_io *io UNUSED) { return 1; } +static inline void libjpeg_cleanup(struct image_io *io UNUSED) {} +int libjpeg_read_header(struct image_io *io); +int libjpeg_read_data(struct image_io *io); +int libjpeg_write(struct image_io *io); + +static inline int libpng_init(struct image_io *io UNUSED) { return 1; } +static inline void libpng_cleanup(struct image_io *io UNUSED) {} +int libpng_read_header(struct image_io *io); +int libpng_read_data(struct image_io *io); +int libpng_write(struct image_io *io); + +static inline int libungif_init(struct image_io *io UNUSED) { return 1; } +static inline void libungif_cleanup(struct image_io *io UNUSED) {} +int libungif_read_header(struct image_io *io); +int libungif_read_data(struct image_io *io); + +int libmagick_init(struct image_io *io); +void libmagick_cleanup(struct image_io *io); +int libmagick_read_header(struct image_io *io); +int libmagick_read_data(struct image_io *io); +int libmagick_write(struct image_io *io); + +struct image_io_read_data_internals { + struct image *image; + int need_transformations; +}; + +struct image *image_io_read_data_prepare(struct image_io_read_data_internals *rdi, struct image_io *io, uint cols, uint rows, uint flags); +int image_io_read_data_finish(struct image_io_read_data_internals *rdi, struct image_io *io); +void image_io_read_data_break(struct image_io_read_data_internals *rdi, struct image_io *io); + +#endif diff --git a/libucw/images/libucw-images.pc b/libucw/images/libucw-images.pc new file mode 100644 index 0000000..9cc39c0 --- /dev/null +++ b/libucw/images/libucw-images.pc @@ -0,0 +1,12 @@ +# pkg-config metadata for libucw-images + +libdir=@LIBDIR@ +incdir=. 
+ +Name: libucw-images +Description: UCW image library +Version: @UCW_VERSION@ +Cflags: -I${incdir} +Libs: -L${libdir} @SO_LINK_PATH@ -lucw-images@UCW_ABI_SUFFIX@ +Libs.private: @LIBIMAGES_LIBS@ +Requires.private: @DEPS@ diff --git a/libucw/images/math.c b/libucw/images/math.c new file mode 100644 index 0000000..0e88ca9 --- /dev/null +++ b/libucw/images/math.c @@ -0,0 +1,64 @@ +/* + * Image Library -- Math routines + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU General Public License. + */ + +#include +#include + +const u32 fast_div_tab[] = { + 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, + 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, + 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, + 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, + 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, + 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, + 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, + 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, + 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, + 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, + 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, + 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, + 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, + 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, + 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, + 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, + 33554432, 
33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, + 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, + 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, + 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, + 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, + 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, + 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, + 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, + 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, + 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, + 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, + 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, + 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, + 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, + 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, + 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010 }; + +const byte fast_sqrt_tab[] = { + 0, 16, 23, 28, 32, 36, 39, 43, 46, 48, 51, 53, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 74, 75, 77, 79, 80, 82, 83, 85, 86, 88, 89, + 91, 92, 94, 95, 96, 98, 99, 100, 101, 103, 104, 105, 106, 108, 109, 110, + 111, 112, 113, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 143, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, 156, + 157, 158, 159, 159, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, 169, + 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 181, + 181, 182, 183, 183, 184, 185, 186, 186, 187, 
188, 188, 189, 190, 190, 191, 192, + 192, 193, 194, 194, 195, 196, 196, 197, 198, 198, 199, 199, 200, 201, 201, 202, + 203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210, 210, 211, 211, 212, + 213, 213, 214, 214, 215, 216, 216, 217, 217, 218, 219, 219, 220, 220, 221, 221, + 222, 223, 223, 224, 224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230, + 231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237, 238, 238, 239, 239, + 240, 240, 241, 241, 242, 242, 243, 243, 244, 245, 245, 246, 246, 247, 247, 248, + 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255 }; + diff --git a/libucw/images/math.h b/libucw/images/math.h new file mode 100644 index 0000000..b322949 --- /dev/null +++ b/libucw/images/math.h @@ -0,0 +1,94 @@ +#ifndef _IMAGES_MATH_H +#define _IMAGES_MATH_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define fast_div_tab ucw_fast_div_tab +#define fast_sqrt_tab ucw_fast_sqrt_tab +#endif + +extern const u32 fast_div_tab[]; +extern const byte fast_sqrt_tab[]; + +static inline uint isqr(int x) +{ + return x * x; +} + +static inline uint fast_div_u32_u8(uint x, uint y) +{ + return ((u64)(x) * fast_div_tab[y]) >> 32; +} + +static inline uint fast_sqrt_u16(uint x) +{ + uint y; + if (x < (1 << 10) - 3) + y = fast_sqrt_tab[(x + 3) >> 2] >> 3; + else if (x < (1 << 14) - 28) + y = fast_sqrt_tab[(x + 28) >> 6] >> 1; + else + y = fast_sqrt_tab[x >> 8]; + return (x < y * y) ? 
y - 1 : y; +} + +static inline uint fast_sqrt_u32(uint x) +{ + uint y; + if (x < (1 << 16)) + { + if (x < (1 << 10) - 3) + y = fast_sqrt_tab[(x + 3) >> 2] >> 3; + else if (x < (1 << 14) - 28) + y = fast_sqrt_tab[(x + 28) >> 6] >> 1; + else + y = fast_sqrt_tab[x >> 8]; + } + else + { + if (x < (1 << 24)) + { + if (x < (1 << 20)) + { + y = fast_sqrt_tab[x >> 12]; + y = (fast_div_u32_u8(x, y) >> 3) + (y << 1); + } + else + { + y = fast_sqrt_tab[x >> 16]; + y = (fast_div_u32_u8(x, y) >> 5) + (y << 3); + } + } + else + { + if (x < (1 << 28)) + { + if (x < (1 << 26)) + { + y = fast_sqrt_tab[x >> 18]; + y = (fast_div_u32_u8(x, y) >> 6) + (y << 4); + } + else + { + y = fast_sqrt_tab[x >> 20]; + y = (fast_div_u32_u8(x, y) >> 7) + (y << 5); + } + } + else + { + if (x < (1 << 30)) + { + y = fast_sqrt_tab[x >> 22]; + y = (fast_div_u32_u8(x, y) >> 8) + (y << 6); + } + else + { + y = fast_sqrt_tab[x >> 24]; + y = (fast_div_u32_u8(x, y) >> 9) + (y << 7); + } + } + } + } + return (x < y * y) ? y - 1 : y; +} + +#endif diff --git a/libucw/images/object.c b/libucw/images/object.c new file mode 100644 index 0000000..ae7b021 --- /dev/null +++ b/libucw/images/object.c @@ -0,0 +1,110 @@ +/* + * Image Library -- Image cards manipulations + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + */ + +#undef LOCAL_DEBUG + +#include "sherlock/sherlock.h" +#include +#include +#include +#include "sherlock/object.h" +#include +#include +#include +#include +#include +#include + +uint +get_image_obj_info(struct image_obj_info *ioi, struct odes *o) +{ + byte *v = obj_find_aval(o, 'G'); + if (!v) + { + DBG("Missing image info attribute"); + return 0; + } + byte color_space[16], thumb_format[16]; + /* Both string fields are bounded to 15 characters + NUL so an over-long token cannot overflow the 16-byte buffers; the numeric targets are uint, hence %u */ + UNUSED uint cnt = sscanf(v, "%u%u%15s%u%u%u%15s", &ioi->cols, &ioi->rows, color_space, + &ioi->colors, &ioi->thumb_cols, &ioi->thumb_rows, thumb_format); + ASSERT(cnt == 7); + ioi->thumb_format = (*thumb_format == 'p') ? 
IMAGE_FORMAT_PNG : IMAGE_FORMAT_JPEG; + DBG("Readed image info attribute: dim=%ux%u", ioi->cols, ioi->rows); + return 1; +} + +uint +get_image_obj_thumb(struct image_obj_info *ioi, struct odes *o, struct mempool *pool) +{ + struct oattr *a = obj_find_attr(o, 'N'); + if (!a) + { + DBG("Missing image thumbnail attribute"); + return 0; + } + uint count = 0; + uint max_len = 0; + for (struct oattr *b = a; b; b = b->same) + { + count++; + max_len += strlen(b->val); + } + byte buf[max_len + 1], *b = buf; + for (; a; a = a->same) + b += base224_decode(b, a->val, strlen(a->val)); + ASSERT(b != buf); + ioi->thumb_data = mp_alloc(pool, ioi->thumb_size = b - buf); + memcpy(ioi->thumb_data, buf, ioi->thumb_size); + DBG("Readed thumbnail of size %u", ioi->thumb_size); + return 1; +} + +struct image * +read_image_obj_thumb(struct image_obj_info *ioi, struct fastbuf *fb, struct image_io *io, struct mempool *pool) +{ + struct fastbuf tmp_fb; + if (!fb) + fbbuf_init_read(fb = &tmp_fb, ioi->thumb_data, ioi->thumb_size, 0); + io->format = ioi->thumb_format; + io->fastbuf = fb; + if (!image_io_read_header(io)) + goto error; + io->pool = pool; + io->flags = COLOR_SPACE_RGB | IMAGE_IO_USE_BACKGROUND; + if (!io->background_color.color_space) + io->background_color = color_white; + struct image *img; + if (!(img = image_io_read_data(io, 1))) + goto error; + DBG("Decompressed thumbnail: size=%ux%u", img->cols, img->rows); + return img; +error: + DBG("Failed to decompress thumbnail: %s", io->thread->err_msg); + return NULL; +} + +void +put_image_obj_signature(struct odes *o, struct image_signature *sig) +{ + /* Signatures should be short enough to fit in a single attribute */ + uint size = image_signature_size(sig->len); + byte buf[BASE224_ENC_LENGTH(size) + 1]; + buf[base224_encode(buf, (byte *)sig, size)] = 0; + obj_set_attr(o, 'H', buf); +} + +uint +get_image_obj_signature(struct image_signature *sig, struct odes *o) +{ + byte *a = obj_find_aval(o, 'H'); + if (!a) + return 0; + UNUSED uint 
size = base224_decode((byte *)sig, a, strlen(a)); + ASSERT(size == image_signature_size(sig->len)); + return 1; +} diff --git a/libucw/images/object.h b/libucw/images/object.h new file mode 100644 index 0000000..4f03166 --- /dev/null +++ b/libucw/images/object.h @@ -0,0 +1,35 @@ +#ifndef _IMAGES_OBJECT_H +#define _IMAGES_OBJECT_H + +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define get_image_obj_info ucw_get_image_obj_info +#define get_image_obj_signature ucw_get_image_obj_signature +#define get_image_obj_thumb ucw_get_image_obj_thumb +#define put_image_obj_signature ucw_put_image_obj_signature +#define read_image_obj_thumb ucw_read_image_obj_thumb +#endif + +struct image_obj_info { + uint cols; + uint rows; + uint colors; + enum image_format thumb_format; + uint thumb_cols; + uint thumb_rows; + uint thumb_size; + byte *thumb_data; +}; + +struct odes; +struct mempool; +struct image_signature; + +uint get_image_obj_info(struct image_obj_info *ioi, struct odes *o); +uint get_image_obj_thumb(struct image_obj_info *ioi, struct odes *o, struct mempool *pool); +struct image *read_image_obj_thumb(struct image_obj_info *ioi, struct fastbuf *fb, struct image_io *io, struct mempool *pool); +void put_image_obj_signature(struct odes *o, struct image_signature *sig); +uint get_image_obj_signature(struct image_signature *sig, struct odes *o); + +#endif diff --git a/libucw/images/scale-gen.h b/libucw/images/scale-gen.h new file mode 100644 index 0000000..e6f1685 --- /dev/null +++ b/libucw/images/scale-gen.h @@ -0,0 +1,385 @@ +/* + * Image Library -- Image scaling algorithms + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef IMAGE_SCALE_CHANNELS +# define IMAGE_SCALE_CHANNELS IMAGE_SCALE_PIXEL_SIZE +#endif + +#undef IMAGE_COPY_PIXEL +#if IMAGE_SCALE_PIXEL_SIZE == 1 +#define IMAGE_COPY_PIXEL(dest, src) do{ *(byte *)dest = *(byte *)src; }while(0) +#elif IMAGE_SCALE_PIXEL_SIZE == 2 +#define IMAGE_COPY_PIXEL(dest, src) do{ *(u16 *)dest = *(u16 *)src; }while(0) +#elif IMAGE_SCALE_PIXEL_SIZE == 3 +#define IMAGE_COPY_PIXEL(dest, src) do{ ((byte *)dest)[0] = ((byte *)src)[0]; ((byte *)dest)[1] = ((byte *)src)[1]; ((byte *)dest)[2] = ((byte *)src)[2]; }while(0) +#elif IMAGE_SCALE_PIXEL_SIZE == 4 +#define IMAGE_COPY_PIXEL(dest, src) do{ *(u32 *)dest = *(u32 *)src; }while(0) +#endif + +static void +IMAGE_SCALE_PREFIX(nearest_xy)(struct image *dest, struct image *src) +{ + uint x_inc = (src->cols << 16) / dest->cols; + uint y_inc = (src->rows << 16) / dest->rows; + uint x_start = x_inc >> 1, x_pos; + uint y_pos = y_inc >> 1; + byte *row_start; +# define IMAGE_WALK_PREFIX(x) walk_##x +# define IMAGE_WALK_INLINE +# define IMAGE_WALK_UNROLL 4 +# define IMAGE_WALK_IMAGE dest +# define IMAGE_WALK_COL_STEP IMAGE_SCALE_PIXEL_SIZE +# define IMAGE_WALK_DO_ROW_START do{ row_start = src->pixels + (y_pos >> 16) * src->row_size; y_pos += y_inc; x_pos = x_start; }while(0) +# define IMAGE_WALK_DO_STEP do{ byte *pos = row_start + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE; x_pos += x_inc; IMAGE_COPY_PIXEL(walk_pos, pos); }while(0) +# include +} + +#if 0 /* Experiments with rearranging pixels for SSE... 
*/ +static void +IMAGE_SCALE_PREFIX(linear_x)(struct image *dest, struct image *src) +{ + /* Handle problematic special case */ + byte *src_row = src->pixels; + byte *dest_row = dest->pixels; + if (src->cols == 1) + { + for (uint y_counter = dest->rows; y_counter--; ) + { + // FIXME + ASSERT(0); + src_row += src->row_size; + dest_row += dest->row_size; + } + return; + } + /* Initialize the main loop */ + uint x_inc = ((src->cols - 1) << 16) / (dest->cols - 1); +# define COLS_AT_ONCE 256 + byte pixel_buf[COLS_AT_ONCE * 2 * IMAGE_SCALE_PIXEL_SIZE]; /* Buffers should fit in cache */ + u16 coef_buf[COLS_AT_ONCE * IMAGE_SCALE_PIXEL_SIZE]; + /* Main loop */ + for (uint y_counter = dest->rows; y_counter--; ) + { + uint x_pos = 0; + byte *dest_pos = dest_row; + for (uint x_counter = dest->cols; --x_counter; ) + for (uint x_counter = dest->cols; x_counter > COLS_AT_ONCE; x_counter -= COLS_AT_ONCE) + { + byte *pixel_buf_pos = pixel_buf; + u16 *coef_buf_pos = coef_buf; + for (uint i = 0; i < COLS_AT_ONCE / 2; i++) + { + byte *src_pos = src_row + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE; + uint ofs = x_pos & 0xffff; + x_pos += x_inc; + byte *src_pos_2 = src_row + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE; + uint ofs_2 = x_pos & 0xffff; + x_pos += x_inc; + *coef_buf_pos++ = ofs; + byte *pixel_buf_pos_2 = pixel_buf_pos + IMAGE_SCALE_PIXEL_SIZE; + byte *pixel_buf_pos_3 = pixel_buf_pos + IMAGE_SCALE_PIXEL_SIZE * 2; + byte *pixel_buf_pos_4 = pixel_buf_pos + IMAGE_SCALE_PIXEL_SIZE * 3; + IMAGE_COPY_PIXEL(pixel_buf_pos, src_pos); + IMAGE_COPY_PIXEL(pixel_buf_pos_2, src_pos + IMAGE_SCALE_PIXEL_SIZE); + IMAGE_COPY_PIXEL(pixel_buf_pos_3, src_pos_2); + IMAGE_COPY_PIXEL(pixel_buf_pos_4, src_pos_2 + IMAGE_SCALE_PIXEL_SIZE); + pixel_buf_pos += 4 * IMAGE_SCALE_PIXEL_SIZE; + *coef_buf_pos++ = ofs_2; + } +/* + byte *src_pos = src_row + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE; + uint ofs = x_pos & 0xffff; + x_pos += x_inc; + dest_pos[0] = LINEAR_INTERPOLATE(src_pos[0], src_pos[0 + 
IMAGE_SCALE_PIXEL_SIZE], ofs); +# if IMAGE_SCALE_CHANNELS >= 2 + dest_pos[1] = LINEAR_INTERPOLATE(src_pos[1], src_pos[1 + IMAGE_SCALE_PIXEL_SIZE], ofs); +# endif +# if IMAGE_SCALE_CHANNELS >= 3 + dest_pos[2] = LINEAR_INTERPOLATE(src_pos[2], src_pos[2 + IMAGE_SCALE_PIXEL_SIZE], ofs); +# endif +# if IMAGE_SCALE_CHANNELS >= 4 + dest_pos[3] = LINEAR_INTERPOLATE(src_pos[3], src_pos[3 + IMAGE_SCALE_PIXEL_SIZE], ofs); +# endif + dest_pos += IMAGE_SCALE_PIXEL_SIZE;*/ + + } + /* Always copy the last column - handle "x_pos == dest->cols * 0x10000" overflow */ + IMAGE_COPY_PIXEL(dest_pos, src_row + src->row_pixels_size - IMAGE_SCALE_PIXEL_SIZE); + /* Next step */ + src_row += src->row_size; + dest_row += dest->row_size; + } +#undef COLS_AT_ONCE +} + +static void +IMAGE_SCALE_PREFIX(bilinear_xy)(struct image *dest, struct image *src) +{ + uint x_inc = (((src->cols - 1) << 16) - 1) / (dest->cols); + uint y_inc = (((src->rows - 1) << 16) - 1) / (dest->rows); + uint y_pos = 0x10000; + byte *cache[2], buf1[dest->row_pixels_size + 16], buf2[dest->row_pixels_size + 16], *pbuf[2]; + byte *dest_row = dest->pixels, *dest_pos; + uint cache_index = ~0U, cache_i = 0; + pbuf[0] = cache[0] = ALIGN_PTR((void *)buf1, 16); + pbuf[1] = cache[1] = ALIGN_PTR((void *)buf2, 16); +#ifdef __SSE2__ + __m128i zero = _mm_setzero_si128(); +#endif + for (uint row_counter = dest->rows; row_counter--; ) + { + dest_pos = dest_row; + uint y_index = y_pos >> 16; + uint y_ofs = y_pos & 0xffff; + y_pos += y_inc; + uint x_pos = 0; + if (y_index > (uint)(cache_index + 1)) + cache_index = y_index - 1; + while (y_index > cache_index) + { + cache[0] = cache[1]; + cache[1] = pbuf[cache_i ^= 1]; + cache_index++; + byte *src_row = src->pixels + cache_index * src->row_size; + byte *cache_pos = cache[1]; + for (uint col_counter = dest->cols; --col_counter; ) + { + byte *c1 = src_row + (x_pos >> 16) * IMAGE_SCALE_PIXEL_SIZE; + byte *c2 = c1 + IMAGE_SCALE_PIXEL_SIZE; + uint ofs = x_pos & 0xffff; + cache_pos[0] = 
LINEAR_INTERPOLATE(c1[0], c2[0], ofs); +# if IMAGE_SCALE_CHANNELS >= 2 + cache_pos[1] = LINEAR_INTERPOLATE(c1[1], c2[1], ofs); +# endif +# if IMAGE_SCALE_CHANNELS >= 3 + cache_pos[2] = LINEAR_INTERPOLATE(c1[2], c2[2], ofs); +# endif +# if IMAGE_SCALE_CHANNELS >= 4 + cache_pos[3] = LINEAR_INTERPOLATE(c1[3], c2[3], ofs); +# endif + cache_pos += IMAGE_SCALE_PIXEL_SIZE; + x_pos += x_inc; + } + IMAGE_COPY_PIXEL(cache_pos, src_row + src->row_pixels_size - IMAGE_SCALE_PIXEL_SIZE); + } + uint i = 0; +#ifdef __SSE2__ + __m128i coef = _mm_set1_epi16(y_ofs >> 9); + for (; (int)i < (int)dest->row_pixels_size - 15; i += 16) + { + __m128i a2 = _mm_loadu_si128((__m128i *)(cache[0] + i)); + __m128i a1 = _mm_unpacklo_epi8(a2, zero); + a2 = _mm_unpackhi_epi8(a2, zero); + __m128i b2 = _mm_loadu_si128((__m128i *)(cache[1] + i)); + __m128i b1 = _mm_unpacklo_epi8(b2, zero); + b2 = _mm_unpackhi_epi8(b2, zero); + b1 = _mm_sub_epi16(b1, a1); + b2 = _mm_sub_epi16(b2, a2); + a1 = _mm_slli_epi16(a1, 7); + a2 = _mm_slli_epi16(a2, 7); + b1 = _mm_mullo_epi16(b1, coef); + b2 = _mm_mullo_epi16(b2, coef); + a1 = _mm_add_epi16(a1, b1); + a2 = _mm_add_epi16(a2, b2); + a1 = _mm_srli_epi16(a1, 7); + a2 = _mm_srli_epi16(a2, 7); + a1 = _mm_packus_epi16(a1, a2); + _mm_storeu_si128((__m128i *)(dest_pos + i), a1); + } +#elif 1 + for (; (int)i < (int)dest->row_pixels_size - 3; i += 4) + { + dest_pos[i + 0] = LINEAR_INTERPOLATE(cache[0][i + 0], cache[1][i + 0], y_ofs); + dest_pos[i + 1] = LINEAR_INTERPOLATE(cache[0][i + 1], cache[1][i + 1], y_ofs); + dest_pos[i + 2] = LINEAR_INTERPOLATE(cache[0][i + 2], cache[1][i + 2], y_ofs); + dest_pos[i + 3] = LINEAR_INTERPOLATE(cache[0][i + 3], cache[1][i + 3], y_ofs); + } +#endif + for (; i < dest->row_pixels_size; i++) + dest_pos[i] = LINEAR_INTERPOLATE(cache[0][i], cache[1][i], y_ofs); + dest_row += dest->row_size; + } +} +#endif + +static void +IMAGE_SCALE_PREFIX(downsample_xy)(struct image *dest, struct image *src) +{ + /* FIXME slow */ + byte *rsrc = src->pixels, 
*psrc; + byte *rdest = dest->pixels, *pdest; + u64 x_inc = ((u64)dest->cols << 32) / src->cols, x_pos; + u64 y_inc = ((u64)dest->rows << 32) / src->rows, y_pos = 0; + uint x_inc_frac = (u64)0xffffffffff / x_inc; + uint y_inc_frac = (u64)0xffffffffff / y_inc; + uint final_mul = ((u64)(x_inc >> 16) * (y_inc >> 16)) >> 16; + uint buf_size = dest->cols * IMAGE_SCALE_CHANNELS; + u32 buf[buf_size], *pbuf; + buf_size *= sizeof(u32); + bzero(buf, buf_size); + for (uint rows_counter = src->rows; rows_counter--; ) + { + pbuf = buf; + psrc = rsrc; + rsrc += src->row_size; + x_pos = 0; + y_pos += y_inc; + if (y_pos <= 0x100000000) + { + for (uint cols_counter = src->cols; cols_counter--; ) + { + x_pos += x_inc; + if (x_pos <= 0x100000000) + { + pbuf[0] += psrc[0]; +# if IMAGE_SCALE_CHANNELS >= 2 + pbuf[1] += psrc[1]; +# endif +# if IMAGE_SCALE_CHANNELS >= 3 + pbuf[2] += psrc[2]; +# endif +# if IMAGE_SCALE_CHANNELS >= 4 + pbuf[3] += psrc[3]; +# endif + } + else + { + x_pos -= 0x100000000; + uint mul2 = (uint)(x_pos >> 16) * x_inc_frac; + uint mul1 = 0xffffff - mul2; + pbuf[0] += (psrc[0] * mul1) >> 24; + pbuf[0 + IMAGE_SCALE_CHANNELS] += (psrc[0] * mul2) >> 24; +# if IMAGE_SCALE_CHANNELS >= 2 + pbuf[1] += (psrc[1] * mul1) >> 24; + pbuf[1 + IMAGE_SCALE_CHANNELS] += (psrc[1] * mul2) >> 24; +# endif +# if IMAGE_SCALE_CHANNELS >= 3 + pbuf[2] += (psrc[2] * mul1) >> 24; + pbuf[2 + IMAGE_SCALE_CHANNELS] += (psrc[2] * mul2) >> 24; +# endif +# if IMAGE_SCALE_CHANNELS >= 4 + pbuf[3] += (psrc[3] * mul1) >> 24; + pbuf[3 + IMAGE_SCALE_CHANNELS] += (psrc[3] * mul2) >> 24; +# endif + pbuf += IMAGE_SCALE_CHANNELS; + } + psrc += IMAGE_SCALE_PIXEL_SIZE; + } + } + else + { + y_pos -= 0x100000000; + pdest = rdest; + rdest += dest->row_size; + uint mul2 = (uint)(y_pos >> 16) * y_inc_frac; + uint mul1 = 0xffffff - mul2; + uint a0 = 0; +# if IMAGE_SCALE_CHANNELS >= 2 + uint a1 = 0; +# endif +# if IMAGE_SCALE_CHANNELS >= 3 + uint a2 = 0; +# endif +# if IMAGE_SCALE_CHANNELS >= 4 + uint a3 = 0; +# endif 
+ for (uint cols_counter = src->cols; cols_counter--; ) + { + x_pos += x_inc; + if (x_pos <= 0x100000000) + { + pbuf[0] += ((psrc[0] * mul1) >> 24); + a0 += (psrc[0] * mul2) >> 24; +# if IMAGE_SCALE_CHANNELS >= 2 + pbuf[1] += ((psrc[1] * mul1) >> 24); + a1 += (psrc[1] * mul2) >> 24; +# endif +# if IMAGE_SCALE_CHANNELS >= 3 + pbuf[2] += ((psrc[2] * mul1) >> 24); + a2 += (psrc[2] * mul2) >> 24; +# endif +# if IMAGE_SCALE_CHANNELS >= 4 + pbuf[3] += ((psrc[3] * mul1) >> 24); + a3 += (psrc[3] * mul2) >> 24; +# endif + } + else + { + x_pos -= 0x100000000; + uint mul4 = (uint)(x_pos >> 16) * x_inc_frac; + uint mul3 = 0xffffff - mul4; + uint mul13 = ((u64)mul1 * mul3) >> 24; + uint mul23 = ((u64)mul2 * mul3) >> 24; + uint mul14 = ((u64)mul1 * mul4) >> 24; + uint mul24 = ((u64)mul2 * mul4) >> 24; + pdest[0] = ((((psrc[0] * mul13) >> 24) + pbuf[0]) * final_mul) >> 16; + pbuf[0] = ((psrc[0] * mul23) >> 24) + a0; + pbuf[0 + IMAGE_SCALE_CHANNELS] += ((psrc[0 + IMAGE_SCALE_PIXEL_SIZE] * mul14) >> 24); + a0 = ((psrc[0 + IMAGE_SCALE_PIXEL_SIZE] * mul24) >> 24); +# if IMAGE_SCALE_CHANNELS >= 2 + pdest[1] = ((((psrc[1] * mul13) >> 24) + pbuf[1]) * final_mul) >> 16; + pbuf[1] = ((psrc[1] * mul23) >> 24) + a1; + pbuf[1 + IMAGE_SCALE_CHANNELS] += ((psrc[1 + IMAGE_SCALE_PIXEL_SIZE] * mul14) >> 24); + a1 = ((psrc[1 + IMAGE_SCALE_PIXEL_SIZE] * mul24) >> 24); +# endif +# if IMAGE_SCALE_CHANNELS >= 3 + pdest[2] = ((((psrc[2] * mul13) >> 24) + pbuf[2]) * final_mul) >> 16; + pbuf[2] = ((psrc[2] * mul23) >> 24) + a2; + pbuf[2 + IMAGE_SCALE_CHANNELS] += ((psrc[2 + IMAGE_SCALE_PIXEL_SIZE] * mul14) >> 24); + a2 = ((psrc[2 + IMAGE_SCALE_PIXEL_SIZE] * mul24) >> 24); +# endif +# if IMAGE_SCALE_CHANNELS >= 4 + pdest[3] = ((((psrc[3] * mul13) >> 24) + pbuf[3]) * final_mul) >> 16; + pbuf[3] = ((psrc[3] * mul23) >> 24) + a3; + pbuf[3 + IMAGE_SCALE_CHANNELS] += ((psrc[3 + IMAGE_SCALE_PIXEL_SIZE] * mul14) >> 24); + a3 = ((psrc[3 + IMAGE_SCALE_PIXEL_SIZE] * mul24) >> 24); +# endif + pbuf += 
IMAGE_SCALE_CHANNELS; + pdest += IMAGE_SCALE_PIXEL_SIZE; + } + psrc += IMAGE_SCALE_PIXEL_SIZE; + } + pdest[0] = (pbuf[0] * final_mul) >> 16; + pbuf[0] = a0; +# if IMAGE_SCALE_CHANNELS >= 2 + pdest[1] = (pbuf[1] * final_mul) >> 16; + pbuf[1] = a1; +# endif +# if IMAGE_SCALE_CHANNELS >= 3 + pdest[2] = (pbuf[2] * final_mul) >> 16; + pbuf[2] = a2; +# endif +# if IMAGE_SCALE_CHANNELS >= 4 + pdest[3] = (pbuf[3] * final_mul) >> 16; + pbuf[3] = a3; +# endif + } + } + pdest = rdest; + pbuf = buf; + for (uint cols_counter = dest->cols; cols_counter--; ) + { + pdest[0] = (pbuf[0] * final_mul) >> 16; +# if IMAGE_SCALE_CHANNELS >= 2 + pdest[1] = (pbuf[1] * final_mul) >> 16; +# endif +# if IMAGE_SCALE_CHANNELS >= 3 + pdest[2] = (pbuf[2] * final_mul) >> 16; +# endif +# if IMAGE_SCALE_CHANNELS >= 4 + pdest[3] = (pbuf[3] * final_mul) >> 16; +# endif + pbuf += IMAGE_SCALE_CHANNELS; + pdest += IMAGE_SCALE_PIXEL_SIZE; + } +} + +#undef IMAGE_SCALE_PREFIX +#undef IMAGE_SCALE_PIXEL_SIZE +#undef IMAGE_SCALE_CHANNELS diff --git a/libucw/images/scale.c b/libucw/images/scale.c new file mode 100644 index 0000000..2695696 --- /dev/null +++ b/libucw/images/scale.c @@ -0,0 +1,278 @@ +/* + * Image Library -- Image scaling algorithms + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include + +#include + +#ifdef __SSE2__ +#include +#endif + +#define LINEAR_INTERPOLATE(a, b, t) (((int)((a) << 16) + (int)(t) * ((int)(b) - (int)(a)) + 0x8000) >> 16) + +/* Generate optimized code for various pixel formats */ + +#define IMAGE_SCALE_PREFIX(x) image_scale_1_##x +#define IMAGE_SCALE_PIXEL_SIZE 1 +#include + +#define IMAGE_SCALE_PREFIX(x) image_scale_2_##x +#define IMAGE_SCALE_PIXEL_SIZE 2 +#include + +#define IMAGE_SCALE_PREFIX(x) image_scale_3_##x +#define IMAGE_SCALE_PIXEL_SIZE 3 +#include + +#define IMAGE_SCALE_PREFIX(x) image_scale_4_##x +#define IMAGE_SCALE_PIXEL_SIZE 4 +#include + +/* Simple "nearest neighbour" algorithm */ + +static void +image_scale_nearest_xy(struct image *dest, struct image *src) +{ + switch (src->pixel_size) + { + case 1: + image_scale_1_nearest_xy(dest, src); + return; + case 2: + image_scale_2_nearest_xy(dest, src); + return; + case 3: + image_scale_3_nearest_xy(dest, src); + return; + case 4: + image_scale_4_nearest_xy(dest, src); + return; + default: + ASSERT(0); + } +} + +static inline void +image_scale_nearest_x(struct image *dest, struct image *src) +{ + image_scale_nearest_xy(dest, src); +} + +static void +image_scale_nearest_y(struct image *dest, struct image *src) +{ + uint y_inc = (src->rows << 16) / dest->rows; + uint y_pos = y_inc >> 1; + byte *dest_pos = dest->pixels; + for (uint row_counter = dest->rows; row_counter--; ) + { + byte *src_pos = src->pixels + (y_pos >> 16) * src->row_size; + y_pos += y_inc; + memcpy(dest_pos, src_pos, dest->row_pixels_size); + dest_pos += dest->row_size; + } +} + +/* Bilinear filter */ + +UNUSED static void +image_scale_linear_y(struct image *dest, struct image *src) +{ + byte *dest_row = dest->pixels; + /* Handle problematic special case */ + if (src->rows == 1) + { + for (uint y_counter = dest->rows; y_counter--; dest_row += dest->row_size) + memcpy(dest_row, src->pixels, src->row_pixels_size); + return; + } + /* 
Initialize the main loop */ + /* With a single destination row the loop below never runs and only the last source row is copied, so no step is needed; guard the division against a zero divisor */ + uint y_inc = (dest->rows > 1) ? ((src->rows - 1) << 16) / (dest->rows - 1) : 0, y_pos = 0; +#ifdef __SSE2__ + __m128i zero = _mm_setzero_si128(); +#endif + /* Main loop */ + for (uint y_counter = dest->rows; --y_counter; ) + { + uint coef = y_pos & 0xffff; + byte *src_row_1 = src->pixels + (y_pos >> 16) * src->row_size; + byte *src_row_2 = src_row_1 + src->row_size; + uint i = 0; +#ifdef __SSE2__ + /* SSE2 */ + __m128i sse_coef = _mm_set1_epi16(coef >> 9); + for (; (int)i < (int)dest->row_pixels_size - 15; i += 16) + { + __m128i a2 = _mm_loadu_si128((__m128i *)(src_row_1 + i)); + __m128i a1 = _mm_unpacklo_epi8(a2, zero); + a2 = _mm_unpackhi_epi8(a2, zero); + __m128i b2 = _mm_loadu_si128((__m128i *)(src_row_2 + i)); + __m128i b1 = _mm_unpacklo_epi8(b2, zero); + b2 = _mm_unpackhi_epi8(b2, zero); + b1 = _mm_sub_epi16(b1, a1); + b2 = _mm_sub_epi16(b2, a2); + a1 = _mm_slli_epi16(a1, 7); + a2 = _mm_slli_epi16(a2, 7); + b1 = _mm_mullo_epi16(b1, sse_coef); + b2 = _mm_mullo_epi16(b2, sse_coef); + a1 = _mm_add_epi16(a1, b1); + a2 = _mm_add_epi16(a2, b2); + a1 = _mm_srli_epi16(a1, 7); + a2 = _mm_srli_epi16(a2, 7); + a1 = _mm_packus_epi16(a1, a2); + _mm_storeu_si128((__m128i *)(dest_row + i), a1); + } +#endif + /* Unrolled loop using general-purpose registers */ + for (; (int)i < (int)dest->row_pixels_size - 3; i += 4) + { + dest_row[i + 0] = LINEAR_INTERPOLATE(src_row_1[i + 0], src_row_2[i + 0], coef); + dest_row[i + 1] = LINEAR_INTERPOLATE(src_row_1[i + 1], src_row_2[i + 1], coef); + dest_row[i + 2] = LINEAR_INTERPOLATE(src_row_1[i + 2], src_row_2[i + 2], coef); + dest_row[i + 3] = LINEAR_INTERPOLATE(src_row_1[i + 3], src_row_2[i + 3], coef); + } + /* Remaining columns */ + for (; i < dest->row_pixels_size; i++) + dest_row[i] = LINEAR_INTERPOLATE(src_row_1[i], src_row_2[i], coef); + dest_row += dest->row_size; + y_pos += y_inc; + } + /* Always copy the last row - faster and also handle "y_pos == dest->rows * 0x10000" overflow */ + memcpy(dest_row, src->pixels + 
src->image_size - src->row_size, src->row_pixels_size); +} + +/* Box filter */ + +static void +image_scale_downsample_xy(struct image *dest, struct image *src) +{ + switch (src->pixel_size) + { + case 1: + image_scale_1_downsample_xy(dest, src); + return; + case 2: + image_scale_2_downsample_xy(dest, src); + return; + case 3: + image_scale_3_downsample_xy(dest, src); + return; + case 4: + image_scale_4_downsample_xy(dest, src); + return; + default: + ASSERT(0); + } +} + +/* General routine + * FIXME: customizable; implement at least bilinear and bicubic filters */ + +int +image_scale(struct image_context *ctx, struct image *dest, struct image *src) +{ + if ((src->flags & IMAGE_PIXEL_FORMAT) != (dest->flags & IMAGE_PIXEL_FORMAT)) + { + IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_PIXEL_FORMAT, "Different pixel formats not supported."); + return 0; + } + if (dest->cols == src->cols) + { + if (dest->rows == src->rows) + { + /* No scale, copy only */ + image_scale_nearest_y(dest, src); + return 1; + } + else if (dest->rows < src->rows) + { + /* Downscale vertically */ + image_scale_downsample_xy(dest, src); + return 1; + } + else + { + /* Upscale vertically */ + image_scale_nearest_y(dest, src); + return 1; + } + } + else if (dest->rows == src->rows) + { + if (dest->cols < src->cols) + { + /* Downscale horizontally */ + image_scale_downsample_xy(dest, src); + return 1; + } + else + { + /* Upscale horizontally */ + image_scale_nearest_x(dest, src); + return 1; + } + } + else + { + if (dest->cols <= src->cols && dest->rows <= src->rows) + { + /* Downscale in both dimensions */ + image_scale_downsample_xy(dest, src); + return 1; + } + else + { + image_scale_nearest_xy(dest, src); + return 1; + } + } +} + +void +image_dimensions_fit_to_box(uint *cols, uint *rows, uint max_cols, uint max_rows, uint upsample) +{ + ASSERT(image_dimensions_valid(*cols, *rows)); + ASSERT(image_dimensions_valid(max_cols, max_rows)); + if (*cols <= max_cols && *rows <= max_rows) + { + if (!upsample) + 
return; + if (max_cols * *rows > max_rows * *cols) + { + *cols = *cols * max_rows / *rows; + *cols = MIN(*cols, max_cols); + *rows = max_rows; + } + else + { + *rows = *rows * max_cols / *cols; + *rows = MIN(*rows, max_rows); + *cols = max_cols; + } + } + else if (*cols <= max_cols) + goto down_cols; + else if (*rows <= max_rows || max_rows * *cols > max_cols * *rows) + goto down_rows; +down_cols: + *cols = *cols * max_rows / *rows; + *cols = MAX(*cols, 1); + *rows = max_rows; + return; +down_rows: + *rows = *rows * max_cols / *cols; + *rows = MAX(*rows, 1); + *cols = max_cols; +} diff --git a/libucw/images/sig-cmp-gen.h b/libucw/images/sig-cmp-gen.h new file mode 100644 index 0000000..15d92da --- /dev/null +++ b/libucw/images/sig-cmp-gen.h @@ -0,0 +1,389 @@ +#ifdef CONFIG_UCW_CLEAN_ABI +#define image_signatures_dist ucw_image_signatures_dist +#define image_signatures_dist_explain ucw_image_signatures_dist_explain +#endif + +#ifdef EXPLAIN +# define MSG(x...) do{ line += sprintf(line, x); }while(0) +# define LINE do{ line = buf; msg(line, param); }while(0) + +static void explain_signature(struct image_signature *sig, void (*msg)(byte *text, void *param), void *param) +{ + byte buf[1024], *line = buf; + MSG("signature: flags=0x%x df=%u dh=%u f=(%u", sig->flags, sig->df, sig->dh, sig->vec.f[0]); + for (uint i = 1; i < IMAGE_VEC_F; i++) + MSG(" %u", sig->vec.f[i]); + MSG(")"); + LINE; + for (uint j = 0; j < sig->len; j++) + { + struct image_region *reg = sig->reg + j; + MSG("region %u: wa=%u wb=%u f=(%u", j, reg->wa, reg->wb, reg->f[0]); + for (uint i = 1; i < IMAGE_VEC_F; i++) + MSG(" %u", reg->f[i]); + MSG(") h=(%u", reg->h[0]); + for (uint i = 1; i < IMAGE_REG_H; i++) + MSG(" %u", reg->h[i]); + MSG(")"); + LINE; + } +} + +#else +# define MSG(x...) do{}while(0) +# define LINE do{}while(0) +#endif + +#define MSGL(x...) 
do{ MSG(x); LINE; }while(0) + +#ifndef EXPLAIN +static uint image_signatures_dist_integrated(struct image_signature *sig1, struct image_signature *sig2) +#else +static uint image_signatures_dist_integrated_explain(struct image_signature *sig1, struct image_signature *sig2, void (*msg)(byte *text, void *param), void *param) +#endif +{ + uint dist[IMAGE_REG_MAX * IMAGE_REG_MAX], p[IMAGE_REG_MAX], q[IMAGE_REG_MAX]; + uint n, i, j, k, l, s, d; + struct image_region *reg1, *reg2; +#ifdef EXPLAIN + byte buf[1024], *line = buf; + MSGL("Integrated matching"); + explain_signature(sig1, msg, param); + explain_signature(sig2, msg, param); +#endif + + /* FIXME: do not mux textured and non-textured images (should be split in clusters tree) */ + if ((sig1->flags ^ sig2->flags) & IMAGE_SIG_TEXTURED) + { + MSGL("Textured vs non-textured"); + return ~0U; + } + + /* Compute distance matrix */ + n = 0; + MSGL("Distance matrix:"); + /* ... for non-textured images */ + if (!((sig1->flags | sig2->flags) & IMAGE_SIG_TEXTURED)) + for (j = 0, reg2 = sig2->reg; j < sig2->len; j++, reg2++) + for (i = 0, reg1 = sig1->reg; i < sig1->len; i++, reg1++) + { + uint dt = 0, ds = 0, dp = 0, d; + for (uint i = 0; i < IMAGE_VEC_F; i++) + dt += image_sig_cmp_features_weights[i] * isqr((int)reg1->f[i] - (int)reg2->f[i]); + for (uint i = 0; i < 3; i++) + ds += image_sig_cmp_features_weights[IMAGE_VEC_F + i] * isqr((int)reg1->h[i] - (int)reg2->h[i]); + for (uint i = 3; i < 5; i++) + dp += image_sig_cmp_features_weights[IMAGE_VEC_F + i] * isqr((int)reg1->h[i] - (int)reg2->h[i]); +#if 0 + int x1, y1, x2, y2; + if (sig1->cols > sig1->rows) + { + x1 = reg1->h[3]; + y1 = ((int)reg1->h[4] - 64) * (int)sig1->rows / (int)sig1->cols + 64; + } + else + { + y1 = reg1->h[4]; + x1 = ((int)reg1->h[3] - 64) * (int)sig1->cols / (int)sig1->rows + 64; + } + if (sig2->cols > sig2->rows) + { + x2 = reg2->h[3]; + y2 = ((int)reg2->h[4] - 64) * (int)sig2->rows / (int)sig2->cols + 64; + } + else + { + y2 = reg2->h[4]; + x2 = 
((int)reg2->h[3] - 64) * (int)sig2->cols / (int)sig2->rows + 64; + } + MSGL("%d %d %d %d", x1, y1, x2, y2); + dp = image_sig_cmp_features_weights[IMAGE_VEC_F + 3] * isqr(x1 - x2) + + image_sig_cmp_features_weights[IMAGE_VEC_F + 4] * isqr(y1 - y2); +#endif +#if 0 + d = dt * (4 + MIN(8, (ds >> 12))) * (4 + MIN(8, (dp >> 10))) + (ds >> 11) + (dp >> 10); + MSG("[%u, %u] d=%u=(%u * %u * %u + %u + %u) dt=%u ds=%u dp=%u df=(%d", i, j, d, + dt, 4 + MIN(8, (ds >> 12)), 4 + MIN(8, dp >> 10), ds >> 11, dp >> 10, dt, ds, dp, (int)reg1->f[0] - (int)reg2->f[0]); +#endif +#if 1 + d = dt; + if (ds < 1000) + d = d * 4; + else if (ds < 4000) + d = d * 6 + 8; + else if (ds < 10000) + d = d * 8 + 20; + else if (ds < 50000) + d = d * 10 + 50; + else + d = d * 12 + 100; + if (dp < 1000) + d = d * 2; + else if (dp < 4000) + d = d * 3 + 100; + else if (dp < 10000) + d = d * 4 + 800; + else + d = d * 5 + 3000; +#endif + dist[n++] = (d << 8) + i + (j << 4); + MSG("[%u, %u] d=%u dt=%u ds=%u dp=%u df=(%d", i, j, d, dt, ds, dp, (int)reg1->f[0] - (int)reg2->f[0]); +#ifdef EXPLAIN + for (uint i = 1; i < IMAGE_VEC_F; i++) + MSG(" %d", (int)reg1->f[i] - (int)reg2->f[i]); + MSG(") dh=(%d", (int)reg1->h[0] - (int)reg2->h[0]); + for (uint i = 1; i < IMAGE_REG_H; i++) + MSG(" %d", (int)reg1->h[i] - (int)reg2->h[i]); + MSGL(")"); +#endif + } + /* ... 
for textured images (ignore shape properties) */ + else + for (j = 0, reg2 = sig2->reg; j < sig2->len; j++, reg2++) + for (i = 0, reg1 = sig1->reg; i < sig1->len; i++, reg1++) + { + uint dt = 0; + for (uint i = 0; i < IMAGE_VEC_F; i++) + dt += image_sig_cmp_features_weights[i] * isqr((int)reg1->f[i] - (int)reg2->f[i]); + dist[n++] = (dt << 12) + i + (j << 4); +#ifdef EXPLAIN + MSG("[%u, %u] dt=%u df=(%d", i, j, dt, (int)reg1->f[0] - (int)reg2->f[0]); + for (uint i = 1; i < IMAGE_VEC_F; i++) + MSG(" %d", (int)reg1->f[i] - (int)reg2->f[i]); + MSGL(")"); +#endif + } + + /* One or both signatures have no regions */ + if (!n) + return ~0U; + + /* Get percentages */ + for (i = 0, reg1 = sig1->reg; i < sig1->len; i++, reg1++) + p[i] = reg1->wb; + for (i = 0, reg2 = sig2->reg; i < sig2->len; i++, reg2++) + q[i] = reg2->wb; + + /* Sort entries in distance matrix */ + image_signatures_dist_integrated_sort(dist, n); + + /* Compute significance matrix and resulting distance */ + uint sum = 0; + MSGL("Significance matrix:"); + for (k = 0, l = 128; l; k++) + { + i = dist[k] & 15; + j = (dist[k] >> 4) & 15; + d = dist[k] >> 8; + if (p[i] <= q[j]) + { + s = p[i]; + q[j] -= p[i]; + p[i] = 0; + } + else + { + s = q[j]; + p[i] -= q[j]; + q[j] = 0; + } + l -= s; + sum += s * d; +#ifdef EXPLAIN + reg1 = sig1->reg + i; + reg2 = sig2->reg + j; + MSG("[%u, %u] s=%u d=%u df=(%d", i, j, s, d, (int)reg1->f[0] - (int)reg2->f[0]); + for (uint i = 1; i < IMAGE_VEC_F; i++) + MSG(" %d", (int)reg1->f[i] - (int)reg2->f[i]); + if (!((sig1->flags | sig2->flags) & IMAGE_SIG_TEXTURED)) + { + MSG(") dh=(%d", (int)reg1->h[0] - (int)reg2->h[0]); + for (uint i = 1; i < IMAGE_REG_H; i++) + MSG(" %d", (int)reg1->h[i] - (int)reg2->h[i]); + } + MSGL(")"); +#endif + } + + d = sum / 32; + + uint a = sig1->cols * sig2->rows; + uint b = sig1->rows * sig2->cols; + if (a < 2 * b && b < 2 * a) + d = d * 2; + else if (a < 4 * b && b < 4 * a) + d = d * 3; + else + d = d * 5; + + a = sig1->cols * sig1->rows; + b = 
sig2->cols * sig2->rows; + + if ((a < 1000 && b > 5000) || (b < 1000 && a > 5000)) + d = d * 2; + else if ((a < 5000 && b > 20000) || (b < 5000 && a > 20000)) + d = d * 3 / 2; + + return d; +} + +#ifndef EXPLAIN +static uint image_signatures_dist_fuzzy(struct image_signature *sig1, struct image_signature *sig2) +#else +static uint image_signatures_dist_fuzzy_explain(struct image_signature *sig1, struct image_signature *sig2, void (*msg)(byte *text, void *param), void *param) +#endif +{ +#ifdef EXPLAIN + byte buf[1024], *line = buf; + MSGL("Fuzzy matching"); + explain_signature(sig1, msg, param); + explain_signature(sig2, msg, param); +#endif + + /* FIXME: do not mux textured and non-textured images (should be split in clusters tree) */ + if ((sig1->flags ^ sig2->flags) & IMAGE_SIG_TEXTURED) + { + MSGL("Textured vs non-textured"); + return ~0U; + } + + uint cnt1 = sig1->len; + uint cnt2 = sig2->len; + struct image_region *reg1 = sig1->reg; + struct image_region *reg2 = sig2->reg; + uint mf[IMAGE_REG_MAX][IMAGE_REG_MAX], mh[IMAGE_REG_MAX][IMAGE_REG_MAX]; + uint lf[IMAGE_REG_MAX * 2], lh[IMAGE_REG_MAX * 2]; + uint df = sig1->df + sig2->df, dh = sig1->dh + sig2->dh; + + /* Compute distance matrix */ + for (uint i = 0; i < cnt1; i++) + for (uint j = 0; j < cnt2; j++) + { + uint d = 0; + for (uint k = 0; k < IMAGE_VEC_F; k++) + { + int dif = reg1[i].f[k] - reg2[j].f[k]; + d += image_sig_cmp_features_weights[k] * dif * dif; + } + mf[i][j] = d; + d = 0; + for (uint k = 0; k < IMAGE_REG_H; k++) + { + int dif = reg1[i].h[k] - reg2[j].h[k]; + d += image_sig_cmp_features_weights[k + IMAGE_VEC_F] * dif * dif; + } + mh[i][j] = d; + } + + uint lfs = 0, lhs = 0; + for (uint i = 0; i < cnt1; i++) + { + uint f = mf[i][0], h = mh[i][0]; + for (uint j = 1; j < cnt2; j++) + { + f = MIN(f, mf[i][j]); + h = MIN(h, mh[i][j]); + } + lf[i] = (df * 0x10000) / (df + fast_sqrt_u32(f)); + lh[i] = (dh * 0x10000) / (dh + fast_sqrt_u32(h)); + lfs += lf[i] * (6 * reg1[i].wa + 2 * reg1[i].wb); + lhs 
+= lh[i] * reg1[i].wa; + } + for (uint i = 0; i < cnt2; i++) + { + uint f = mf[0][i], h = mh[0][i]; + for (uint j = 1; j < cnt1; j++) + { + f = MIN(f, mf[j][i]); + h = MIN(h, mh[j][i]); + } + lf[i + cnt1] = (df * 0x10000) / (df + fast_sqrt_u32(f)); + lh[i + cnt1] = (dh * 0x10000) / (dh + fast_sqrt_u32(h)); + lfs += lf[i + cnt1] * (6 * reg2[i].wa + 2 * reg2[i].wb); /* sig2's similarities are stored at offset cnt1; lf[i] would be sig1's entry */ + lhs += lh[i + cnt1] * reg2[i].wa; + } + + uint measure = lfs * 6 + lhs * 2 * 8; + +#ifdef EXPLAIN + /* Display similarity vectors */ + MSG("Lf=("); + for (uint i = 0; i < cnt1 + cnt2; i++) + { + if (i) + MSG(" "); + if (i == cnt1) + MSG("~ "); + MSG("%.4f", (double)lf[i] / 0x10000); + } + MSGL(")"); + MSG("Lh=("); + for (uint i = 0; i < cnt1 + cnt2; i++) + { + if (i) + MSG(" "); + if (i == cnt1) + MSG("~ "); + MSG("%.4f", (double)lh[i] / 0x10000); + } + MSGL(")"); + MSGL("Lfm=%.4f", lfs / (double)(1 << (3 + 8 + 16))); + MSGL("Lhm=%.4f", lhs / (double)(1 << (8 + 16))); + MSGL("measure=%.4f", measure / (double)(1 << (3 + 3 + 8 + 16))); +#endif + + return (1 << (3 + 3 + 8 + 16)) - measure; +} + +#ifndef EXPLAIN +static uint image_signatures_dist_average(struct image_signature *sig1, struct image_signature *sig2) +#else +static uint image_signatures_dist_average_explain(struct image_signature *sig1, struct image_signature *sig2, void (*msg)(byte *text, void *param), void *param) +#endif +{ +#ifdef EXPLAIN + byte buf[1024], *line = buf; + MSGL("Average matching"); +#endif + + uint dist = 0; + for (uint i = 0; i < IMAGE_VEC_F; i++) + { + uint d = image_sig_cmp_features_weights[0] * isqr((int)sig1->vec.f[i] - (int)sig2->vec.f[i]); /* NOTE(review): weight index is 0 for every feature, not i; confirm this is intended */ + MSGL("feature %u: d=%u (%u %u)", i, d, sig1->vec.f[i], sig2->vec.f[i]); + dist += d; + } + + MSGL("dist=%u", dist); + return dist; +} + +#ifndef EXPLAIN +#define CALL(x) image_signatures_dist_##x(sig1, sig2) +uint image_signatures_dist(struct image_signature *sig1, struct image_signature *sig2) +#else +#define CALL(x) image_signatures_dist_##x##_explain(sig1, sig2, msg, param) +uint 
image_signatures_dist_explain(struct image_signature *sig1, struct image_signature *sig2, void (*msg)(byte *text, void *param), void *param) +#endif +{ + if (!sig1->len) + return CALL(average); + else + switch (image_sig_compare_method) + { + case 0: + return CALL(integrated); + case 1: + return CALL(fuzzy); + case 2: + return CALL(average); + default: + ASSERT(0); + } +} +#undef CALL + +#undef EXPLAIN +#undef MSG +#undef LINE +#undef MSGL diff --git a/libucw/images/sig-cmp.c b/libucw/images/sig-cmp.c new file mode 100644 index 0000000..e7ed0df --- /dev/null +++ b/libucw/images/sig-cmp.c @@ -0,0 +1,25 @@ +/* + * Image Library -- Comparisons of image signatures + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include + +#include + +#define ASORT_PREFIX(x) image_signatures_dist_integrated_##x +#define ASORT_KEY_TYPE uint +#include + +#define EXPLAIN +#include +#include diff --git a/libucw/images/sig-dump.c b/libucw/images/sig-dump.c new file mode 100644 index 0000000..7163fa4 --- /dev/null +++ b/libucw/images/sig-dump.c @@ -0,0 +1,52 @@ +/* + * Image Library -- Dumping of image signatures + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include +#include +#include + +byte * +image_vector_dump(byte *buf, struct image_vector *vec) +{ + byte *p = buf; + *p++ = '('; + for (uint i = 0; i < IMAGE_VEC_F; i++) + { + if (i) + *p++ = ' '; + p += sprintf(p, "%u", vec->f[i]); + } + *p++ = ')'; + *p = 0; + return buf; +} + +byte * +image_region_dump(byte *buf, struct image_region *reg) +{ + byte *p = buf; + p += sprintf(p, "(txt="); + for (uint i = 0; i < IMAGE_REG_F; i++) + { + if (i) + *p++ = ' '; + p += sprintf(p, "%u", reg->f[i]); + } + p += sprintf(p, " shp="); + for (uint i = 0; i < IMAGE_REG_H; i++) + { + if (i) + *p++ = ' '; + p += sprintf(p, "%u", reg->h[i]); + } + p += sprintf(p, " wa=%u wb=%u)", reg->wa, reg->wb); + *p = 0; + return buf; +} diff --git a/libucw/images/sig-init.c b/libucw/images/sig-init.c new file mode 100644 index 0000000..37ee502 --- /dev/null +++ b/libucw/images/sig-init.c @@ -0,0 +1,324 @@ +/* + * Image Library -- Computation of image signatures + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +int +image_sig_init(struct image_context *ctx, struct image_sig_data *data, struct image *image) +{ + ASSERT((image->flags & IMAGE_PIXEL_FORMAT) == COLOR_SPACE_RGB); + data->image = image; + data->flags = 0; + data->cols = (image->cols + 3) >> 2; + data->rows = (image->rows + 3) >> 2; + data->full_cols = image->cols >> 2; + data->full_rows = image->rows >> 2; + data->blocks_count = data->cols * data->rows; + if (data->blocks_count >= 0x10000) + { + IMAGE_ERROR(ctx, IMAGE_ERROR_INVALID_DIMENSIONS, "Image too large for implemented signature algorithm."); + return 0; + } + data->blocks = xmalloc(data->blocks_count * sizeof(struct image_sig_block)); + data->area = image->cols * image->rows; + DBG("Computing signature for image of %ux%u pixels (%ux%u blocks)", + image->cols, image->rows, data->cols, data->rows); + return 1; +} + +void +image_sig_preprocess(struct image_sig_data *data) +{ + struct image *image = data->image; + struct image_sig_block *block = data->blocks; + uint sum[IMAGE_VEC_F]; + bzero(sum, sizeof(sum)); + + /* Every block of 4x4 pixels */ + byte *row_start = image->pixels; + for (uint block_y = 0; block_y < data->rows; block_y++, row_start += image->row_size * 4) + { + byte *p = row_start; + for (uint block_x = 0; block_x < data->cols; block_x++, p += 12, block++) + { + int t[16], s[16], *tp = t; + block->x = block_x; + block->y = block_y; + + /* Convert pixels to Luv color space and compute average coefficients */ + uint l_sum = 0, u_sum = 0, v_sum = 0; + byte *p2 = p; + if (block_x < data->full_cols && block_y < data->full_rows) + { + for (uint y = 0; y < 4; y++, p2 += image->row_size - 12) + for (uint x = 0; x < 4; x++, p2 += 3) + { + byte luv[3]; + srgb_to_luv_pixel(luv, p2); + l_sum += *tp++ = luv[0] / 4; + u_sum += luv[1]; + v_sum += luv[2]; + } + block->area = 16; + sum[0] += l_sum; + sum[1] += u_sum; + sum[2] += 
v_sum; + block->v[0] = (l_sum >> 4); + block->v[1] = (u_sum >> 4); + block->v[2] = (v_sum >> 4); + } + /* Incomplete square near the edge */ + else + { + uint x, y; + uint square_cols = (block_x < data->full_cols) ? 4 : image->cols & 3; + uint square_rows = (block_y < data->full_rows) ? 4 : image->rows & 3; + for (y = 0; y < square_rows; y++, p2 += image->row_size) + { + byte *p3 = p2; + for (x = 0; x < square_cols; x++, p3 += 3) + { + byte luv[3]; + srgb_to_luv_pixel(luv, p3); + l_sum += *tp++ = luv[0] / 4; + u_sum += luv[1]; + v_sum += luv[2]; + } + for (; x < 4; x++) + { + *tp = tp[-(int)square_cols]; + tp++; + } + } + for (; y < 4; y++) + for (x = 0; x < 4; x++) + { + *tp = tp[-(int)square_rows * 4]; + tp++; + } + block->area = square_cols * square_rows; + uint inv = 0x10000 / block->area; + sum[0] += l_sum; + sum[1] += u_sum; + sum[2] += v_sum; + block->v[0] = (l_sum * inv) >> 16; + block->v[1] = (u_sum * inv) >> 16; + block->v[2] = (v_sum * inv) >> 16; + } + + /* Apply Daubechies wavelet transformation */ + +# define DAUB_0 31651 /* (1 + sqrt 3) / (4 * sqrt 2) * 0x10000 */ +# define DAUB_1 54822 /* (3 + sqrt 3) / (4 * sqrt 2) * 0x10000 */ +# define DAUB_2 14689 /* (3 - sqrt 3) / (4 * sqrt 2) * 0x10000 */ +# define DAUB_3 -8481 /* (1 - sqrt 3) / (4 * sqrt 2) * 0x10000 */ + + /* ... to the rows */ + uint i; + for (i = 0; i < 16; i += 4) + { + s[i + 0] = (DAUB_0 * t[i + 2] + DAUB_1 * t[i + 3] + DAUB_2 * t[i + 0] + DAUB_3 * t[i + 1]) / 0x10000; + s[i + 1] = (DAUB_0 * t[i + 0] + DAUB_1 * t[i + 1] + DAUB_2 * t[i + 2] + DAUB_3 * t[i + 3]) / 0x10000; + s[i + 2] = (DAUB_3 * t[i + 2] - DAUB_2 * t[i + 3] + DAUB_1 * t[i + 0] - DAUB_0 * t[i + 1]) / 0x10000; + s[i + 3] = (DAUB_3 * t[i + 0] - DAUB_2 * t[i + 1] + DAUB_1 * t[i + 2] - DAUB_0 * t[i + 3]) / 0x10000; + } + + /* ... and to the columns... 
skip LL band */ + for (i = 0; i < 2; i++) + { + t[i + 8] = (DAUB_3 * s[i + 8] - DAUB_2 * s[i +12] + DAUB_1 * s[i + 0] - DAUB_0 * s[i + 4]) / 0x10000; + t[i +12] = (DAUB_3 * s[i + 0] - DAUB_2 * s[i + 4] + DAUB_1 * s[i + 8] - DAUB_0 * s[i +12]) / 0x10000; + } + for (; i < 4; i++) + { + t[i + 0] = (DAUB_0 * s[i + 8] + DAUB_1 * s[i +12] + DAUB_2 * s[i + 0] + DAUB_3 * s[i + 4]) / 0x10000; + t[i + 4] = (DAUB_0 * s[i + 0] + DAUB_1 * s[i + 4] + DAUB_2 * s[i + 8] + DAUB_3 * s[i +12]) / 0x10000; + t[i + 8] = (DAUB_3 * s[i + 8] - DAUB_2 * s[i +12] + DAUB_1 * s[i + 0] - DAUB_0 * s[i + 4]) / 0x10000; + t[i +12] = (DAUB_3 * s[i + 0] - DAUB_2 * s[i + 4] + DAUB_1 * s[i + 8] - DAUB_0 * s[i +12]) / 0x10000; + } + + /* Extract energies in LH, HL and HH bands */ + block->v[3] = fast_sqrt_u32(isqr(t[8]) + isqr(t[9]) + isqr(t[12]) + isqr(t[13])); + block->v[4] = fast_sqrt_u32(isqr(t[2]) + isqr(t[3]) + isqr(t[6]) + isqr(t[7])); + block->v[5] = fast_sqrt_u32(isqr(t[10]) + isqr(t[11]) + isqr(t[14]) + isqr(t[15])); + sum[3] += block->v[3] * block->area; + sum[4] += block->v[4] * block->area; + sum[5] += block->v[5] * block->area; + } + } + + /* Compute features average: sum[i] divided by data->area via a 32-bit fixed-point reciprocal */ + uint inv = 0xffffffffU / data->area; + for (uint i = 0; i < IMAGE_VEC_F; i++) + data->f[i] = ((u64)sum[i] * inv) >> 32; + + if (image->cols < image_sig_min_width || image->rows < image_sig_min_height) + { + data->valid = 0; + data->regions_count = 0; + } + else + data->valid = 1; +} + +void +image_sig_finish(struct image_sig_data *data, struct image_signature *sig) +{ + for (uint i = 0; i < IMAGE_VEC_F; i++) + sig->vec.f[i] = data->f[i]; + sig->len = data->regions_count; + sig->flags = data->flags; + if (!sig->len) + return; + + /* For each region */ + u64 w_total = 0; + uint w_border = MIN(data->cols, data->rows) * image_sig_border_size; + int w_mul = w_border ? 
image_sig_border_bonus * 256 / (int)w_border : 0; + for (uint i = 0; i < sig->len; i++) + { + struct image_sig_region *r = data->regions + i; + DBG("Processing region %u: count=%u", i, r->count); + ASSERT(r->count); + + /* Copy texture properties */ + sig->reg[i].f[0] = r->a[0]; + sig->reg[i].f[1] = r->a[1]; + sig->reg[i].f[2] = r->a[2]; + sig->reg[i].f[3] = r->a[3]; + sig->reg[i].f[4] = r->a[4]; + sig->reg[i].f[5] = r->a[5]; + + /* Compute coordinates centroid and region weight */ + u64 x_sum = 0, y_sum = 0, w_sum = 0; + for (struct image_sig_block *b = r->blocks; b; b = b->next) + { + x_sum += b->x; + y_sum += b->y; + uint d = b->x; + d = MIN(d, b->y); + d = MIN(d, data->cols - b->x - 1); + d = MIN(d, data->rows - b->y - 1); + if (d >= w_border) + w_sum += 128; + else + w_sum += 128 + (int)(w_border - d) * w_mul / 256; + } + w_total += w_sum; + r->w_sum = w_sum; + uint x_avg = x_sum / r->count; + uint y_avg = y_sum / r->count; + DBG(" centroid=(%u %u)", x_avg, y_avg); + + /* Compute normalized inertia */ + u64 sum1 = 0, sum2 = 0, sum3 = 0; + for (struct image_sig_block *b = r->blocks; b; b = b->next) + { + uint inc2 = isqr(x_avg - b->x) + isqr(y_avg - b->y); + uint inc1 = fast_sqrt_u32(inc2); + sum1 += inc1; + sum2 += inc2; + sum3 += inc1 * inc2; + } + sig->reg[i].h[0] = CLAMP(image_sig_inertia_scale[0] * sum1 * ((3 * M_PI * M_PI) / 2) * pow(r->count, -1.5), 0, 255); + sig->reg[i].h[1] = CLAMP(image_sig_inertia_scale[1] * sum2 * ((4 * M_PI * M_PI * M_PI) / 2) / ((u64)r->count * r->count), 0, 255); + sig->reg[i].h[2] = CLAMP(image_sig_inertia_scale[2] * sum3 * ((5 * M_PI * M_PI * M_PI * M_PI) / 2) * pow(r->count, -2.5), 0, 255); + sig->reg[i].h[3] = (uint)x_avg * 127 / data->cols; + sig->reg[i].h[4] = (uint)y_avg * 127 / data->rows; + } + + /* Compute average differences */ + u64 df = 0, dh = 0; + + if (sig->len < 2) + { + sig->df = 1; + sig->dh = 1; + } + else + { + uint cnt = 0; + for (uint i = 0; i < sig->len; i++) + for (uint j = i + 1; j < sig->len; j++) + { 
+ uint d = 0; + for (uint k = 0; k < IMAGE_REG_F; k++) + d += image_sig_cmp_features_weights[k] * isqr(sig->reg[i].f[k] - sig->reg[j].f[k]); + df += fast_sqrt_u32(d); + d = 0; + for (uint k = 0; k < IMAGE_REG_H; k++) + d += image_sig_cmp_features_weights[k + IMAGE_REG_F] * isqr(sig->reg[i].h[k] - sig->reg[j].h[k]); + dh += fast_sqrt_u32(d); + cnt++; + } + sig->df = CLAMP(df / cnt, 1, 0xffff); + sig->dh = CLAMP(dh / cnt, 1, 0xffff); + } + DBG("Average regions difs: df=%u dh=%u", sig->df, sig->dh); + + /* Compute normalized weights */ + uint wa = 128, wb = 128; + for (uint i = sig->len; --i > 0; ) + { + struct image_sig_region *r = data->regions + i; + wa -= sig->reg[i].wa = CLAMP(r->count * 128 / data->blocks_count, 1, (int)(wa - i)); + wb -= sig->reg[i].wb = CLAMP(r->w_sum * 128 / w_total, 1, (int)(wb - i)); + } + sig->reg[0].wa = wa; + sig->reg[0].wb = wb; + + /* Store image dimensions */ + sig->cols = data->image->cols; + sig->rows = data->image->rows; + + /* Dump regions features */ +#ifdef LOCAL_DEBUG + for (uint i = 0; i < sig->len; i++) + { + byte buf[IMAGE_REGION_DUMP_MAX]; + image_region_dump(buf, sig->reg + i); + DBG("region %u: features=%s", i, buf); + } +#endif +} + +void +image_sig_cleanup(struct image_sig_data *data) +{ + xfree(data->blocks); +} + +int +compute_image_signature(struct image_context *ctx, struct image_signature *sig, struct image *image) +{ + struct image_sig_data data; + if (!image_sig_init(ctx, &data, image)) + return 0; + image_sig_preprocess(&data); + if (data.valid) + { + image_sig_segmentation(&data); + image_sig_detect_textured(&data); + } + image_sig_finish(&data, sig); + image_sig_cleanup(&data); + return 1; +} diff --git a/libucw/images/sig-seg.c b/libucw/images/sig-seg.c new file mode 100644 index 0000000..b89cac9 --- /dev/null +++ b/libucw/images/sig-seg.c @@ -0,0 +1,348 @@ +/* + * Image Library -- Image segmentation + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms 
+ * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef LOCAL_DEBUG +static void +dump_segmentation(struct image_sig_region *regions, uint regions_count) +{ + uint cols = 0, rows = 0; + for (uint i = 0; i < regions_count; i++) + for (struct image_sig_block *b = regions[i].blocks; b; b = b->next) + { + cols = MAX(cols, b->x + 1); + rows = MAX(rows, b->y + 1); + } + uint size = (cols + 1) * rows; + byte buf[size]; + bzero(buf, size); + for (uint i = 0; i < regions_count; i++) + { + byte c = (i < 10) ? '0' + i : 'A' - 10 + i; + for (struct image_sig_block *b = regions[i].blocks; b; b = b->next) + buf[b->x + b->y * (cols + 1)] = c; + } + for (uint i = 0; i < rows; i++) + log(L_DEBUG, "%s", &buf[i * (cols + 1)]); +} +#endif + +/* Pre-quantization - recursively split groups of blocks with large error */ + +static inline void +prequant_init_region(struct image_sig_region *region) +{ + bzero(region, sizeof(*region)); +} + +static inline void +prequant_add_block(struct image_sig_region *region, struct image_sig_block *block) +{ + block->next = region->blocks; + region->blocks = block; + region->count++; + for (uint i = 0; i < IMAGE_VEC_F; i++) + { + region->b[i] += block->v[i]; + region->c[i] += isqr(block->v[i]); + } +} + +static void +prequant_finish_region(struct image_sig_region *region) +{ + if (region->count < 2) + { + region->e = 0; + } + else + { + u64 a = 0; + region->e = 0; + for (uint i = 0; i < IMAGE_VEC_F; i++) + { + region->e += region->c[i]; + a += (u64)region->b[i] * region->b[i]; + } + region->e -= a / region->count; + DBG("Finished region %u", (uint)region->e / region->count); + } +} + +static inline uint +prequant_heap_cmp(struct image_sig_region *a, struct image_sig_region *b) +{ + return a->e > b->e; +} + +#define ASORT_PREFIX(x) prequant_##x +#define ASORT_KEY_TYPE uint +#include + +static uint +prequant(struct image_sig_block *blocks, uint 
blocks_count, struct image_sig_region *regions) +{ + DBG("Starting pre-quantization"); + + uint regions_count, heap_count, axis; + struct image_sig_block *blocks_end = blocks + blocks_count, *block, *block2; + struct image_sig_region *heap[IMAGE_REG_MAX + 1], *region, *region2; + + /* Initialize single region with all blocks */ + regions_count = heap_count = 1; + heap[1] = regions; + prequant_init_region(regions); + for (block = blocks; block != blocks_end; block++) + prequant_add_block(regions, block); + prequant_finish_region(regions); + + /* Main cycle */ + while (regions_count < IMAGE_REG_MAX && + regions_count <= DARY_LEN(image_sig_prequant_thresholds) && heap_count) + { + region = heap[1]; + DBG("Step... regions_count=%u heap_count=%u region->count=%u, region->e=%u", + regions_count, heap_count, region->count, (uint)region->e); + if (region->count < 2 || + region->e < image_sig_prequant_thresholds[regions_count - 1] * blocks_count) + { + HEAP_DELETE_MIN(struct image_sig_region *, heap, heap_count, prequant_heap_cmp, HEAP_SWAP); + continue; + } + + /* Select axis to split - the one with maximum average quadratic error */ + axis = 0; + u64 cov = (u64)region->count * region->c[0] - (u64)region->b[0] * region->b[0]; + for (uint i = 1; i < 6; i++) + { + uint j = (u64)region->count * region->c[i] - (u64)region->b[i] * region->b[i]; + if (j > cov) + { + axis = i; + cov = j; + } + } + DBG("Splitting axis %u with average quadratic error %u", axis, (uint)(cov / (region->count * region->count))); + + /* Sort values on the split axis */ + uint val[256], cnt[256], cval; + if (region->count > 64) + { + bzero(cnt, sizeof(cnt)); + for (block = region->blocks; block; block = block->next) + cnt[block->v[axis]]++; + cval = 0; + for (uint i = 0; i < 256; i++) + if (cnt[i]) + { + val[cval] = i; + cnt[cval] = cnt[i]; + cval++; + } + } + else + { + block = region->blocks; + for (uint i = 0; i < region->count; i++, block = block->next) + val[i] = block->v[axis]; + prequant_sort(val, 
region->count); + cval = 1; + cnt[0] = 1; + for (uint i = 1; i < region->count; i++) + if (val[i] == val[cval - 1]) + cnt[cval - 1]++; + else + { + val[cval] = val[i]; + cnt[cval] = 1; + cval++; + } + } + + /* Select split value - to minimize error */ + uint b1 = val[0] * cnt[0]; + uint c1 = isqr(val[0]) * cnt[0]; + uint b2 = region->b[axis] - b1; + uint c2 = region->c[axis] - c1; + uint i = cnt[0], j = region->count - cnt[0]; + u64 best_err = c1 - (u64)b1 * b1 / i + c2 - (u64)b2 * b2 / j; + uint split_val = val[0]; + for (uint k = 1; k < cval - 1; k++) + { + uint b0 = val[k] * cnt[k]; + uint c0 = isqr(val[k]) * cnt[k]; + b1 += b0; + b2 -= b0; + c1 += c0; + c2 -= c0; + i += cnt[k]; + j -= cnt[k]; + u64 err = (u64)c1 - (u64)b1 * b1 / i + (u64)c2 - (u64)b2 * b2 / j; + if (err < best_err) + { + best_err = err; + split_val = val[k]; + } + } + DBG("split_val=%u best_err=%llu b[axis]=%u c[axis]=%u", split_val, (long long)best_err, region->b[axis], region->c[axis]); + + /* Split region */ + block = region->blocks; + region2 = regions + regions_count++; + prequant_init_region(region); + prequant_init_region(region2); + while (block) + { + block2 = block->next; + if (block->v[axis] <= split_val) + prequant_add_block(region, block); + else + prequant_add_block(region2, block); + block = block2; + } + prequant_finish_region(region); + prequant_finish_region(region2); + HEAP_INCREASE(struct image_sig_region *, heap, heap_count, prequant_heap_cmp, HEAP_SWAP, 1, region); + HEAP_INSERT(struct image_sig_region *, heap, heap_count, prequant_heap_cmp, HEAP_SWAP, region2); + } + + DBG("Pre-quantized to %u regions", regions_count); + + return regions_count; +} + +/* Post-quantization - run a few K-mean iterations to improve pre-quantized regions */ + +static uint +postquant(struct image_sig_block *blocks, uint blocks_count, struct image_sig_region *regions, uint regions_count) +{ + DBG("Starting post-quantization"); + + struct image_sig_block *blocks_end = blocks + blocks_count, 
*block; + struct image_sig_region *regions_end = regions + regions_count, *region; + uint error = 0, last_error; + + /* Initialize regions and initial segmentation error */ + for (region = regions; region != regions_end; ) + { + uint inv = 0xffffffffU / region->count; + for (uint i = 0; i < IMAGE_VEC_F; i++) + { + region->a[i] = ((u64)region->b[i] * inv) >> 32; + error += region->c[i] - region->a[i] * region->b[i]; + } + region++; + } + + /* Convergence cycle */ + for (uint step = 0; step < image_sig_postquant_max_steps; step++) + { + DBG("Step..."); + + /* Clear regions */ + for (region = regions; region != regions_end; region++) + { + region->blocks = NULL; + region->count = 0; + bzero(region->b, sizeof(region->b)); + bzero(region->c, sizeof(region->c)); + } + + /* Assign each block to its nearest pivot and accumulate region variables */ + for (block = blocks; block != blocks_end; block++) + { + struct image_sig_region *best_region = NULL; + uint best_dist = ~0U; + for (region = regions; region != regions_end; region++) + { + uint dist = + isqr(block->v[0] - region->a[0]) + + isqr(block->v[1] - region->a[1]) + + isqr(block->v[2] - region->a[2]) + + isqr(block->v[3] - region->a[3]) + + isqr(block->v[4] - region->a[4]) + + isqr(block->v[5] - region->a[5]); + if (dist <= best_dist) + { + best_dist = dist; + best_region = region; + } + } + region = best_region; + region->count++; + block->next = region->blocks; + region->blocks = block; + for (uint i = 0; i < IMAGE_VEC_F; i++) + { + region->b[i] += block->v[i]; + region->c[i] += isqr(block->v[i]); + } + } + + /* Finish regions, delete empty ones (should appear rarely), compute segmentation error */ + last_error = error; + error = 0; + for (region = regions; region != regions_end; ) + if (region->count) + { + uint inv = 0xffffffffU / region->count; + for (uint i = 0; i < IMAGE_VEC_F; i++) + { + region->a[i] = ((u64)region->b[i] * inv) >> 32; + error += region->c[i] - region->a[i] * region->b[i]; + } + region++; + } + 
else + { + regions_end--; + *region = *regions_end; + } + + DBG("last_error=%u error=%u", last_error, error); + + /* Convergation criteria */ + if (step >= image_sig_postquant_min_steps) + { + if (error > last_error) + break; + u64 dif = last_error - error; + if (dif * image_sig_postquant_threshold < (u64)last_error * 100) + break; + } + } + + DBG("Post-quantized to %u regions with average square error %u", regions_end - regions, error / blocks_count); + + return regions_end - regions; +} + +void +image_sig_segmentation(struct image_sig_data *data) +{ + data->regions_count = prequant(data->blocks, data->blocks_count, data->regions); +#ifdef LOCAL_DEBUG + dump_segmentation(data->regions, data->regions_count); +#endif + data->regions_count = postquant(data->blocks, data->blocks_count, data->regions, data->regions_count); +#ifdef LOCAL_DEBUG + dump_segmentation(data->regions, data->regions_count); +#endif +} + diff --git a/libucw/images/sig-txt.c b/libucw/images/sig-txt.c new file mode 100644 index 0000000..c11e5eb --- /dev/null +++ b/libucw/images/sig-txt.c @@ -0,0 +1,97 @@ +/* + * Image Library -- Detection of textured images + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include + +#include + +#define MAX_CELLS_COLS 4 +#define MAX_CELLS_ROWS 4 + +void +image_sig_detect_textured(struct image_sig_data *data) +{ + if (image_sig_textured_threshold <= 0) + { + DBG("Zero textured threshold."); + return; + } + + uint cols = data->cols; + uint rows = data->rows; + uint cell_cols = MIN((cols + 1) / 2, MAX_CELLS_COLS); + uint cell_rows = MIN((rows + 1) / 2, MAX_CELLS_ROWS); + uint cell_x[MAX_CELLS_COLS + 1]; + uint cell_y[MAX_CELLS_ROWS + 1]; + uint i, j; + u32 cnt[IMAGE_REG_MAX]; + + if (cell_cols * cell_rows < 4) + { + DBG("Image is not textured."); + return; + } + + DBG("Detecting textured image... 
cols=%u rows=%u cell_cols=%u cell_rows=%u", cols, rows, cell_cols, cell_rows); + + /* Compute cells boundaries */ + for (i = 1, j = 0; i < cell_cols; i++) + cell_x[i] = fast_div_u32_u8(j += cols, cell_cols); + cell_x[0] = 0; + cell_x[cell_cols] = cols; + for (i = 1, j = 0; i < cell_rows; i++) + cell_y[i] = fast_div_u32_u8(j += rows, cell_rows); + cell_y[0] = 0; + cell_y[cell_rows] = rows; + + /* Preprocess blocks */ + for (uint i = 0; i < data->regions_count; i++) + for (struct image_sig_block *block = data->regions[i].blocks; block; block = block->next) + block->region = i; + + /* Process cells */ + double e = 0; + for (uint j = 0; j < cell_rows; j++) + for (uint i = 0; i < cell_cols; i++) + { + uint cell_area = 0; + bzero(cnt, data->regions_count * sizeof(u32)); + struct image_sig_block *b1 = data->blocks + cell_x[i] + cell_y[j] * cols, *b2; + for (uint y = cell_y[j]; y < cell_y[j + 1]; y++, b1 += cols) + { + b2 = b1; + for (uint x = cell_x[i]; x < cell_x[i + 1]; x++, b2++) + { + cnt[b2->region]++; + cell_area++; + } + } + for (uint k = 0; k < data->regions_count; k++) + { + int a = data->blocks_count * cnt[k] - cell_area * data->regions[k].count; + e += (double)a * a / ((double)isqr(data->regions[k].count) * cell_area); + } + } + + DBG("Coefficient=%g", (double)e / (data->regions_count * data->blocks_count)); + + /* Threshold */ + if (e < image_sig_textured_threshold * data->regions_count * data->blocks_count) + { + data->flags |= IMAGE_SIG_TEXTURED; + DBG("Image is textured."); + } + else + DBG("Image is not textured."); +} diff --git a/libucw/images/signature.h b/libucw/images/signature.h new file mode 100644 index 0000000..34a6c9a --- /dev/null +++ b/libucw/images/signature.h @@ -0,0 +1,155 @@ +#ifndef _IMAGES_SIGNATURE_H +#define _IMAGES_SIGNATURE_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define compute_image_signature ucw_compute_image_signature +#define image_region_dump ucw_image_region_dump +#define image_sig_border_bonus ucw_image_sig_border_bonus +#define 
image_sig_border_size ucw_image_sig_border_size +#define image_sig_cleanup ucw_image_sig_cleanup +#define image_sig_cmp_features_weights ucw_image_sig_cmp_features_weights +#define image_sig_compare_method ucw_image_sig_compare_method +#define image_sig_detect_textured ucw_image_sig_detect_textured +#define image_sig_finish ucw_image_sig_finish +#define image_sig_inertia_scale ucw_image_sig_inertia_scale +#define image_sig_init ucw_image_sig_init +#define image_sig_min_height ucw_image_sig_min_height +#define image_sig_min_width ucw_image_sig_min_width +#define image_sig_postquant_max_steps ucw_image_sig_postquant_max_steps +#define image_sig_postquant_min_steps ucw_image_sig_postquant_min_steps +#define image_sig_postquant_threshold ucw_image_sig_postquant_threshold +#define image_sig_preprocess ucw_image_sig_preprocess +#define image_sig_prequant_thresholds ucw_image_sig_prequant_thresholds +#define image_sig_segmentation ucw_image_sig_segmentation +#define image_sig_textured_threshold ucw_image_sig_textured_threshold +#define image_signatures_dist ucw_image_signatures_dist +#define image_signatures_dist_explain ucw_image_signatures_dist_explain +#define image_vector_dump ucw_image_vector_dump +#endif + +/* Configuration */ +extern uint image_sig_min_width, image_sig_min_height; +extern uint *image_sig_prequant_thresholds; +extern uint image_sig_postquant_min_steps, image_sig_postquant_max_steps, image_sig_postquant_threshold; +extern double image_sig_border_size; +extern int image_sig_border_bonus; +extern double image_sig_inertia_scale[]; +extern double image_sig_textured_threshold; +extern int image_sig_compare_method; +extern uint image_sig_cmp_features_weights[]; + +#define IMAGE_VEC_F 6 +#define IMAGE_REG_F IMAGE_VEC_F +#define IMAGE_REG_H 5 +#define IMAGE_REG_MAX 16 + +/* K-dimensional feature vector (6 bytes) */ +struct image_vector { + byte f[IMAGE_VEC_F]; /* texture features */ +} PACKED; + +/* Features for image regions (16 bytes) */ +struct 
image_region { + byte f[IMAGE_VEC_F]; /* texture features - L, u, v, LH, HL, HH */ + byte h[IMAGE_REG_H]; /* shape/pos features - I1, I2, I3, X, Y */ + byte wa; /* normalized area percentage */ + byte wb; /* normalized weight */ +}; + +#define IMAGE_SIG_TEXTURED 0x1 + +/* Image signature (usually 16 + len * 16 bytes) */ +struct image_signature { + byte len; /* number of regions */ + byte flags; /* IMAGE_SIG_xxx */ + u16 cols; /* image width */ + u16 rows; /* image height */ + u16 df; /* average weighted f dist */ + u16 dh; /* average weighted h dist */ + struct image_vector vec; /* average features of all regions... simple signature */ + struct image_region reg[IMAGE_REG_MAX];/* feature vector for every region */ +}; + +struct image_cluster { + union { + struct { + s32 dot; /* dot product of the splitting plane */ + s8 vec[IMAGE_VEC_F]; /* normal vector of the splitting plane */ + }; + struct { + u64 pos; /* cluster size in bytes */ + }; + }; +}; + +static inline uint image_signature_size(uint len) +{ + return OFFSETOF(struct image_signature, reg) + len * sizeof(struct image_region); +} + +/* sig-dump.c */ + +#define IMAGE_VECTOR_DUMP_MAX (IMAGE_VEC_F * 16 + 1) +#define IMAGE_REGION_DUMP_MAX ((IMAGE_REG_F + IMAGE_REG_H) * 16 + 100) + +byte *image_vector_dump(byte *buf, struct image_vector *vec); +byte *image_region_dump(byte *buf, struct image_region *reg); + +struct image_sig_block { + struct image_sig_block *next; /* linked list */ + u32 x, y; /* block position */ + byte area; /* block area in pixels (usually 16) */ + byte region; /* region index */ + byte v[IMAGE_VEC_F]; /* feature vector */ +}; + +struct image_sig_region { + struct image_sig_block *blocks; + u32 count; + u32 a[IMAGE_VEC_F]; + u32 b[IMAGE_VEC_F]; + u32 c[IMAGE_VEC_F]; + u64 e; + u64 w_sum; +}; + +struct image_sig_data { + struct image *image; + struct image_sig_block *blocks; + struct image_sig_region regions[IMAGE_REG_MAX]; + u32 cols; + u32 rows; + u32 full_cols; + u32 full_rows; + u32 flags; 
+ u32 area; + u32 valid; + u32 blocks_count; + u32 regions_count; + u32 f[IMAGE_VEC_F]; +}; + +/* sig-init.c */ + +int compute_image_signature(struct image_context *ctx, struct image_signature *sig, struct image *image); + +int image_sig_init(struct image_context *ctx, struct image_sig_data *data, struct image *image); +void image_sig_preprocess(struct image_sig_data *data); +void image_sig_finish(struct image_sig_data *data, struct image_signature *sig); +void image_sig_cleanup(struct image_sig_data *data); + +/* sig-seg.c */ + +void image_sig_segmentation(struct image_sig_data *data); + +/* sig-txt.c */ + +void image_sig_detect_textured(struct image_sig_data *data); + +/* sig-cmp.c */ + +uint image_signatures_dist(struct image_signature *sig1, struct image_signature *sig2); +uint image_signatures_dist_explain(struct image_signature *sig1, struct image_signature *sig2, void (*msg)(byte *text, void *param), void *param); + +#endif + diff --git a/libucw/images/ucw-color-tool.c b/libucw/images/ucw-color-tool.c new file mode 100644 index 0000000..edc892f --- /dev/null +++ b/libucw/images/ucw-color-tool.c @@ -0,0 +1,97 @@ +/* + * Color spaces tool + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU General Public License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +static void NONRET +usage(void) +{ + fputs("\ +Usage: ucw-color-tool input-color-space output-color-space\n\ +", stderr); + exit(1); +} + +static char *shortopts = ""; +static struct option longopts[] = +{ + { NULL, 0, 0, 0 } +}; + +static const struct color_space_info * +parse_color_space(byte *s) +{ + if (!strcasecmp(s, "sRGB")) + return &color_srgb_info; + else if (!strcasecmp(s, "AdobeRGB") || !strcasecmp(s, "Adobe RGB")) + return &color_adobe_rgb_info; + else if (!strcasecmp(s, "CIERGB") || strcasecmp(s, "CIE RGB")) + return &color_cie_rgb_info; + else + die("Unknown color space"); +} + +static void +print_matrix(double m[9]) +{ + for (uint j = 0; j < 3; j++) + { + for (uint i = 0; i < 3; i++) + printf(" %12.8f", m[i + j * 3]); + printf("\n"); + } +} + +int +main(int argc, char **argv) +{ + log_init(argv[0]); + int opt; + while ((opt = getopt_long(argc, argv, shortopts, longopts, NULL)) >= 0) + switch (opt) + { + default: + usage(); + } + + if (argc == optind + 1) + { + const struct color_space_info *a = parse_color_space(argv[optind]); + double a_to_xyz[9], xyz_to_a[9]; + color_compute_color_space_to_xyz_matrix(a_to_xyz, &a->chromacity); + color_invert_matrix(xyz_to_a, a_to_xyz); + printf("linear %s -> XYZ:\n", a->name); + print_matrix(a_to_xyz); + printf("XYZ -> linear %s:\n", a->name); + print_matrix(xyz_to_a); + printf("Simple gamma: %.8f\n", a->gamma.simple_gamma); + printf("Detailed gamma: g=%.8f o=%.8f t=%.8f s=%.8f\n", a->gamma.detailed_gamma, a->gamma.offset, a->gamma.transition, a->gamma.slope); + } + else if (argc == optind + 2) + { + const struct color_space_info *a = parse_color_space(argv[optind++]); + const struct color_space_info *b = parse_color_space(argv[optind]); + double a_to_b[9]; + color_compute_color_spaces_conversion_matrix(a_to_b, &a->chromacity, &b->chromacity); + printf("linear %s -> linear %s:\n", a->name, b->name); + print_matrix(a_to_b); + 
} + else + usage(); + + return 0; +} diff --git a/libucw/images/ucw-image-dup-test.c b/libucw/images/ucw-image-dup-test.c new file mode 100644 index 0000000..f38d1b2 --- /dev/null +++ b/libucw/images/ucw-image-dup-test.c @@ -0,0 +1,174 @@ +/* + * Image duplicates testing + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU General Public License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static void NONRET +usage(void) +{ + fputs("\ +Usage: ucw-image-dup-test [options] image1 image2 \n\ +\n\ +-q --quiet no progress messages\n\ +-f --format-1 image1 format (jpeg, gif, png)\n\ +-F --format-2 image2 format\n\ +-g --background background color (hexadecimal RRGGBB)\n\ +-t --transformations hexadecimal value of allowed transformtion (1=identity, FF=all)\n\ +", stderr); + exit(1); +} + +static char *shortopts = "qf:F:g:t:" CF_SHORT_OPTS; +static struct option longopts[] = +{ + CF_LONG_OPTS + { "quiet", 0, 0, 'q' }, + { "format-1", 0, 0, 'f' }, + { "format-2", 0, 0, 'F' }, + { "background", 0, 0, 'g' }, + { "transormations", 0, 0, 't' }, + { NULL, 0, 0, 0 } +}; + +static uint verbose = 1; +static byte *file_name_1; +static byte *file_name_2; +static enum image_format format_1; +static enum image_format format_2; +static struct color background_color; +static uint transformations = IMAGE_DUP_TRANS_ALL; + +#define MSG(x...) 
/*
 * ucw-image-dup-test entry point: parse the options, load the two images,
 * build an image_dup structure for each of them and log the result of
 * image_dup_compare().
 *
 * Returns 0 on success.  Wrong usage ends in usage(); a failing library call
 * wrapped in TRY() ends the process with exit(1).
 */
int
main(int argc, char **argv)
{
  log_init(argv[0]);
  int opt;
  while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0)
    switch (opt)
      {
	case 'q':
	  verbose = 0;
	  break;
	case 'f':
	  /* A zero result is treated as "unknown format" */
	  if (!(format_1 = image_extension_to_format(optarg)))
	    usage();
	  break;
	case 'F':
	  if (!(format_2 = image_extension_to_format(optarg)))
	    usage();
	  break;
	case 'g':
	  {
	    /* Exactly six hexadecimal digits: RRGGBB */
	    if (strlen(optarg) != 6)
	      usage();
	    errno = 0;
	    char *end;
	    long int v = strtol(optarg, &end, 16);
	    if (errno || *end || v < 0)
	      usage();
	    color_make_rgb(&background_color, (v >> 16) & 255, (v >> 8) & 255, v & 255);
	  }
	  break;
	case 't':
	  {
	    /* Hexadecimal mask of allowed transformations, limited to 0..0xff */
	    errno = 0;
	    char *end;
	    long int v = strtol(optarg, &end, 16);
	    if (errno || *end || v < 0 || v > 0xff)
	      usage();
	    transformations = v;
	  }
	  break;
	default:
	  usage();
      }

  /* Exactly two positional arguments: the images to compare */
  if (argc != optind + 2)
    usage();
  file_name_1 = argv[optind++];
  file_name_2 = argv[optind];

  /* Bail out with exit status 1 as soon as a checked call returns zero */
#define TRY(x) do{ if (!(x)) exit(1); }while(0)
  MSG("Initializing image library");
  struct image_context ctx;
  struct image_dup_context idc;
  struct image_io io;
  image_context_init(&ctx);
  image_dup_context_init(&ctx, &idc);

  struct image *img1, *img2;

  TRY(image_io_init(&ctx, &io));
  MSG("Reading %s", file_name_1);
  io.fastbuf = bopen(file_name_1, O_RDONLY, 1 << 18);
  /* GNU `?:': use the format given by -f, otherwise derive it from the file name */
  io.format = format_1 ? : image_file_name_to_format(file_name_1);
  TRY(image_io_read_header(&io));
  io.flags = COLOR_SPACE_RGB | IMAGE_IO_USE_BACKGROUND;
  /* Background priority: -g option, then whatever the header provided, then black */
  if (background_color.color_space)
    io.background_color = background_color;
  else if (!io.background_color.color_space)
    io.background_color = color_black;
  TRY(image_io_read_data(&io, 1));
  bclose(io.fastbuf);
  img1 = io.image;
  MSG("Image size=%ux%u", img1->cols, img1->rows);

  /* The same io object is reused for the second image */
  image_io_reset(&io);
  MSG("Reading %s", file_name_2);
  io.fastbuf = bopen(file_name_2, O_RDONLY, 1 << 18);
  io.format = format_2 ? : image_file_name_to_format(file_name_2);
  TRY(image_io_read_header(&io));
  io.flags = COLOR_SPACE_RGB | IMAGE_IO_USE_BACKGROUND;
  if (background_color.color_space)
    io.background_color = background_color;
  else if (!io.background_color.color_space)
    io.background_color = color_black;
  TRY(image_io_read_data(&io, 1));
  bclose(io.fastbuf);
  img2 = io.image;
  image_io_cleanup(&io);
  MSG("Image size=%ux%u", img2->cols, img2->rows);

  struct image_dup *dup1, *dup2;
  struct mempool *pool = mp_new(1 << 18);
  MSG("Creating internal structures");
  /* Reserve the estimated size, build the structure in place, then end the
   * allocation at the size image_dup_new() reported. */
  dup1 = mp_start(pool, image_dup_estimate_size(img1->cols, img1->rows, 1, idc.qtree_limit));
  uint size = image_dup_new(&idc, img1, dup1, 1);
  TRY(size);
  /* Arithmetic on `void *' is a GNU C extension (byte-sized steps) */
  mp_end(pool, (void *)dup1 + size);
  dup2 = mp_start(pool, image_dup_estimate_size(img2->cols, img2->rows, 1, idc.qtree_limit));
  size = image_dup_new(&idc, img2, dup2, 1);
  TRY(size);
  mp_end(pool, (void *)dup2 + size);

  idc.flags = transformations | IMAGE_DUP_SCALE | IMAGE_DUP_WANT_ALL;
  /* NOTE(review): the comparison is an argument of MSG(), which expands to
   * `if (verbose) msg(...)', so with -q it is not evaluated at all. */
  MSG("Similarity bitmap %02x", image_dup_compare(&idc, dup1, dup2));

  mp_delete(pool);

  image_destroy(img1);
  image_destroy(img2);
  image_dup_context_cleanup(&idc);
  image_context_cleanup(&ctx);
  MSG("Done.");
  return 0;
}
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static void NONRET +usage(void) +{ + fputs("\ +Usage: ucw-image-sim-test [options] image1 [image2] \n\ +\n\ +-q --quiet no progress messages\n\ +-f --format-1 image1 format (jpeg, gif, png)\n\ +-F --format-2 image2 format\n\ +-g --background background color (hexadecimal RRGGBB)\n\ +-r --segmentation-1 writes image1 segmentation to given file\n\ +-R --segmentation-2 writes image2 segmentation to given file\n\ +-6 --base64 display base64 encoded signature(s)\n\ +-2 --base224 display base224 encoded signature(s)\n\ +", stderr); + exit(1); +} + +static char *shortopts = "qf:F:g:t:r:R:62" CF_SHORT_OPTS; +static struct option longopts[] = +{ + CF_LONG_OPTS + { "quiet", 0, 0, 'q' }, + { "format-1", 0, 0, 'f' }, + { "format-2", 0, 0, 'F' }, + { "background", 0, 0, 'g' }, + { "segmentation-1", 0, 0, 'r' }, + { "segmentation-2", 0, 0, 'R' }, + { "base64", 0, 0, '6' }, + { "base224", 0, 0, '2' }, + { NULL, 0, 0, 0 } +}; + +static uint verbose = 1; +static byte *file_name_1; +static byte *file_name_2; +static enum image_format format_1; +static enum image_format format_2; +static struct color background_color; +static byte *segmentation_name_1; +static byte *segmentation_name_2; +static uint display_base64; +static uint display_base224; + +#define MSG(x...) 
do{ if (verbose) msg(L_INFO, ##x); }while(0) +#define TRY(x) do{ if (!(x)) exit(1); }while(0) + +static void +msg_str(byte *s, void *param UNUSED) +{ + MSG("%s", s); +} + +static void +dump_signature(struct image_signature *sig) +{ + byte buf[MAX(IMAGE_VECTOR_DUMP_MAX, IMAGE_REGION_DUMP_MAX)]; + image_vector_dump(buf, &sig->vec); + MSG("vector: %s", buf); + for (uint i = 0; i < sig->len; i++) + { + image_region_dump(buf, sig->reg + i); + MSG("region %u: %s", i, buf); + } + uint sig_size = image_signature_size(sig->len); + if (display_base64) + { + byte buf[BASE64_ENC_LENGTH(sig_size) + 1]; + uint enc_size = base64_encode(buf, (byte *)sig, sig_size); + buf[enc_size] = 0; + MSG("base64 encoded: %s", buf); + } + if (display_base224) + { + byte buf[BASE224_ENC_LENGTH(sig_size) + 1]; + uint enc_size = base224_encode(buf, (byte *)sig, sig_size); + buf[enc_size] = 0; + MSG("base224 encoded: %s", buf); + } +} + +static struct image_context ctx; +static struct image_io io; + +static void +write_segmentation(struct image_sig_data *data, byte *fn) +{ + MSG("Writing segmentation to %s", fn); + + struct fastbuf *fb = bopen(fn, O_WRONLY | O_CREAT | O_TRUNC, 4096); + struct image *img; + TRY(img = image_new(&ctx, data->image->cols, data->image->rows, COLOR_SPACE_RGB, NULL)); + image_clear(&ctx, img); + + for (uint i = 0; i < data->regions_count; i++) + { + byte c[3]; + double luv[3], xyz[3], srgb[3]; + luv[0] = data->regions[i].a[0] * (4 / 2.55); + luv[1] = ((int)data->regions[i].a[1] - 128) * (4 / 2.55); + luv[2] = ((int)data->regions[i].a[2] - 128) * (4 / 2.55); + luv_to_xyz_exact(xyz, luv); + xyz_to_srgb_exact(srgb, xyz); + c[0] = CLAMP(srgb[0] * 255, 0, 255); + c[1] = CLAMP(srgb[1] * 255, 0, 255); + c[2] = CLAMP(srgb[2] * 255, 0, 255); + for (struct image_sig_block *block = data->regions[i].blocks; block; block = block->next) + { + uint x1 = block->x * 4; + uint y1 = block->y * 4; + uint x2 = MIN(x1 + 4, img->cols); + uint y2 = MIN(y1 + 4, img->rows); + byte *p = img->pixels 
+ x1 * 3 + y1 * img->row_size; + for (uint y = y1; y < y2; y++, p += img->row_size) + { + byte *p2 = p; + for (uint x = x1; x < x2; x++, p2 += 3) + { + p2[0] = c[0]; + p2[1] = c[1]; + p2[2] = c[2]; + } + } + } + } + + io.fastbuf = fb; + io.image = img; + io.format = image_file_name_to_format(fn); + TRY(image_io_write(&io)); + image_io_reset(&io); + + image_destroy(img); + bclose(fb); +} + +int +main(int argc, char **argv) +{ + log_init(argv[0]); + int opt; + while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0) + switch (opt) + { + case 'q': + verbose = 0; + break; + case 'f': + if (!(format_1 = image_extension_to_format(optarg))) + usage(); + break; + case 'F': + if (!(format_2 = image_extension_to_format(optarg))) + usage(); + break; + case 'g': + { + if (strlen(optarg) != 6) + usage(); + errno = 0; + char *end; + long int v = strtol(optarg, &end, 16); + if (errno || *end || v < 0) + usage(); + color_make_rgb(&background_color, (v >> 16) & 255, (v >> 8) & 255, v & 255); + } + break; + case 'r': + segmentation_name_1 = optarg; + break; + case 'R': + segmentation_name_2 = optarg; + break; + case '6': + display_base64++; + break; + case '2': + display_base224++; + break; + default: + usage(); + } + + if (argc != optind + 2 && argc != optind + 1) + usage(); + file_name_1 = argv[optind++]; + if (argc > optind) + file_name_2 = argv[optind++]; + + MSG("Initializing image library"); + random_gen_seed(); + srgb_to_luv_init(); + image_context_init(&ctx); + + struct image *img1, *img2; + + TRY(image_io_init(&ctx, &io)); + + if (file_name_1) + { + MSG("Reading %s", file_name_1); + io.fastbuf = bopen(file_name_1, O_RDONLY, 1 << 18); + io.format = format_1 ? 
: image_file_name_to_format(file_name_1); + TRY(image_io_read_header(&io)); + io.flags = COLOR_SPACE_RGB | IMAGE_IO_USE_BACKGROUND; + if (background_color.color_space) + io.background_color = background_color; + else if (!io.background_color.color_space) + io.background_color = color_black; + TRY(image_io_read_data(&io, 1)); + bclose(io.fastbuf); + img1 = io.image; + MSG("Image size=%ux%u", img1->cols, img1->rows); + image_io_reset(&io); + } + else + img1 = NULL; + + if (file_name_2) + { + MSG("Reading %s", file_name_2); + io.fastbuf = bopen(file_name_2, O_RDONLY, 1 << 18); + io.format = format_2 ? : image_file_name_to_format(file_name_2); + TRY(image_io_read_header(&io)); + io.flags = COLOR_SPACE_RGB | IMAGE_IO_USE_BACKGROUND; + if (background_color.color_space) + io.background_color = background_color; + else if (!io.background_color.color_space) + io.background_color = color_black; + TRY(image_io_read_data(&io, 1)); + bclose(io.fastbuf); + img2 = io.image; + MSG("Image size=%ux%u", img2->cols, img2->rows); + image_io_reset(&io); + } + else + img2 = NULL; + + struct image_signature sig1, sig2; + MSG("Computing signatures"); + if (img1) + { + struct image_sig_data data; + TRY(image_sig_init(&ctx, &data, img1)); + image_sig_preprocess(&data); + if (data.valid) + { + image_sig_segmentation(&data); + image_sig_detect_textured(&data); + } + if (segmentation_name_1) + write_segmentation(&data, segmentation_name_1); + image_sig_finish(&data, &sig1); + image_sig_cleanup(&data); + dump_signature(&sig1); + } + if (img2) + { + struct image_sig_data data; + TRY(image_sig_init(&ctx, &data, img2)); + image_sig_preprocess(&data); + if (data.valid) + { + image_sig_segmentation(&data); + image_sig_detect_textured(&data); + } + if (segmentation_name_2) + write_segmentation(&data, segmentation_name_2); + image_sig_finish(&data, &sig2); + image_sig_cleanup(&data); + dump_signature(&sig2); + } + + if (img1 && img2) + { + uint dist; + if (verbose) + { + struct fastbuf *fb = bfdopen(0, 
/*
 * Long option table of ucw-image-tool.
 *
 * The second field is has_arg: 0 = no argument, 1 = required argument.  It
 * mirrors shortopts ("qf:F:s:b:c:Q:g:G:ae"): every option whose handler
 * reads optarg requires an argument in its long form too.  With has_arg == 0
 * getopt_long() would leave optarg == NULL for e.g. "--size 100x200" and the
 * handler would dereference it.
 */
static struct option longopts[] =
{
  { "quiet",			0, 0, 'q' },
  { "input-format",		1, 0, 'f' },
  { "output-format",		1, 0, 'F' },
  { "size",			1, 0, 's' },
  { "fit-to-box",		1, 0, 'b' },
  { "colorspace",		1, 0, 'c' },
  { "jpeg-quality",		1, 0, 'Q' },
  { "background",		1, 0, 'g' },
  { "default-background",	1, 0, 'G' },
  { "remove-alpha",		0, 0, 'a' },
  { "exif",			0, 0, 'e' },
  { NULL,			0, 0, 0 }
};
1 && argc != optind + 2) + usage(); + input_file_name = argv[optind++]; + if (argc > optind) + output_file_name = argv[optind]; + +#define TRY(x) do{ if (!(x)) exit(1); }while(0) + MSG("Initializing image library"); + struct image_context ctx; + struct image_io io; + image_context_init(&ctx); + ctx.tracing_level = ~0U; + if (!image_io_init(&ctx, &io)) + die("Cannot initialize image I/O"); + + MSG("Reading %s", input_file_name); + byte cs_buf[IMAGE_CHANNELS_FORMAT_MAX_SIZE]; + io.fastbuf = bopen(input_file_name, O_RDONLY, 1 << 18); + io.format = input_format ? : image_file_name_to_format(input_file_name); + if (exif) + io.flags |= IMAGE_IO_WANT_EXIF; + TRY(image_io_read_header(&io)); + if (!output_file_name) + { + bclose(io.fastbuf); + printf("Format: %s\n", image_format_to_extension(io.format) ? : (byte *)"?"); + printf("Dimensions: %dx%d\n", io.cols, io.rows); + printf("Colorspace: %s\n", (io.flags & IMAGE_IO_HAS_PALETTE) ? (byte *)"Palette" : image_channels_format_to_name(io.flags, cs_buf)); + printf("NumColors: %u\n", io.number_of_colors); + if (io.background_color.color_space) + { + byte rgb[3]; + TRY(color_put(&ctx, &io.background_color, rgb, COLOR_SPACE_RGB)); + printf("Background: %02x%02x%02x\n", rgb[0], rgb[1], rgb[2]); + } + if (io.exif_size) + printf("ExifSize: %u\n", io.exif_size); + } + else + { + MSG("%s %dx%d %s", image_format_to_extension(io.format) ? : (byte *)"?", io.cols, io.rows, + (io.flags & IMAGE_IO_HAS_PALETTE) ? 
(byte *)"Palette" : image_channels_format_to_name(io.flags, cs_buf)); + if (cols) + if (fit_to_box) + { + image_dimensions_fit_to_box(&io.cols, &io.rows, MIN(cols, 0xffff), MIN(rows, 0xffff), 0); + } + else + { + io.cols = cols; + io.rows = rows; + } + if (background_color.color_space) + io.background_color = background_color; + else if (!io.background_color.color_space) + io.background_color = default_background_color; + if (remove_alpha) + io.flags &= ~IMAGE_ALPHA; + if (channels_format) + io.flags = io.flags & ~IMAGE_PIXEL_FORMAT | channels_format; + if (!(io.flags & IMAGE_ALPHA)) + io.flags |= IMAGE_IO_USE_BACKGROUND; + if (jpeg_quality) + io.jpeg_quality = jpeg_quality; + uint output_fmt = output_format ? : image_file_name_to_format(output_file_name); + uint output_cs = io.flags & IMAGE_COLOR_SPACE; + if (output_fmt != IMAGE_FORMAT_JPEG && + output_cs != COLOR_SPACE_GRAYSCALE && + output_cs != COLOR_SPACE_RGB) + { + MSG("Forcing RGB color space"); + io.flags = (io.flags & ~IMAGE_COLOR_SPACE) | COLOR_SPACE_RGB; + } + TRY(image_io_read_data(&io, 0)); + bclose(io.fastbuf); + MSG("Writing %s", output_file_name); + io.fastbuf = bopen(output_file_name, O_WRONLY | O_CREAT | O_TRUNC, 1 << 18); + io.format = output_format ? : image_file_name_to_format(output_file_name); + MSG("%s %dx%d %s", image_format_to_extension(io.format) ? : (byte *)"?", io.cols, io.rows, + image_channels_format_to_name(io.flags, cs_buf)); + TRY(image_io_write(&io)); + bclose(io.fastbuf); + } + + image_io_cleanup(&io); + image_context_cleanup(&ctx); + MSG("Done."); + return 0; +} diff --git a/libucw/ksp.cfg b/libucw/ksp.cfg new file mode 100644 index 0000000..a550c37 --- /dev/null +++ b/libucw/ksp.cfg @@ -0,0 +1,35 @@ +# Do we want shared or static libraries? 
+UnSet("CONFIG_SHARED"); + +# Include debugging code +Set("CONFIG_DEBUG"); + +# We want the public API +Set("CONFIG_INSTALL_API"); + +# Libucw extensions +UnSet("CONFIG_UCW_PERL"); +UnSet("CONFIG_UCW_PERL_MODULES"); +UnSet("CONFIG_UCW_SHELL_UTILS"); +UnSet("CONFIG_UCW_UTILS"); + +# Libucw-images settings +UnSet("CONFIG_IMAGES"); +UnSet("CONFIG_IMAGES_LIBJPEG"); +UnSet("CONFIG_IMAGES_LIBPNG"); +UnSet("CONFIG_IMAGES_LIBUNGIF"); +UnSet("CONFIG_IMAGES_LIBGIF"); +UnSet("CONFIG_IMAGES_LIBMAGICK"); + +# Libucw-charset +UnSet("CONFIG_CHARSET"); +UnSet("CONFIG_CHARSET_UTILS"); + +# Libucw-xml +UnSet("CONFIG_XML"); + +# Libucw-json +UnSet("CONFIG_JSON"); + +# Return success +1; diff --git a/libucw/maint/HOWTO b/libucw/maint/HOWTO new file mode 100644 index 0000000..f973275 --- /dev/null +++ b/libucw/maint/HOWTO @@ -0,0 +1,17 @@ +How to release libucw +~~~~~~~~~~~~~~~~~~~~~ + +- Run maint/abi-check-exports. No symbols should be listed except for _init + and friends. Otherwise, run maint/abi-map-symbols, update maint/libucw.abi + accordingly, and call maint/abi-rename-symbols to fix headers. + +- Update version numbers and ABI suffix in ucw/default.cfg: + - If there were major changes, or changes which are not backward + compatible on source level, increase the major number. + - If new features were added, increase the minor number. + - Otherwise, increase only the third number. + - If anything in the ABI was changed, update UCW_ABI_SUFFIX.
+ +- git-tag vX.Y.Z && git push --tags + +- Run maint/release diff --git a/libucw/maint/abi-check-exports b/libucw/maint/abi-check-exports new file mode 100755 index 0000000..88818a1 --- /dev/null +++ b/libucw/maint/abi-check-exports @@ -0,0 +1,20 @@ +#!/usr/bin/perl +# Check symbols exported by a library +# (c) 2014 Martin Mares + +use common::sense; + +my $lib = $ARGV[0] or die "Usage: $0 \n"; +open my $f, '-|', 'nm', $lib or die; +while (<$f>) { + chomp; + next if /^\s/; + my ($addr, $type, $sym) = split /\s+/; + if ($sym =~ m{^(ucw|ucwlib)_}) { + next + } + if ($type =~ m{[A-Z]}) { + print "$sym ($type)\n"; + } +} +close $f or die; diff --git a/libucw/maint/abi-map-symbols b/libucw/maint/abi-map-symbols new file mode 100755 index 0000000..d3c2355 --- /dev/null +++ b/libucw/maint/abi-map-symbols @@ -0,0 +1,85 @@ +#!/usr/bin/perl +# Search for symbols which might need renaming +# (c) 2014 Martin Mares + +use common::sense; + +sub symbol { + my ($sym) = @_; + return if $sym =~ m{^(ucw|ucwlib)_}; + print "$sym\n"; +} + +sub parse { + my ($file) = @_; + print "# $file\n"; + open my $f, '<', $file or die; + while (<$f>) { + chomp; + # Interpret special comments + m{// NOAPI} and next; + if (m{// API: (\w+)}) { + print "$1\n"; + next; + } + # Find things which look like top-level declarations + s{//.*}{}; + s{/\*.*}{}; + /^\s/ and next; + /^$/ and next; + /^#/ and next; + /^{/ and next; + /}/ and next; + /^"/ and next; + /^-/ and next; # Magic for ucw/getopt.h + /^\w+:/ and next; # Labels in inline functions + /^typedef\s/ and next; + /^static\s/ and next; + /^(struct|union|enum)(\s+\w+)?(;|\s*{)/ and next; + + # print "$_\n"; + + # Try to parse the declaration + s{\[[^\]]*\]}{}g; # Delete array sizes + if (m{^extern [^,]*(\s+\**\w+(,\s+\**\w+)*);}) { + my $x = $1; + $x =~ s{[,*]}{}g; + symbol $_ for grep { !/^$/ } split /\s+/, $x; + } elsif (m{( |\*)(\w+)\(}) { + symbol($2); + } else { + print "??? 
$_\n"; + } + + } + close $f; +} + +my %blacklist = map { $_ => 1 } qw( + ucw/binheap.h + ucw/char-map.h + ucw/ff-binary.h + ucw/gbuf.h + ucw/hashtable.h + ucw/kmp.h + ucw/kmp-search.h + ucw/redblack.h + ucw/str-match.h + ucw/strtonum.h + ucw/strtonum-gen.h + ucw/trie.h + charset/charconv-gen.h + charset/chartable.h + charset/U-cat.h + charset/U-ligatures.h + charset/U-lower.h + charset/U-unacc.h + charset/U-upper.h + images/image-walk.h + images/scale-gen.h +); + +for my $f (, , , , , ) { + next if $blacklist{$f}; + parse($f); +} diff --git a/libucw/maint/abi-rename-symbols b/libucw/maint/abi-rename-symbols new file mode 100755 index 0000000..acf0534 --- /dev/null +++ b/libucw/maint/abi-rename-symbols @@ -0,0 +1,69 @@ +#!/usr/bin/perl +# Re-generate symbol renaming defines +# (c) 2014 Martin Mares + +use common::sense; + +my %renames = (); +open my $f, '<', 'maint/libucw.abi' or die; +my $current; +while (<$f>) { + chomp; + if (/^#\s*(.*)/) { + $current = $1; + } else { + push @{$renames{$current}}, $_; + } +} +close $f; + +for my $g (sort keys %renames) { + my @symbols = sort @{$renames{$g}}; + @symbols or next; + + open my $in, '<', $g or die "Cannot open $g: $!\n"; + open my $out, '>', "$g.new" or die; + my $mode = 0; + my $cmt = 0; + while (<$in>) { + if (!$mode) { + if (m{^/\*$} && !$cmt) { + $cmt = 1; + } elsif (m{^\s} || + m{^#include\s} || + m{^#define\s+_} || + m{^#ifndef\s+_} || + m{^\s*$} + ) { + # Waiting for the right spot + } elsif (m{^#ifdef CONFIG_UCW_CLEAN_ABI$}) { + $mode = 2; + next; + } else { + $mode = 1; + print $out "#ifdef CONFIG_UCW_CLEAN_ABI\n"; + for my $sym (@symbols) { + print $out "#define $sym ucw_$sym\n"; + } + print $out "#endif\n\n"; + } + } elsif ($mode == 2) { + if (m{^$}) { + $mode = 0; + } + next; + } + print $out "$_"; + } + $mode or die; + close $out; + close $in; + system "cmp", "-s", $g, "$g.new"; + if ($?) 
{ + print "### $g: updated\n"; + rename "$g.new", $g or die; + } else { + print "--- $g: not modified\n"; + unlink "$g.new" or die; + } +} diff --git a/libucw/maint/libucw.abi b/libucw/maint/libucw.abi new file mode 100644 index 0000000..3a68d41 --- /dev/null +++ b/libucw/maint/libucw.abi @@ -0,0 +1,911 @@ +# ucw/alloc.h +# ucw/asio.h +asio_init_queue +asio_cleanup_queue +asio_get +asio_submit +asio_wait +asio_put +asio_sync +# ucw/base224.h +base224_encode +base224_decode +# ucw/base64.h +base64_encode +base64_decode +# ucw/bbuf.h +bb_vprintf +bb_printf +bb_vprintf_at +bb_printf_at +# ucw/binheap-node.h +# ucw/binsearch.h +# ucw/bitarray.h +bit_array_xrealloc +bit_array_count_bits +# ucw/bitops.h +bit_fls +ffs_table +# ucw/bitsig.h +bitsig_init +bitsig_free +bitsig_member +bitsig_insert +# ucw/chartype.h +# ucw/clists.h +# ucw/conf-internal.h +cf_obtain_context +cf_op_names +cf_type_names +cf_type_size +cf_interpret_line +cf_init_stack +cf_done_stack +cf_journal_swap +cf_journal_delete +cf_sections +cf_find_subitem +cf_commit_all +cf_add_dirty +cf_load_default +# ucw/conf.h +cf_new_context +cf_delete_context +cf_switch_context +cf_load +cf_reload +cf_set +cf_open_group +cf_close_group +cf_revert +cf_get_pool +cf_malloc +cf_malloc_zero +cf_strdup +cf_printf +cf_set_journalling +cf_journal_block +cf_journal_new_transaction +cf_journal_commit_transaction +cf_journal_rollback_transaction +cf_declare_section +cf_declare_rel_section +cf_init_section +cf_parse_int +cf_parse_u64 +cf_parse_double +cf_parse_ip +cf_find_item +cf_modify_item +cf_dump_sections +# ucw/config.h +# ucw/crc-tables.h +crc_tableil8_o32 +crc_tableil8_o40 +crc_tableil8_o48 +crc_tableil8_o56 +crc_tableil8_o64 +crc_tableil8_o72 +crc_tableil8_o80 +crc_tableil8_o88 +# ucw/crc.h +crc32_init +crc32_hash_buffer +# ucw/daemon.h +daemon_init +daemon_run +daemon_exit +daemon_resolve_ugid +daemon_switch_ugid +daemon_control +# ucw/eltpool.h +ep_new +ep_delete +ep_total_size +ep_alloc_slow +# ucw/fastbuf.h 
+fb_tie +fbpar_cf +fbpar_def +bopen_file +bopen_file_try +bopen_tmp_file +bopen_fd_name +bfilesync +bopen +bopen_try +bopen_tmp +bfdopen +bfdopen_shared +temp_file_name +open_tmp +bfix_tmp_file +bfdopen_internal +bfmmopen_internal +fbdir_cheat +fbdir_open_fd_internal +bclose_file_helper +bopen_limited_fd +fbmem_create +fbmem_clone_read +fbbuf_init_read +fbbuf_init_write +fbgrow_create +fbgrow_create_mp +fbgrow_reset +fbgrow_rewind +fbgrow_get_buf +fbpool_init +fbpool_start +fbpool_end +fbatomic_open +fbatomic_internal_write +fbnull_open +fbnull_start +fbnull_test +fbmulti_create +fbmulti_append +fbmulti_remove +bconfig +bclose +bthrow +brefill +bspout +bflush +bseek +bsetpos +brewind +bfilesize +bgetc_slow +bpeekc_slow +beof_slow +bputc_slow +bread_slow +bwrite_slow +bgets +bgets0 +bgets_nodie +bgets_bb +bgets_mp +bgets_stk_init +bgets_stk_step +bbcopy_slow +bskip_slow +bprintf +vbprintf +# ucw/fb-socket.h +fbsock_create +# ucw/ff-binary.h +bgetw_slow +bgetw_be_slow +bgetw_le_slow +bgetl_slow +bgetl_be_slow +bgetl_le_slow +bgetq_slow +bgetq_be_slow +bgetq_le_slow +bget5_slow +bget5_be_slow +bget5_le_slow +bputw_slow +bputw_be_slow +bputw_le_slow +bputl_slow +bputl_be_slow +bputl_le_slow +bputq_slow +bputq_be_slow +bputq_le_slow +bput5_slow +bput5_be_slow +bput5_le_slow +# ucw/ff-unicode.h +bget_utf8_slow +bget_utf8_32_slow +bput_utf8_slow +bput_utf8_32_slow +bget_utf16_be_slow +bget_utf16_le_slow +bput_utf16_be_slow +bput_utf16_le_slow +# ucw/ff-varint.h +bget_varint_slow +bput_varint_slow +# ucw/fw-hex.h +fb_wrap_hex_out +fb_wrap_hex_in +# ucw/gary.h +gary_init +gary_set_size +gary_push_helper +gary_fix +gary_empty_hdr +# ucw/getopt.h +cf_def_file +cf_env_file +cf_getopt +reset_getopt +# ucw/hashfunc.h +str_len_aligned +hash_string_aligned +hash_block_aligned +str_len +hash_string +hash_block +hash_string_nocase +# ucw/heap.h +# ucw/io.h +mmap_file +munmap_file +careful_read +careful_write +sync_dir +# ucw/ipaccess.h +ipaccess_cf +ipaccess_check +ip_addrmask_type 
+ip_addrmask_match +# ucw/lib.h +msg +vmsg +die +vdie +log_title +log_pid +log_die_hook +log_init +log_fork +log_file +assert_failed +assert_failed_msg +assert_failed_noinfo +xmalloc +xrealloc +xfree +xmalloc_zero +xstrdup +page_alloc +page_alloc_zero +page_free +page_realloc +big_alloc +big_alloc_zero +big_free +random_u32 +random_max +random_u64 +random_max_u64 +# ucw/lizard.h +lizard_compress +lizard_decompress +lizard_alloc +lizard_free +lizard_decompress_safe +adler32_update +# ucw/log-internal.h +log_pass_msg +log_streams +log_streams_after +log_stream_default +log_type_names +# ucw/log.h +log_register_type +log_find_type +log_type_name +log_new_stream +log_close_stream +log_add_substream +log_rm_substream +log_set_format +log_stream_by_flags +log_set_default_stream +log_close_all +log_pass_filtered +log_new_file +log_new_fd +log_switch_disable +log_switch_enable +log_switch +log_drop_stderr +log_new_syslog +log_syslog_facility_exists +log_new_configured +log_configured +log_check_configured +# ucw/mainloop.h +main_new +main_delete +main_destroy +main_switch_context +main_current +main_init +main_cleanup +main_teardown +main_loop +main_step +main_debug_context +timer_add +timer_add_rel +timer_del +main_get_time +timer_debug +hook_add +hook_del +hook_debug +file_add +file_chg +file_del +file_debug +block_io_add +block_io_del +block_io_read +block_io_write +block_io_set_timeout +rec_io_add +rec_io_del +rec_io_start_read +rec_io_stop_read +rec_io_set_timeout +rec_io_write +rec_io_parse_line +process_add +process_del +process_fork +process_debug +signal_add +signal_del +signal_debug +# ucw/md5.h +md5_init +md5_update +md5_final +md5_transform +md5_hash_buffer +# ucw/mempool.h +mp_init +mp_new +mp_delete +mp_flush +mp_stats +mp_total_size +mp_shrink +mp_alloc_internal +mp_alloc +mp_alloc_noalign +mp_alloc_zero +mp_start_internal +mp_grow_internal +mp_spread_internal +mp_start +mp_start_noalign +mp_append_utf8_32 +mp_open +mp_realloc +mp_realloc_zero +mp_push 
+mp_restore +mp_pop +mp_strdup +mp_memdup +mp_multicat +mp_strjoin +mp_str_from_mem +mp_printf +mp_vprintf +mp_printf_append +mp_vprintf_append +# ucw/opt-internal.h +opt_precompute +# ucw/opt.h +opt_parse +opt_failure +opt_help +opt_handle_help +opt_handle_config +opt_handle_set +opt_handle_dumpconfig +opt_conf_hook_internal +cf_def_file +cf_env_file +# ucw/partmap.h +partmap_open +partmap_close +partmap_size +partmap_load +# ucw/prime.h +isprime +nextprime +next_table_prime +prev_table_prime +# ucw/process.h +setproctitle_init +setproctitle +getproctitle +format_exit_status +run_command +exec_command +echo_command +run_command_v +exec_command_v +echo_command_v +# ucw/regex.h +rx_compile +rx_free +rx_match +rx_subst +# ucw/resource.h +rp_new +rp_delete +rp_detach +rp_commit +rp_dump +res_alloc +res_dump +res_free +res_detach +res_add +res_drop +res_for_fd +res_malloc +res_malloc_zero +res_realloc +res_subpool +res_mempool +res_eltpool +# ucw/semaphore.h +# ucw/sha1.h +sha1_init +sha1_update +sha1_final +sha1_hash_buffer +sha1_hmac +sha1_hmac_init +sha1_hmac_update +sha1_hmac_final +# ucw/sighandler.h +handle_signal +unhandle_signal +set_signal_handler +# ucw/signames.h +sig_name_to_number +sig_number_to_name +# ucw/simple-lists.h +simp_append +simp2_append +cf_string_list_config +cf_2string_list_config +# ucw/slists.h +slist_prev +slist_insert_before +slist_remove +# ucw/stkstring.h +stk_array_len +stk_array_join +stk_printf_internal +stk_vprintf_internal +stk_hexdump_internal +stk_fsize_internal +# ucw/string.h +str_format_flags +str_count_char +str_unesc +str_sepsplit +str_wordsplit +str_match_pattern +str_match_pattern_nocase +mem_to_hex +hex_to_mem +str_has_prefix +str_has_suffix +str_hier_prefix +str_hier_suffix +# ucw/strtonum.h +str_to_uintmax +str_to_uint +str_to_uns +str_to_u32 +str_to_u64 +# ucw/table.h +table_init +table_cleanup +table_start +table_end +table_col_str +table_col_generic_format +table_col_printf +table_col_fbstart +table_col_fbend 
+table_end_row +table_reset_row +table_get_col_idx +table_set_col_opt +table_get_col_list +table_set_col_order +table_set_col_order_by_name +table_col_is_printed +table_set_formatter +table_set_option_value +table_set_option +table_set_gary_options +table_fmt_human_readable +table_fmt_machine_readable +table_fmt_blockline +table_col_int +table_col_uint +table_col_double +table_col_str +table_col_intmax +table_col_s64 +table_col_uintmax +table_col_u64 +table_col_bool +# ucw/tbf.h +tbf_init +tbf_limit +# ucw/threads.h +# ucw/time.h +get_timestamp +timestamp_type +init_timer +get_timer +switch_timer +# ucw/trans.h +trans_init +trans_cleanup +trans_open +trans_get_current +trans_commit +trans_rollback +trans_fold +trans_dump +trans_get_pool +trans_throw +trans_vthrow +trans_throw_exc +trans_caught +trans_current_exc +# ucw/unaligned.h +# ucw/unicode.h +utf8_strlen +utf8_strnlen +# ucw/url.h +url_deescape +url_enescape +url_enescape_friendly +url_split +url_normalize +url_canonicalize +url_pack +url_canon_split_rel +url_auto_canonicalize_rel +url_identify_protocol +url_has_repeated_component +url_error +url_proto_names +# ucw/varint.h +varint_put_big +varint_get_big +# ucw/wildmatch.h +wp_compile +wp_match +wp_min_size +# ucw/workqueue.h +worker_pool_init +worker_pool_cleanup +raw_queue_init +raw_queue_cleanup +raw_queue_put +raw_queue_get +raw_queue_try_get +work_queue_init +work_queue_cleanup +work_submit +work_wait +work_try_wait +# ucw/xtypes.h +xtype_parse_fmt +xtype_format_fmt +xt_str +xt_int +xt_s64 +xt_intmax +xt_uint +xt_u64 +xt_uintmax +xt_bool +xt_double +xtype_unit_parser +# ucw/xtypes-extra.h +table_col_size +table_col_timestamp +xt_size +xt_timestamp +# ucw/sorter/common.h +sorter_trace +sorter_trace_array +sorter_stream_bufsize +sorter_debug +sorter_min_radix_bits +sorter_max_radix_bits +sorter_add_radix_bits +sorter_min_multiway_bits +sorter_max_multiway_bits +sorter_threads +sorter_bufsize +sorter_small_input +sorter_thread_threshold 
+sorter_thread_chunk +sorter_radix_threshold +sorter_fb_params +sorter_small_fb_params +sorter_run +sorter_alloc +sorter_prepare_buf +sorter_alloc_buf +sorter_free_buf +sbuck_new +sbuck_drop +sbuck_have +sbuck_has_file +sbuck_size +sbuck_read +sbuck_write +sbuck_swap_out +asort_run +asort_start_threads +asort_stop_threads +# charset/charconv.h +conv_init +conv_set_charset +conv_in_to_ucs +conv_ucs_to_out +conv_x_to_ucs +conv_ucs_to_x +conv_x_count +find_charset_by_name +charset_name +# charset/fb-charconv.h +fb_wrap_charconv_in +fb_wrap_charconv_out +# charset/mp-charconv.h +mp_strconv +# charset/stk-charconv.h +stk_strconv_init +stk_strconv_step +# charset/unicat.h +_U_cat +_U_upper +_U_lower +_U_unaccent +Uexpand_lig +# images/color.h +color_space_channels +color_space_name +color_space_id_to_name +color_space_name_to_id +color_get +color_put +color_black +color_white +image_conv_defaults +image_conv +color_illuminant_d50 +color_illuminant_d65 +color_illuminant_e +color_adobe_rgb_info +color_apple_rgb_info +color_cie_rgb_info +color_color_match_rgb_info +color_srgb_info +color_compute_color_space_to_xyz_matrix +color_compute_bradford_matrix +color_compute_color_spaces_conversion_matrix +color_invert_matrix +srgb_to_xyz_exact +xyz_to_srgb_exact +xyz_to_luv_exact +luv_to_xyz_exact +rgb_to_cmyk_exact +cmyk_to_rgb_exact +srgb_to_luv_tab1 +srgb_to_luv_tab2 +srgb_to_luv_tab3 +srgb_to_luv_init +srgb_to_luv_pixels +srgb_to_luv_grid +color_interpolation_table +color_conv_init +color_conv_pixels +# images/duplicates.h +image_dup_context_init +image_dup_context_cleanup +image_dup_estimate_size +image_dup_new +image_dup_compare +# images/error.h +image_trace +# images/images.h +image_context_init +image_context_cleanup +image_context_msg +image_context_vmsg +image_context_msg_default +image_context_msg_silent +image_max_dim +image_max_bytes +image_channels_format_to_name +image_name_to_channels_format +image_new +image_clone +image_destroy +image_clear +image_init_matrix 
+image_init_subimage +image_scale +image_dimensions_fit_to_box +image_io_init +image_io_cleanup +image_io_reset +image_io_read_header +image_io_read_data +image_io_read +image_io_write +image_format_to_extension +image_extension_to_format +image_file_name_to_format +# images/io-main.h +libjpeg_read_header +libjpeg_read_data +libjpeg_write +libpng_read_header +libpng_read_data +libpng_write +libungif_read_header +libungif_read_data +libmagick_init +libmagick_cleanup +libmagick_read_header +libmagick_read_data +libmagick_write +image_io_read_data_prepare +image_io_read_data_finish +image_io_read_data_break +# images/math.h +fast_div_tab +fast_sqrt_tab +# images/object.h +get_image_obj_info +get_image_obj_thumb +read_image_obj_thumb +put_image_obj_signature +get_image_obj_signature +# images/sig-cmp-gen.h +image_signatures_dist +image_signatures_dist_explain +# images/signature.h +image_sig_min_width +image_sig_min_height +image_sig_prequant_thresholds +image_sig_postquant_min_steps +image_sig_postquant_max_steps +image_sig_postquant_threshold +image_sig_border_size +image_sig_border_bonus +image_sig_inertia_scale +image_sig_textured_threshold +image_sig_compare_method +image_sig_cmp_features_weights +image_vector_dump +image_region_dump +compute_image_signature +image_sig_init +image_sig_preprocess +image_sig_finish +image_sig_cleanup +image_sig_segmentation +image_sig_detect_textured +image_signatures_dist +image_signatures_dist_explain +# ucw-xml/dtd.h +xml_dtd_find_notn +xml_dtd_find_entity +xml_dtd_find_elem +xml_dtd_init +xml_dtd_cleanup +xml_dtd_finish +xml_dtd_find_attr +# ucw-xml/internals.h +xml_throw +xml_do_push +xml_do_pop +xml_push_dom +xml_pop_dom +xml_hash_new +xml_spout_chars +xml_fatal_nested +xml_push_source +xml_push_entity +xml_refill +xml_sources_cleanup +xml_fatal_expected +xml_fatal_expected_white +xml_fatal_expected_quot +xml_parse_white +xml_parse_eq +xml_parse_name +xml_skip_name +xml_parse_nmtoken +xml_parse_system_literal 
+xml_parse_pubid_literal +xml_parse_char_ref +xml_parse_pe_ref +xml_parse_attr_value +xml_skip_internal_subset +xml_parse_notation_decl +xml_parse_entity_decl +xml_parse_element_decl +xml_parse_attr_list_decl +xml_push_comment +xml_pop_comment +xml_skip_comment +xml_push_pi +xml_pop_pi +xml_skip_pi +xml_validate_attr +xml_ns_cleanup +xml_ns_reset +xml_ns_push_element +xml_ns_pop_element +# ucw-xml/xml.h +xml_node_qname +xml_attr_qname +xml_attr_find +xml_attr_find_ns +xml_attr_value +xml_attr_value_ns +xml_normalize_white +xml_merge_chars +xml_merge_dom_chars +xml_init +xml_cleanup +xml_reset +xml_push_fastbuf +xml_parse +xml_next +xml_next_state +xml_skip_element +xml_row +xml_def_find_entity +xml_def_resolve_entity +xml_warn +xml_error +xml_fatal +xml_ns_enable +xml_ns_by_id +xml_ns_by_name +# ucw-json/json.h +json_new +json_delete +json_reset +json_push +json_pop +json_new_node +json_new_number +json_number_to_int +json_number_to_uint +json_number_to_s64 +json_number_to_u64 +json_new_array +json_array_append +json_new_object +json_object_set +json_object_get +json_parse +json_set_input +json_next_token +json_peek_token +json_next_value +json_write +json_set_output +json_write_value diff --git a/libucw/maint/release b/libucw/maint/release new file mode 100755 index 0000000..e245cbc --- /dev/null +++ b/libucw/maint/release @@ -0,0 +1,42 @@ +#!/usr/bin/perl +# A simple script for making releases of LibUCW +# (c) 2012--2014 Martin Mares + +use strict; +use warnings; +require "maint/release.pm"; + +my $r = new UCW::Release("libucw"); +$r->{"test_compile"} = "./configure && make -j4"; +$r->{"archive_dir"} = $ENV{"HOME"} . 
"/archives/sw/libucw"; +push @{$r->{"uploads"}}, { "url" => "scp://ftp.ucw.cz/~ftp/pub/libucw/" }; + +$r->ParseOptions; +$r->GetVersionFromGit; +$r->InitDist("maint/tmp"); +my $reldir = $r->GenPackage; +$r->GenFile("README"); +$r->Test; +$r->MakeArchive; +$r->Upload; + +if ($r->{"do_upload"}) { + my $tdir = $r->{"TESTDIR"}; + if (defined $tdir) { + print "Generating documentation\n"; + `cd $tdir && make docs`; + die if $?; + print "Upload of documentation\n"; + $r->Confirm; + `cd $tdir && maint/upload-doc`; + die if $@; + } else { + print "WARNING: With --notest, no documentation is published!\n"; + } + + print "Updating version at MJ's web\n"; + $r->Confirm; + my $wv = $r->{"VERSION"}; + `ssh jw "cd web && bin/release-prog libucw $wv"`; + die if $?; +} diff --git a/libucw/maint/release.pm b/libucw/maint/release.pm new file mode 100644 index 0000000..9790671 --- /dev/null +++ b/libucw/maint/release.pm @@ -0,0 +1,368 @@ +#!/usr/bin/perl +# A simple system for making software releases +# (c) 2003--2012 Martin Mares + +package UCW::Release; +use strict; +use warnings; +use Getopt::Long; + +our $verbose = 0; + +sub new($$) { + my ($class,$basename) = @_; + my $s = { + "PACKAGE" => $basename, + "rules" => [ + # p=preprocess, s=subst, -=discard + '(^|/)(CVS|\.arch-ids|{arch}|\.git|tmp)/' => '-', + '\.(lsm|spec)$' => 'ps', + '(^|/)README$' => 's' + ], + "conditions" => { + # Symbols, which can serve as conditions for the preprocessor + }, + "DATE" => `date '+%Y-%m-%d' | tr -d '\n'`, + "LSMDATE" => `date '+%y%m%d' | tr -d '\n'`, + "distfiles" => [ + # Files to be uploaded + ], + "uploads" => [ + # Locations where we want to upload, e.g.: + # { "url" => "ftp://metalab.unc.edu/incoming/linux/", + # "filter" => '(\.tar\.gz|\.lsm)$', } + ], + "test_compile" => "make", + # "archive_dir" => "/tmp/archives/$basename", + # Options + "do_test" => 1, + "do_patch" => 0, + "diff_against" => "", + "do_upload" => 1, + "do_sign" => 1, + }; + bless $s; + return $s; +} + +sub Confirm($) { 
+ my ($s) = @_; + print " "; <STDIN>; +} + +sub GetVersionFromGit($) { + my ($s) = @_; + return if defined $s->{"VERSION"}; + my $desc = `git describe --tags`; die "git describe failed\n" if $?; + chomp $desc; + my ($ver, $rest) = ($desc =~ /^v([0-9.]+)(.*)/) or die "Failed to understand output of git describe: $desc\n"; + print "Detected version $ver from git description $desc\n"; + if ($rest ne '') { + print "WARNING: We are several commits past release tag... "; + $s->Confirm; + } + $s->{"VERSION"} = $ver; + return $ver; +} + +sub GetVersionFromFile($) { + my ($s,$file,$rx) = @_; + return if defined $s->{"VERSION"}; + open F, $file or die "Unable to open $file for version autodetection"; + while (<F>) { + chomp; + if (/$rx/) { + $s->{"VERSION"} = $1; + print "Detected version $1 from $file\n" if $verbose; + last; + } + } + close F; + if (!defined $s->{"VERSION"}) { die "Failed to auto-detect version"; } + return $s->{"VERSION"}; +} + +sub GetVersionsFromChangelog($) { + my ($s,$file,$rx) = @_; + return if defined $s->{"VERSION"}; + open F, $file or die "Unable to open $file for version autodetection"; + while (<F>) { + chomp; + if (/$rx/) { + if (!defined $s->{"VERSION"}) { + $s->{"VERSION"} = $1; + print "Detected version $1 from $file\n" if $verbose; + } elsif ($s->{"VERSION"} eq $1) { + # do nothing + } else { + $s->{"OLDVERSION"} = $1; + print "Detected previous version $1 from $file\n" if $verbose; + last; + } + } + } + close F; + if (!defined $s->{"VERSION"}) { die "Failed to auto-detect version"; } + return $s->{"VERSION"}; +} + +sub InitDist($) { + my ($s,$dd) = @_; + $s->{"DISTDIR"} = $dd; + print "Initializing dist directory $dd\n" if $verbose; + `rm -rf $dd`; die if $?; + `mkdir -p $dd`; die if $?; + + if ($s->{"archive_dir"}) { + unshift @{$s->{"uploads"}}, { "url" => "file:" . 
$s->{"archive_dir"} }; + } +} + +sub ExpandVar($$) { + my ($s,$v) = @_; + if (defined $s->{$v}) { + return $s->{$v}; + } else { + die "Reference to unknown variable $v"; + } +} + +sub TransformFile($$$) { + my ($s,$file,$action) = @_; + + my $preprocess = ($action =~ /p/); + my $subst = ($action =~ /s/); + my $dest = "$file.dist"; + open I, "<", $file or die "open($file): $?"; + open O, ">", "$dest" or die "open($dest): $!"; + my @ifs = (); # stack of conditions, 1=satisfied + my $empty = 0; # last line was empty + my $is_makefile = ($file =~ /(Makefile|.mk)$/); + while () { + if ($subst) { + s/@([0-9A-Za-z_]+)@/$s->ExpandVar($1)/ge; + } + if ($preprocess) { + if (/^#/ || $is_makefile) { + if (/^#?ifdef\s+(\w+)/) { + if (defined ${$s->{"conditions"}}{$1}) { + push @ifs, ${$s->{"conditions"}}{$1}; + next; + } + push @ifs, 0; + } elsif (/^#ifndef\s+(\w+)/) { + if (defined ${$s->{"conditions"}}{$1}) { + push @ifs, -${$s->{"conditions"}}{$1}; + next; + } + push @ifs, 0; + } elsif (/^#if\s+/) { + push @ifs, 0; + } elsif (/^#?endif/) { + my $x = pop @ifs; + defined $x or die "Improper nesting of conditionals"; + $x && next; + } elsif (/^#?else/) { + my $x = pop @ifs; + defined $x or die "Improper nesting of conditionals"; + push @ifs, -$x; + $x && next; + } + } + @ifs && $ifs[$#ifs] < 0 && next; + if (/^$/) { + $empty && next; + $empty = 1; + } else { $empty = 0; } + } + print O; + } + close O; + close I; + ! -x $file or chmod(0755, "$dest") or die "chmod($dest): $!"; + rename $dest, $file or "rename($dest,$file): $!"; +} + +sub GenPackage($) { + my ($s) = @_; + $s->{"PKG"} = $s->{"PACKAGE"} . "-" . $s->{"VERSION"}; + my $dd = $s->{"DISTDIR"}; + my $pkg = $s->{"PKG"}; + my $dir = "$dd/$pkg"; + print "Generating $dir\n"; + + system "git archive --format=tar --prefix=$dir/ HEAD | tar xf -"; + die if $?; + + my @files = `cd $dir && find . 
-type f`; + die if $?; + + for my $f (@files) { + chomp $f; + $f =~ s/^\.\///; + my $action = ""; + my @rules = @{$s->{"rules"}}; + while (@rules) { + my $rule = shift @rules; + my $act = shift @rules; + if ($f =~ $rule) { + $action = $act; + last; + } + } + if ($action eq '') { + } elsif ($action =~ /-/) { + unlink "$dir/$f" or die "Cannot unlink $dir/$f: $!\n"; + print "$f (unlinked)\n" if $verbose; + } else { + print "$f ($action)\n" if $verbose; + $s->TransformFile("$dir/$f", $action); + } + } + + return $dir; +} + +sub GenFile($$) { + my ($s,$f) = @_; + my $sf = $s->{"DISTDIR"} . "/" . $s->{"PKG"} . "/$f"; + my $df = $s->{"DISTDIR"} . "/$f"; + print "Generating $df\n"; + `cp $sf $df`; die if $?; + push @{$s->{"distfiles"}}, $df; +} + +sub SignFile($$) { + my ($s, $file) = @_; + $s->{'do_sign'} or return; + print "Signing $file\n"; + system "gpg", "--armor", "--detach-sig", "$file"; + die if $?; + rename "$file.asc", "$file.sign" or die "No signature produced!?\n"; + push @{$s->{"distfiles"}}, "$file.sign"; +} + +sub MakeArchive($) { + my ($s) = @_; + my $dd = $s->{"DISTDIR"}; + my $pkg = $s->{"PKG"}; + + print "Creating $dd/$pkg.tar\n"; + my $tarvv = $verbose ? "vv" : ""; + `cd $dd && tar c${tarvv}f $pkg.tar $pkg >&2`; die if $?; + + print "Creating $dd/$pkg.tar.gz\n"; + `gzip <$dd/$pkg.tar >$dd/$pkg.tar.gz`; die if $?; + push @{$s->{"distfiles"}}, "$dd/$pkg.tar.gz"; + + # print "Creating $dd/$pkg.tar.bz2\n"; + # `bzip2 <$dd/$pkg.tar >$dd/$pkg.tar.bz2`; die if $?; + # push @{$s->{"distfiles"}}, "$dd/$pkg.tar.bz2"; + + $s->SignFile("$dd/$pkg.tar"); +} + +sub ParseOptions($) { + my ($s) = @_; + GetOptions( + "verbose!" => \$verbose, + "test!" => \$s->{"do_test"}, + "patch!" => \$s->{"do_patch"}, + "diff-against=s" => \$s->{"diff_against"}, + "version=s" => \$s->{"VERSION"}, + "upload!" => \$s->{"do_upload"}, + "sign!" 
=> \$s->{"do_sign"}, + ) || die "Syntax: release [--verbose] [--test] [--nopatch] [--version=] [--diff-against=] [--noupload] [--nosign]"; +} + +sub Test($) { + my ($s) = @_; + $s->{"do_test"} or return; + my $dd = $s->{"DISTDIR"}; + my $pkg = $s->{"PKG"}; + my $tdir = "$dd/$pkg.test"; + $s->{"TESTDIR"} = $tdir; + `cp -a $dd/$pkg $tdir`; die if $?; + my $log = "$tdir.log"; + print "Doing a test compilation\n"; + my $make = $s->{"test_compile"}; + `( cd $tdir && $make ) >$log 2>&1`; + die "There were errors. Please inspect $log" if $?; + `grep -q [Ww]arning $log`; + $? or print "There were warnings! Please inspect $log.\n"; +} + +sub MakePatch($) { + my ($s) = @_; + $s->{"do_patch"} or return; + my $dd = $s->{"DISTDIR"}; + my $pkg1 = $s->{"PKG"}; + my $oldver; + if ($s->{"diff_against"} ne "") { + $oldver = $s->{"diff_against"}; + } elsif (defined $s->{"OLDVERSION"}) { + $oldver = $s->{"OLDVERSION"}; + } else { + print "WARNING: No previous version known. No patch generated.\n"; + return; + } + my $pkg0 = $s->{"PACKAGE"} . "-" . $oldver; + + my $oldarch = $s->{"archive_dir"} . "/" . $pkg0 . ".tar.gz"; + -f $oldarch or die "MakePatch: $oldarch not found"; + print "Unpacking $pkg0 from $oldarch\n"; + `cd $dd && tar xzf $oldarch`; die if $?; + + my $diff = $s->{"PACKAGE"} . "-" . $oldver . "-" . $s->{"VERSION"} . 
".diff.gz"; + print "Creating a patch from $pkg0 to $pkg1: $diff\n"; + `cd $dd && diff -ruN $pkg0 $pkg1 | gzip >$diff`; die if $?; + push @{$s->{"distfiles"}}, "$dd/$diff"; + $s->SignFile("$dd/$diff"); +} + +sub Upload($) { + my ($s) = @_; + $s->{"do_upload"} or return; + foreach my $u (@{$s->{"uploads"}}) { + my $url = $u->{"url"}; + print "Upload to $url :\n"; + my @files = (); + my $filter = $u->{"filter"} || ".*"; + foreach my $f (@{$s->{"distfiles"}}) { + if ($f =~ $filter) { + print "\t$f\n"; + push @files, $f; + } + } + $s->Confirm; + if ($url =~ m@^file:(.*)@) { + my $dir = $1; + $dir =~ s@^///@/@; + `cp @files $dir/`; die if $?; + } elsif ($url =~ m@^scp://([^/]+)(.*)@) { + $, = " "; + my $host = $1; + my $dir = $2; + $dir =~ s@^/~@~@; + $dir =~ s@^/\./@@; + my $cmd = "scp @files $host:$dir\n"; + `$cmd`; die if $?; + } elsif ($url =~ m@ftp://([^/]+)(.*)@) { + my $host = $1; + my $dir = $2; + open FTP, "|ftp -v $host" or die; + print FTP "cd $dir\n"; + foreach my $f (@files) { + (my $ff = $f) =~ s@.*\/([^/].*)@$1@; + print FTP "put $f $ff\n"; + } + print FTP "bye\n"; + close FTP; + die if $?; + } else { + die "Don't know how to handle this URL scheme"; + } + } +} + +1; diff --git a/libucw/maint/upload-doc b/libucw/maint/upload-doc new file mode 100755 index 0000000..6e1c0c0 --- /dev/null +++ b/libucw/maint/upload-doc @@ -0,0 +1,10 @@ +#!/bin/bash +# Build and upload LibUCW documentation and web pages +# (c) 2012--2015 Martin Mares + +set -e +rm -rf maint/tmp/doc +mkdir -p maint/tmp/doc +make install-libucw-docs install-libucw-json-docs install-libucw-xml-docs PREFIX=. 
INSTALL_DOC_DIR=maint/tmp/doc +rsync -avz --delete maint/tmp/doc/ jw:/var/www/libucw/doc/ +rsync -avz --no-times --no-perms --executability --omit-dir-times WWW/ jw:/var/www/libucw/ --exclude=doc --exclude='*.swp' diff --git a/libucw/ucw-json/Makefile b/libucw/ucw-json/Makefile new file mode 100644 index 0000000..ff8dcb3 --- /dev/null +++ b/libucw/ucw-json/Makefile @@ -0,0 +1,49 @@ +# Makefile for the UCW JSON library +# (c) 2015 Martin Mares + +DIRS+=ucw-json +PROGS+=$(o)/ucw-json/json-test + +LIBJSON_MODS=json parse format +LIBJSON_MOD_PATHS=$(addprefix $(o)/ucw-json/,$(LIBJSON_MODS)) +LIBJSON_INCLUDES=json.h +LIBJSON_DEPS=$(LIBUCW) +export LIBJSON_LIBS=-lm + +$(o)/ucw-json/libucw-json$(LV).a: $(addsuffix .o,$(LIBJSON_MOD_PATHS)) +$(o)/ucw-json/libucw-json$(LV).so: $(addsuffix .oo,$(LIBJSON_MOD_PATHS)) $(LIBJSON_DEPS) +$(o)/ucw-json/libucw-json$(LV).so: SONAME_SUFFIX=.0 +$(o)/ucw-json/libucw-json$(LV).so: LIBS+=$(LIBJSON_LIBS) +$(o)/ucw-json/libucw-json.pc: $(LIBJSON_DEPS) + +ifdef CONFIG_INSTALL_API +$(o)/ucw-json/libucw-json.pc: $(addprefix $(o)/ucw-json/libucw-json$(LV),.a .so) +endif + +TESTS+=$(o)/ucw-json/json-test.test +$(o)/ucw-json/json-test: $(o)/ucw-json/json-test.o $(LIBJSON) $(LIBUCW) +$(o)/ucw-json/json-test.test: $(o)/ucw-json/json-test + +API_LIBS+=libucw-json +API_INCLUDES+=$(o)/ucw-json/.include-stamp +$(o)/ucw-json/.include-stamp: $(addprefix $(s)/ucw-json/,$(LIBJSON_INCLUDES)) +$(o)/ucw-json/.include-stamp: IDST=ucw-json +run/lib/pkgconfig/libucw-json.pc: $(o)/ucw-json/libucw-json.pc + +INSTALL_TARGETS+=install-libucw-json-lib +install-libucw-json-lib: + install -d -m 755 $(DESTDIR)$(INSTALL_LIB_DIR) + install -m 644 run/lib/libucw-json$(LV).so.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-json$(LV).so.0.0 + ln -sf libucw-json$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-json$(LV).so.0 +.PHONY: install-libucw-json-lib + +INSTALL_TARGETS+=install-libucw-json-api +install-libucw-json-api: + install -d -m 755 $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw-json 
$(DESTDIR)$(INSTALL_LIB_DIR) $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + install -m 644 run/lib/pkgconfig/libucw-json.pc $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + install -m 644 $(addprefix run/include/ucw-json/,$(LIBJSON_INCLUDES)) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw-json + ln -sf libucw-json$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-json$(LV).so + install -m 644 run/lib/libucw-json$(LV).a $(DESTDIR)$(INSTALL_LIB_DIR) +.PHONY: install-libucw-json-api + +include $(s)/ucw-json/doc/Makefile diff --git a/libucw/ucw-json/doc/Makefile b/libucw/ucw-json/doc/Makefile new file mode 100644 index 0000000..0d7468e --- /dev/null +++ b/libucw/ucw-json/doc/Makefile @@ -0,0 +1,20 @@ +# Makefile for the UCW-JSON documentation + +DIRS+=ucw-json/doc + +JSON_DOCS=json index +JSON_DOCS_HTML=$(addprefix $(o)/ucw-json/doc/,$(addsuffix .html,$(JSON_DOCS))) + +DOCS+=$(JSON_DOCS_HTML) +DOC_MODULES+=ucw-json +$(JSON_DOCS_HTML): DOC_MODULE=ucw-json + +ifdef CONFIG_DOC +INSTALL_TARGETS+=install-libucw-json-docs +endif + +.PHONY: install-libucw-json-docs + +install-libucw-json-docs: $(JSON_DOCS_HTML) + install -d -m 755 $(DESTDIR)$(INSTALL_DOC_DIR)/ucw-json/ + install -m 644 $^ $(DESTDIR)$(INSTALL_DOC_DIR)/ucw-json/ diff --git a/libucw/ucw-json/doc/index.txt b/libucw/ucw-json/doc/index.txt new file mode 100644 index 0000000..de70808 --- /dev/null +++ b/libucw/ucw-json/doc/index.txt @@ -0,0 +1,15 @@ +The UCW-JSON library +==================== + +This library provides a light-weight JSON parser and generator built atop <<../ucw/index:,LibUCW>>. + +It follows the specification of JSON in RFC 7159. 
+ +Modules +------- +- <> + +Authors +------- + +- Martin Mareš diff --git a/libucw/ucw-json/doc/json.txt b/libucw/ucw-json/doc/json.txt new file mode 100644 index 0000000..de4725e --- /dev/null +++ b/libucw/ucw-json/doc/json.txt @@ -0,0 +1,7 @@ +JSON Parser and Generator +========================= + +ucw-json/json.h +--------------- + +!!ucw-json/json.h diff --git a/libucw/ucw-json/format.c b/libucw/ucw-json/format.c new file mode 100644 index 0000000..227a26b --- /dev/null +++ b/libucw/ucw-json/format.c @@ -0,0 +1,165 @@ +/* + * UCW JSON Library -- Formatter + * + * (c) 2015 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +void json_set_output(struct json_context *js, struct fastbuf *fb) +{ + js->out_fb = fb; +} + +static void write_string(struct json_context *js, const char *p) +{ + struct fastbuf *fb = js->out_fb; + + bputc(fb, '"'); + for (;;) + { + uint u; + p = utf8_32_get(p, &u); + if (!u) + break; + if (u == '"' || u == '\\') + { + bputc(fb, '\\'); + bputc(fb, u); + } + else if (u < 0x20) + { + // We avoid "\f" and "\b" and use "\uXXXX" instead + switch (u) + { + case 0x09: bputs(fb, "\\t"); break; + case 0x0a: bputs(fb, "\\n"); break; + case 0x0d: bputs(fb, "\\r"); break; + default: + bprintf(fb, "\\u%04x", u); + } + } + else if (u >= 0x007f && (js->format_options & JSON_FORMAT_ESCAPE_NONASCII)) + { + if (u < 0x10000) + bprintf(fb, "\\u%04x", u); + else if (u < 0x110000) + bprintf(fb, "\\u%04x\\u%04x", 0xd800 + ((u - 0x10000) >> 10), 0xdc00 + (u & 0x3ff)); + else + bprintf(fb, "\\u%04x", UNI_REPLACEMENT); + } + else + bput_utf8_32(fb, u); + } + bputc(fb, '"'); +} + +static void write_number(struct fastbuf *fb, double val) +{ + bprintf(fb, "%.*g", DBL_DIG+1, val); +} + +static bool want_indent_p(struct json_context *js) +{ + return (js->format_options & JSON_FORMAT_INDENT); +} 
+ +static void write_space(struct json_context *js) +{ + struct fastbuf *fb = js->out_fb; + + if (want_indent_p(js)) + { + bputc(fb, '\n'); + for (uint i=0; i < js->out_indent; i++) + bputc(fb, '\t'); + } + else + bputc(fb, ' '); +} + +void json_write_value(struct json_context *js, struct json_node *n) +{ + struct fastbuf *fb = js->out_fb; + + switch (n->type) + { + case JSON_NULL: + bputs(fb, "null"); + break; + case JSON_BOOLEAN: + bputs(fb, (n->boolean ? "true" : "false")); + break; + case JSON_NUMBER: + write_number(fb, n->number); + break; + case JSON_STRING: + write_string(js, n->string); + break; + case JSON_ARRAY: + { + if (!GARY_SIZE(n->elements)) + bputs(fb, "[]"); + else + { + bputc(fb, '['); + js->out_indent++; + for (size_t i=0; i < GARY_SIZE(n->elements); i++) + { + if (i) + bputc(fb, ','); + write_space(js); + json_write_value(js, n->elements[i]); + } + js->out_indent--; + write_space(js); + bputc(fb, ']'); + } + break; + } + case JSON_OBJECT: + { + if (!GARY_SIZE(n->pairs)) + bputs(fb, "{}"); + else + { + bputc(fb, '{'); + js->out_indent++; + for (size_t i=0; i < GARY_SIZE(n->pairs); i++) + { + if (i) + bputc(fb, ','); + write_space(js); + struct json_pair *p = &n->pairs[i]; + write_string(js, p->key); + bputs(fb, ": "); + json_write_value(js, p->value); + } + js->out_indent--; + write_space(js); + bputc(fb, '}'); + } + break; + } + default: + ASSERT(0); + } +} + +void json_write(struct json_context *js, struct fastbuf *fb, struct json_node *n) +{ + json_set_output(js, fb); + json_write_value(js, n); + bputc(fb, '\n'); +} diff --git a/libucw/ucw-json/json-test.c b/libucw/ucw-json/json-test.c new file mode 100644 index 0000000..5d4c287 --- /dev/null +++ b/libucw/ucw-json/json-test.c @@ -0,0 +1,123 @@ +/* + * UCW JSON Library -- Tests + * + * (c) 2015 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include +#include +#include +#include +#include + +static int opt_read; +static int opt_write; +static int opt_escape; +static int opt_indent; +static int opt_read_hex; +static int opt_write_hex; +static int opt_stream; + +static struct opt_section options = { + OPT_ITEMS { + OPT_HELP("Test program for UCW JSON library."), + OPT_HELP("Usage: json-test [options]"), + OPT_HELP(""), + OPT_HELP("Options:"), + OPT_HELP_OPTION, + OPT_BOOL('r', "read", opt_read, 0, "\tRead JSON from standard input"), + OPT_BOOL('R', "read-hex", opt_read_hex, 0, "\tRead JSON, interpreting as hex escapes"), + OPT_BOOL('w', "write", opt_write, 0, "\tWrite JSON to standard output"), + OPT_BOOL('W', "write-hex", opt_write_hex, 0, "\tWrite JSON, print non-ASCII as hex escapes"), + OPT_BOOL('e', "escape", opt_escape, 0, "\tEscape non-ASCII characters in strings"), + OPT_BOOL('i', "indent", opt_indent, 0, "\tIndent output"), + OPT_BOOL('s', "stream", opt_stream, 0, "\tTest of streaming mode"), + OPT_END + } +}; + +static struct json_node *do_parse(struct json_context *js, struct fastbuf *fb) +{ + struct json_node *n; + TRANS_TRY + { + n = json_parse(js, fb); + } + TRANS_CATCH(x) + { + fprintf(stderr, "ERROR: %s\n", x->msg); + exit(1); + } + TRANS_END; + return n; +} + +static void test_stream(struct json_context *js) +{ + struct fastbuf *in = bfdopen_shared(0, 65536); + struct fastbuf *out = bfdopen_shared(1, 65536); + json_set_input(js, in); + json_set_output(js, out); + + for (;;) + { + json_push(js); + struct json_node *n = json_next_value(js); + if (!n) + break; + json_write_value(js, n); + bputc(out, '\n'); + json_pop(js); + } + + bclose(out); + bclose(in); +} + +int main(int argc UNUSED, char **argv) +{ + opt_parse(&options, argv+1); + + struct json_context *js = json_new(); + struct json_node *n = NULL; + + if (opt_escape) + js->format_options |= JSON_FORMAT_ESCAPE_NONASCII; + if (opt_indent) + js->format_options |= JSON_FORMAT_INDENT; + + if (opt_stream) + { + 
test_stream(js); + json_delete(js); + return 0; + } + + if (opt_read || opt_read_hex) + { + struct fastbuf *fb = bfdopen_shared(0, 65536); + if (opt_read_hex) + fb = fb_wrap_hex_in(fb); + n = do_parse(js, fb); + bclose(fb); + } + + if (!n) + n = json_new_number(js, 42); + + if (opt_write || opt_write_hex) + { + struct fastbuf *fb = bfdopen_shared(1, 65536); + if (opt_write_hex) + fb = fb_wrap_hex_out(fb); + json_write(js, fb, n); + bclose(fb); + } + + json_delete(js); + return 0; +} diff --git a/libucw/ucw-json/json-test.t b/libucw/ucw-json/json-test.t new file mode 100644 index 0000000..db747e7 --- /dev/null +++ b/libucw/ucw-json/json-test.t @@ -0,0 +1,394 @@ +# Tests for the JSON library +# (c) 2015 Martin Mares + +### Literals ### + +Name: Null +Run: ../obj/ucw-json/json-test -rw +In: null +Out: null + +Name: True +In: true +Out: true + +Name: False +In: false +Out: false + +Name: Invalid literal 1 +In: lomikel +Exit: 1 +Err: ERROR: Invalid literal name at line 1:8 + +Name: Invalid literal 2 +In: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +Exit: 1 +Err: ERROR: Invalid literal name at line 1:101 + +### Numbers ### + +Name: Plain number +In: 42 +Out: 42 + +Name: Negative number +In: -42 +Out: -42 + +Name: Zero number +In: 0 +Out: 0 + +# The largest number guaranteed to be precise by RFC 7159 +Name: Large number +In: 9007199254740991 +Out: 9007199254740991 + +Name: Fractional number 1 +In: 12345.54321 +Out: 12345.54321 + +Name: Fractional number 2 +In: 0.333333333 +Out: 0.333333333 + +Name: Number in scientific notation 1 +In: 3.14159e20 +Out: 3.14159e+20 + +Name: Number in scientific notation 2 +In: 3.14159e+20 +Out: 3.14159e+20 + +Name: Number in scientific notation 3 +In: 3.14159e-20 +Out: 3.14159e-20 + +Name: No leading zero allowed +In: 01234 +Exit: 1 +Err: ERROR: Malformed number: leading zero at line 1:2 + +Name: Bare sign is not a number +In: - +Exit: 1 +Err: ERROR: Malformed number: just minus at 
line 1:2 + +Name: No leading decimal point allowed +In: .1234 +Exit: 1 +Err: ERROR: Numbers must start with a digit at line 1:1 + +Name: Digits after decimal point required +In: 1234. +Exit: 1 +Err: ERROR: Malformed number: no digits after decimal point at line 1:6 + +Name: Exponent part must be non-empty 1 +In: 1e +Exit: 1 +Err: ERROR: Malformed number: empty exponent at line 1:3 + +Name: Exponent part must be non-empty 2 +In: 1e+ +Exit: 1 +Err: ERROR: Malformed number: empty exponent at line 1:4 + +Name: Number out of range +In: 1e3000000 +Exit: 1 +Err: ERROR: Number out of range at line 1:10 + +### Strings ### + +Name: Plain string +In: "foo" +Out: "foo" + +Name: Empty string +In: "" +Out: "" + +Name: UTF-8 string +In: "šelmička" +Out: "šelmička" + +Name: Unterminated string +In: "infinity +Exit: 1 +Err: ERROR: Unterminated string at line 1:10 + +Name: Multi-line string +In: "infi + nity" +Exit: 1 +Err: ERROR: Unterminated string at line 1:6 + +# Some characters are written as \uXXXX on output, which is correct +Name: Escaped characters +In: "\"\\\/\b\f\n\r\t" +Out: "\"\\/\u0008\u000c\n\r\t" + +Name: Improper escaped characters +In: "\a" +Exit: 1 +Err: ERROR: Invalid backslash sequence in string at line 1:3 + +Name: Unicode escapes +In: "\u0041\u010d\u010D\u0001" +Out: "Ačč\u0001" + +Name: Improper Unicode escapes 1 +In: "\u" +Exit: 1 +Err: ERROR: Invalid Unicode escape sequence at line 1:4 + +Name: Improper Unicode escapes 2 +In: "\u +Exit: 1 +Err: ERROR: Invalid Unicode escape sequence at line 1:4 + +Name: Improper Unicode escapes 3 +In: "\uABCZ" +Exit: 1 +Err: ERROR: Invalid Unicode escape sequence at line 1:7 + +### Unicode magic ### + +# TAB is forbidden +Name: Control characters 1 +Run: ../obj/ucw-json/json-test -RW +In: "<09>" +Exit: 1 +Err: ERROR: Invalid control character in string at line 1:2 + +# Surprisingly, DEL is not +Name: Control characters 2 +In: "<7f>" +Out: "<7f>"<0a> + +# U+31234 +Name: UTF-8 outside BMP +In: "<f0><b1><88><b4>" +Out: "<f0><b1><88><b4>"<0a> + 
+Name: Private use characters in BMP +In: "<80><80>" +Exit: 1 +Err: ERROR: Invalid private-use character in string at line 1:2 + +Name: UTF-8 outside BMP +In: "<88>" +Out: "<88>"<0a> + +Name: Private use characters outside BMP +In: "<80><80>" +Exit: 1 +Err: ERROR: Invalid private-use character in string at line 1:2 + +# U+FF0000 +Name: UTF-8 outside UCS +In: "<f8><bf><b0><80><80>" +Exit: 1 +Err: ERROR: Invalid non-Unicode character in string at line 1:2 + +# U+D800 +Name: UTF-8 surrogate 1 +In: "<ed><a0><80>" +Exit: 1 +Err: ERROR: Invalid surrogate character in string at line 1:2 + +# U+DC00 +Name: UTF-8 surrogate 2 +In: "<ed><b0><80>" +Exit: 1 +Err: ERROR: Invalid surrogate character in string at line 1:2 + +# Denormalized U+07FF +Name: UTF-8 denormalized +In: "" +Exit: 1 +Err: ERROR: Malformed UTF-8 character at line 1:1 + +# U+FEFF +Name: UTF-8 BOM +In: <ef><bb><bf> +Exit: 1 +Err: ERROR: Misplaced byte-order mark, complain in Redmond at line 1:1 + +Name: Escaped NUL +In: "\u0000" +Exit: 1 +Err: ERROR: Zero bytes in strings are not supported at line 1:7 + +Name: Escaped surrogate +In: "\udaff\udcba" +Out: "<f3><8f><b2><ba>"<0a> + +Name: Escaped surrogate malformation 1 +In: "\udaff" +Exit: 1 +Err: ERROR: Escaped high surrogate codepoint must be followed by a low surrogate codepoint at line 1:8 + +Name: Escaped surrogate malformation 2 +In: "\udcff" +Exit: 1 +Err: ERROR: Invalid escaped surrogate codepoint at line 1:7 + +Name: Escaped low private-use character +In: "\uedac" +Exit: 1 +Err: ERROR: Invalid escaped private-use character at line 1:7 + +Name: Escaped high private-use character +In: "\udbff\udc00" +Exit: 1 +Err: ERROR: Invalid escaped private-use character at line 1:13 + +# Switch back to normal mode +Name: Invalid ASCII character +Run: ../obj/ucw-json/json-test -rw +In: @ +Exit: 1 +Err: ERROR: Invalid character at line 1:1 + +### Arrays ### + +Name: Empty array +In: [] +Out: [] + +Name: One-element array +In: [1] +Out: [ 1 ] + +Name: Two-element array +In: [1,2] +Out: [ 1, 2 ] + +Name: Nested arrays +In: 
[[1,2],["a","b"]] +Out: [ [ 1, 2 ], [ "a", "b" ] ] + +Name: Multi-line array +In: [ + "a", null,false + ,false + ] +Out: [ "a", null, false, false ] + +Name: Unterminated array 1 +In: [1,2 +Exit: 1 +Err: ERROR: Comma or right bracket expected at line 2:0 + +Name: Unterminated array 2 +In: [1,2, +Exit: 1 +Err: ERROR: Unterminated array at line 2:0 + +Name: Extra comma not allowed +In: [1,2,] +Exit: 1 +Err: ERROR: Misplaced end of array at line 1:6 + +Name: Solitary comma not allowed +In: , +Exit: 1 +Err: ERROR: Misplaced comma at line 1:1 + +Name: Deeply nested array +In: [[[[[[[[[[]]]]]]]]]] +Out: [ [ [ [ [ [ [ [ [ [] ] ] ] ] ] ] ] ] ] + +Name: Deeply unclosed array +In: [[[[[[[[[[] +Exit: 1 +Err: ERROR: Comma or right bracket expected at line 2:0 + +Name: Missing comma +In: [1 2] +Exit: 1 +Err: ERROR: Comma or right bracket expected at line 1:5 + +### Objects ### + +Name: Empty object +In: {} +Out: {} + +Name: One-entry object +In: {"a":"b"} +Out: { "a": "b" } + +Name: Two-entry object +In: {"a":1,"b":2} +Out: { "a": 1, "b": 2 } + +Name: Nested objects +In: { + "a": [1,2], + "b": { "x": true, "y": false } + } +Out: { "a": [ 1, 2 ], "b": { "x": true, "y": false } } + +Name: Unterminated object 1 +In: { +Exit: 1 +Err: ERROR: Unterminated object at line 2:0 + +Name: Unterminated object 2 +In: { "a" +Exit: 1 +Err: ERROR: Colon expected at line 2:0 + +Name: Unterminated object 3 +In: { "a": +Exit: 1 +Err: ERROR: Unterminated object at line 2:0 + +Name: Unterminated object 4 +In: { "a":1, +Exit: 1 +Err: ERROR: Unterminated object at line 2:0 + +Name: Extra comma not allowed in objects +In: { "a":1, } +Exit: 1 +Err: ERROR: Misplaced end of object at line 1:10 + +Name: Non-string key +In: {1:2} +Exit: 1 +Err: ERROR: Object key must be a string at line 1:3 + +Name: Repeated key +In: {"a":1, "a":2} +Exit: 1 +Err: ERROR: Key already set at line 1:14 + +Name: Missing object comma +In: {"a":1 "b":2} +Exit: 1 +Err: ERROR: Comma expected at line 1:10 + +### Top-level problems 
### + +Name: Empty input +Exit: 1 +Err: ERROR: Empty input at line 1:0 + +Name: Multiple values +In: 1 2 +Exit: 1 +Err: ERROR: Only one top-level value allowed at line 1:4 + +### Streaming interface ### + +Name: Streaming +Run: ../obj/ucw-json/json-test -s +In: 123 [true, false] "Rincewind" +Out: 123 + [ true, false ] + "Rincewind" diff --git a/libucw/ucw-json/json.c b/libucw/ucw-json/json.c new file mode 100644 index 0000000..780478d --- /dev/null +++ b/libucw/ucw-json/json.c @@ -0,0 +1,141 @@ +/* + * UCW JSON Library -- Data Representation + * + * (c) 2015 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +static void json_init(struct json_context *js) +{ + mp_save(js->pool, &js->init_state); + js->trivial_token = json_new_node(js, JSON_INVALID); +} + +struct json_context *json_new(void) +{ + struct mempool *mp = mp_new(4096); + struct json_context *js = mp_alloc_zero(mp, sizeof(*js)); + js->pool = mp; + json_init(js); + return js; +} + +void json_delete(struct json_context *js) +{ + mp_delete(js->pool); +} + +void json_reset(struct json_context *js) +{ + struct mempool *mp = js->pool; + mp_restore(mp, &js->init_state); + bzero(js, sizeof(*js)); + js->pool = mp; + json_init(js); +} + +void json_push(struct json_context *js) +{ + ASSERT(!js->next_token); + mp_push(js->pool); +} + +void json_pop(struct json_context *js) +{ + ASSERT(!js->next_token); + mp_pop(js->pool); +} + +struct json_node *json_new_node(struct json_context *js, enum json_node_type type) +{ + struct json_node *n = mp_alloc_fast(js->pool, sizeof(*n)); + n->type = type; + return n; +} + +struct json_node *json_new_number(struct json_context *js, double value) +{ + ASSERT(isfinite(value)); + struct json_node *n = json_new_node(js, JSON_NUMBER); + n->number = value; + return n; +} + +#define JSON_NUM_TO(_type, _min, _max) \ + bool 
json_number_to_##_type(struct json_node *num, _type *dest) \ + { \ + if (num->type == JSON_NUMBER && \ + num->number >= _min && num->number <= _max) \ + { \ + *dest = num->number; \ + return 1; \ + } \ + return 0; \ + } + +JSON_NUM_TO(int, INT_MIN, INT_MAX) +JSON_NUM_TO(uint, 0, UINT_MAX) +JSON_NUM_TO(s64, INT64_MIN, INT64_MAX) +JSON_NUM_TO(u64, 0, UINT64_MAX) + +struct json_node *json_new_array(struct json_context *js) +{ + struct json_node *n = json_new_node(js, JSON_ARRAY); + GARY_INIT_SPACE_ALLOC(n->elements, 4, mp_get_allocator(js->pool)); + return n; +} + +void json_array_append(struct json_node *array, struct json_node *elt) +{ + ASSERT(array->type == JSON_ARRAY); + *GARY_PUSH(array->elements) = elt; +} + +struct json_node *json_new_object(struct json_context *js) +{ + struct json_node *n = json_new_node(js, JSON_OBJECT); + GARY_INIT_SPACE_ALLOC(n->pairs, 4, mp_get_allocator(js->pool)); + return n; +} + +void json_object_set(struct json_node *n, const char *key, struct json_node *value) +{ + for (size_t i=0; i < GARY_SIZE(n->pairs); i++) + if (!strcmp(n->pairs[i].key, key)) + { + if (value) + n->pairs[i].value = value; + else + { + n->pairs[i] = n->pairs[GARY_SIZE(n->pairs) - 1]; + GARY_POP(n->pairs); + } + return; + } + + if (value) + { + struct json_pair *p = GARY_PUSH(n->pairs); + p->key = key; + p->value = value; + } +} + +struct json_node *json_object_get(struct json_node *n, const char *key) +{ + for (size_t i=0; i < GARY_SIZE(n->pairs); i++) + if (!strcmp(n->pairs[i].key, key)) + return n->pairs[i].value; + return NULL; +} diff --git a/libucw/ucw-json/json.h b/libucw/ucw-json/json.h new file mode 100644 index 0000000..369598b --- /dev/null +++ b/libucw/ucw-json/json.h @@ -0,0 +1,319 @@ +/* + * UCW JSON Library + * + * (c) 2015 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_JSON_JSON_H +#define _UCW_JSON_JSON_H + +#include +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define json_array_append ucw_json_array_append +#define json_delete ucw_json_delete +#define json_new ucw_json_new +#define json_new_array ucw_json_new_array +#define json_new_node ucw_json_new_node +#define json_new_number ucw_json_new_number +#define json_new_object ucw_json_new_object +#define json_next_token ucw_json_next_token +#define json_next_value ucw_json_next_value +#define json_number_to_int ucw_json_number_to_int +#define json_number_to_s64 ucw_json_number_to_s64 +#define json_number_to_u64 ucw_json_number_to_u64 +#define json_number_to_uint ucw_json_number_to_uint +#define json_object_get ucw_json_object_get +#define json_object_set ucw_json_object_set +#define json_parse ucw_json_parse +#define json_peek_token ucw_json_peek_token +#define json_pop ucw_json_pop +#define json_push ucw_json_push +#define json_reset ucw_json_reset +#define json_set_input ucw_json_set_input +#define json_set_output ucw_json_set_output +#define json_write ucw_json_write +#define json_write_value ucw_json_write_value +#endif + +/*** + * === JSON library context + * + * The context structure remembers the whole state of the JSON + * library. All JSON values are allocated from a memory pool associated + * with the context. By default, their lifetime is the same as that + * of the context. + * + * Alternatively, you can mark the current state of the context + * with json_push() and return to the marked state later using + * json_pop(). All JSON values created between these two operations + * are released afterwards. See json_push() for details. + ***/ + +/** + * The context is represented by a pointer to this structure. + * The fields marked with [*] are publicly accessible, the rest is private. 
+ **/ +struct json_context { + // Memory management + struct mempool *pool; + struct mempool_state init_state; + + // Parser context + struct fastbuf *in_fb; + uint in_line; // [*] Current line number + uint in_column; // [*] Current column number + bool in_eof; // End of file was encountered + struct json_node *next_token; + struct json_node *trivial_token; + int next_char; + + // Formatter context + struct fastbuf *out_fb; + uint out_indent; + uint format_options; // [*] Formatting options (a combination of JSON_FORMAT_xxx) +}; + +/** Creates a new JSON context. **/ +struct json_context *json_new(void); + +/** Deletes a JSON context, deallocating all memory associated with it. **/ +void json_delete(struct json_context *js); + +/** + * Recycles a JSON context. All state is reset, allocated objects are freed. + * This is equivalent to json_delete() followed by json_new(), but it is faster + * and the address of the context is preserved. + **/ +void json_reset(struct json_context *js); + +/** + * Push the current state of the context onto the state stack. + * + * Between json_push() and the associated json_pop(), only newly + * created JSON values can be modified. Older values can only be + * inspected, never modified. In particular, new values cannot be + * inserted into old arrays or objects. + * + * If you are using json_peek_token(), the saved tokens cannot + * be carried across a push/pop boundary. + **/ +void json_push(struct json_context *js); + +/** + * Create a copy of a string in JSON memory. + * + * For example, this is useful when you want to use a string of unknown + * lifetime as a key in json_object_set(). + **/ +static inline const char *json_strdup(struct json_context *js, const char *str) +{ + return mp_strdup(js->pool, str); +} + +/** + * Pop state of the context off state stack. All JSON values created + * since the state was saved by json_push() are released. 
+ **/ +void json_pop(struct json_context *js); + +/*** + * === JSON values + * + * Each JSON value is represented by <>, + * which is either an elementary value (null, boolean, number, string), + * or a container (array, object) pointing to other values. + * + * A value can belong to multiple containers simultaneously, so in general, + * the relationships between values need not form a tree, but a directed + * acyclic graph. + * + * You are allowed to read contents of nodes directly, but construction + * and modification of nodes must be always performed using the appropriate + * library functions. + ***/ + +/** Node types **/ +enum json_node_type { + JSON_INVALID, + JSON_NULL, + JSON_BOOLEAN, + JSON_NUMBER, + JSON_STRING, + JSON_ARRAY, + JSON_OBJECT, + // These are not real nodes, but raw tokens. + // They are not present in the tree of values, but you may see them + // if you call json_next_token() and friends. + JSON_BEGIN_ARRAY, + JSON_END_ARRAY, + JSON_BEGIN_OBJECT, + JSON_END_OBJECT, + JSON_NAME_SEP, + JSON_VALUE_SEP, + JSON_EOF, +}; + +/** Each value is represented by a single node. **/ +struct json_node { + enum json_node_type type; + union { // Data specific to individual value types + bool boolean; + double number; + const char *string; + struct json_node **elements; // Arrays: Growing array of values + struct json_pair *pairs; // Objects: Growing array of pairs + }; +}; + +/** Attributes of objects are stored as (key, value) pairs of this format. **/ +struct json_pair { + const char *key; + struct json_node *value; + // FIXME: Hash table +}; + +// Used internally +struct json_node *json_new_node(struct json_context *js, enum json_node_type type); + +/** Creates a new null value. **/ +static inline struct json_node *json_new_null(struct json_context *js UNUSED) +{ + static const struct json_node static_null = { .type = JSON_NULL }; + return (struct json_node *) &static_null; +} + +/** Creates a new boolean value. 
**/ +static inline struct json_node *json_new_bool(struct json_context *js UNUSED, bool value) +{ + static const struct json_node static_bool[2] = { + [0] = { .type = JSON_BOOLEAN, { .boolean = 0 } }, + [1] = { .type = JSON_BOOLEAN, { .boolean = 1 } }, + }; + return (struct json_node *) &static_bool[value]; +} + +/** Creates a new numeric value. The @value must be a finite number. **/ +struct json_node *json_new_number(struct json_context *js, double value); + +/** + * Convert a numeric value to an `int`. Returns false if the value + * is not numeric or if it is too large for an int. + **/ +bool json_number_to_int(struct json_node *num, int *dest); + +/** Same as above, but for `uint`. **/ +bool json_number_to_uint(struct json_node *num, uint *dest); + +/** Same as above, but for `s64`. **/ +bool json_number_to_s64(struct json_node *num, s64 *dest); + +/** Same as above, but for `u64`. **/ +bool json_number_to_u64(struct json_node *num, u64 *dest); + +/** + * Creates a new string value. The @value is kept only as a reference. + * + * String values can contain an arbitrary UTF-8 string with no null + * characters. However, it is not recommended to use UTF-8 values outside + * the range of UniCode codepoints (0 to 0x10ffff). + **/ +static inline struct json_node *json_new_string_ref(struct json_context *js, const char *value) +{ + struct json_node *n = json_new_node(js, JSON_STRING); + n->string = value; + return n; +} + +/** Creates a new string value, making a private copy of @value. **/ +static inline struct json_node *json_new_string(struct json_context *js, const char *value) +{ + return json_new_string_ref(js, mp_strdup(js->pool, value)); +} + +/** Creates a new array value with no elements. **/ +struct json_node *json_new_array(struct json_context *js); + +/** Appends a new element to the given array. **/ +void json_array_append(struct json_node *array, struct json_node *elt); + +/** Creates a new object value with no attributes. 
**/ +struct json_node *json_new_object(struct json_context *js); + +/** + * Adds a new (@key, @value) pair to the given object. If @key is already + * present, the pair is replaced. If @value is NULL, no new pair is created + * and a pre-existing pair is deleted. + * + * The @key is referenced by the object, you must not free it during + * the lifetime of the object. When in doubt, use json_strdup(). + **/ +void json_object_set(struct json_node *n, const char *key, struct json_node *value); + +/** Returns the value associated with @key, or NULL if no such value exists. **/ +struct json_node *json_object_get(struct json_node *n, const char *key); + +/*** + * === Parser + * + * The simplest way to parse a complete JSON file is to call json_parse(), + * which returns a value tree representing the contents of the file. + * + * Alternatively, you can read the input token by token: call json_set_input() + * and then repeat json_next_token(). If you are parsing huge JSON files, + * you probably want to do json_push() first, then scan and process some + * tokens, and then json_pop(). + * + * All parsing functions throw LibUCW exceptions of class `ucw.json.parse` + * upon errors. If you want to catch them, call the parser inside + * a transaction. + ***/ + +/** Parses a JSON file from the given fastbuf stream. **/ +struct json_node *json_parse(struct json_context *js, struct fastbuf *fb); + +/** Selects the given fastbuf stream as parser input. **/ +void json_set_input(struct json_context *js, struct fastbuf *in); + +/** Reads the next token from the input. **/ +struct json_node *json_next_token(struct json_context *js); + +/** Reads the next token, but keeps it in the input. **/ +struct json_node *json_peek_token(struct json_context *js); + +/** Reads the next JSON value, including nested values. **/ +struct json_node *json_next_value(struct json_context *js); + +/*** + * === Writer + * + * JSON files can be produced by simply calling json_write(). 
+ * + * If you want to generate the output on the fly (for example if it is huge), + * call json_set_output() and then iterate json_write_value(). + * + * By default, we produce a single-line compact representation, + * but you can choose differently by setting the appropriate + * `format_options` in the `json_context`. + ***/ + +/** Writes a JSON file to the given fastbuf stream, containing the JSON value @n. **/ +void json_write(struct json_context *js, struct fastbuf *fb, struct json_node *n); + +/** Selects the given fastbuf stream as output. **/ +void json_set_output(struct json_context *js, struct fastbuf *fb); + +/** Writes a single JSON value to the output stream. **/ +void json_write_value(struct json_context *js, struct json_node *n); + +/** Formatting options. The `format_options` field in the context is a bitwise OR of these flags. **/ +enum json_format_option { + JSON_FORMAT_ESCAPE_NONASCII = 1, // Produce pure ASCII output by escaping all Unicode characters in strings + JSON_FORMAT_INDENT = 2, // Produce pretty indented output +}; + +#endif diff --git a/libucw/ucw-json/libucw-json.pc b/libucw/ucw-json/libucw-json.pc new file mode 100644 index 0000000..fb4e105 --- /dev/null +++ b/libucw/ucw-json/libucw-json.pc @@ -0,0 +1,12 @@ +# pkg-config metadata for libucw-json + +libdir=@LIBDIR@ +incdir=. + +Name: libucw-json +Description: JSON parser for LibUCW project +Version: @UCW_VERSION@ +Cflags: -I${incdir} +Libs: -L${libdir} @SO_LINK_PATH@ -lucw-json@UCW_ABI_SUFFIX@ +Libs.private: @LIBJSON_LIBS@ +Requires.private: @DEPS@ diff --git a/libucw/ucw-json/parse.c b/libucw/ucw-json/parse.c new file mode 100644 index 0000000..df99ea4 --- /dev/null +++ b/libucw/ucw-json/parse.c @@ -0,0 +1,452 @@ +/* + * UCW JSON Library -- Parser + * + * (c) 2015 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +void json_set_input(struct json_context *js, struct fastbuf *in) +{ + js->in_fb = in; + js->in_line = 1; + js->in_column = 0; + js->next_char = -1; + js->next_token = NULL; + js->in_eof = 0; +} + +static void NONRET json_parse_error(struct json_context *js, const char *msg) +{ + trans_throw("ucw.json.parse", js, "%s at line %u:%u", msg, js->in_line, js->in_column); +} + +static int json_get_char(struct json_context *js) +{ + int c = bget_utf8_32_repl(js->in_fb, -2); + if (unlikely(c < 0)) + { + if (c == -2) + json_parse_error(js, "Malformed UTF-8 character"); + js->in_eof = 1; + return c; + } + js->in_column++; + return c; +} + +static void json_unget_char(struct json_context *js, int c) +{ + js->next_char = c; +} + +static struct json_node *json_triv_token(struct json_context *js, enum json_node_type type) +{ + js->trivial_token->type = type; + return js->trivial_token; +} + +static struct json_node *json_parse_number(struct json_context *js, int c) +{ + mp_push(js->pool); + char *p = mp_start_noalign(js->pool, 0); + + // Optional minus + if (c == '-') + { + p = mp_append_char(js->pool, p, c); + c = json_get_char(js); + if (!(c >= '0' && c <= '9')) + json_parse_error(js, "Malformed number: just minus"); + } + + // Integer part + if (c == '0') + { + // Leading zeroes are forbidden by RFC 7159 + p = mp_append_char(js->pool, p, c); + c = json_get_char(js); + if (c >= '0' && c <= '9') + json_parse_error(js, "Malformed number: leading zero"); + } + else + { + while (c >= '0' && c <= '9') + { + p = mp_append_char(js->pool, p, c); + c = json_get_char(js); + } + } + + // Fractional part + if (c == '.') + { + p = mp_append_char(js->pool, p, c); + c = json_get_char(js); + if (!(c >= '0' && c <= '9')) + json_parse_error(js, "Malformed number: no digits after decimal point"); + while (c >= '0' && c <= '9') + { + p = mp_append_char(js->pool, p, c); + c = json_get_char(js); + } + } + + 
// Exponent + if (c == 'e' || c == 'E') + { + p = mp_append_char(js->pool, p, c); + c = json_get_char(js); + if (c == '+' || c == '-') + { + p = mp_append_char(js->pool, p, c); + c = json_get_char(js); + } + if (!(c >= '0' && c <= '9')) + json_parse_error(js, "Malformed number: empty exponent"); + while (c >= '0' && c <= '9') + { + p = mp_append_char(js->pool, p, c); + c = json_get_char(js); + } + } + + json_unget_char(js, c); + + p = mp_end_string(js->pool, p); + errno = 0; + double val = strtod(p, NULL); + if (errno == ERANGE) + json_parse_error(js, "Number out of range"); + mp_pop(js->pool); + + return json_new_number(js, val); +} + +static struct json_node *json_parse_name(struct json_context *js, int c) +{ + char name[16]; + uint i = 0; + + while (c >= 'a' && c <= 'z') + { + if (i < sizeof(name) - 1) + name[i++] = c; + c = json_get_char(js); + } + if (i >= sizeof(name) - 1) + json_parse_error(js, "Invalid literal name"); + name[i] = 0; + json_unget_char(js, c); + + struct json_node *n; + if (!strcmp(name, "null")) + n = json_new_null(js); + else if (!strcmp(name, "false")) + n = json_new_bool(js, 0); + else if (!strcmp(name, "true")) + n = json_new_bool(js, 1); + else + json_parse_error(js, "Invalid literal name"); + + return n; +} + +static uint json_parse_hex4(struct json_context *js) +{ + uint x = 0; + for (int i=0; i<4; i++) + { + x = x << 4; + int c = json_get_char(js); + if (c >= '0' && c <= '9') + x += c - '0'; + else if (c >= 'a' && c <= 'f') + x += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + x += c - 'A' + 10; + else + json_parse_error(js, "Invalid Unicode escape sequence"); + } + return x; +} + +/* + * Parse a string token. The opening quote has already been read by the caller, + * so the incoming value of @c is ignored and immediately overwritten. + * Raw control characters, surrogates, private-use and out-of-range code points + * are rejected with a parse error. Backslash escapes are decoded; an escaped + * high surrogate must be followed by an escaped low surrogate, and the combined + * code point is subject to the same private-use limit (>= 0xf0000) as raw input. + * The decoded characters are appended to js->pool with mp_append_utf8_32() and + * the result is returned through json_new_string_ref(). + */ +static struct json_node *json_parse_string(struct json_context *js, int c) +{ + char *p = mp_start_noalign(js->pool, 0); + + c = json_get_char(js); + while (c != '"') + { + if (unlikely(c < 0x20)) + { + if (c < 0 || c == 0x0d || c == 0x0a) + json_parse_error(js, "Unterminated string"); + else + json_parse_error(js, "Invalid control character in string");
+ } + if (unlikely(c >= 0xd800 && c < 0xf900)) + { + if (c < 0xe000) + json_parse_error(js, "Invalid surrogate character in string"); + else + json_parse_error(js, "Invalid private-use character in string"); + } + if (unlikely(c >= 0xf0000)) + { + if (c > 0x10ffff) + json_parse_error(js, "Invalid non-Unicode character in string"); + else + json_parse_error(js, "Invalid private-use character in string"); + } + if (c == '\\') + { + c = json_get_char(js); + switch (c) + { + case '"': + case '\\': + case '/': + break; + case 'b': + c = 0x08; + break; + case 'f': + c = 0x0c; + break; + case 'n': + c = 0x0a; + break; + case 'r': + c = 0x0d; + break; + case 't': + c = 0x09; + break; + case 'u': + { + uint x = json_parse_hex4(js); + if (!x) + json_parse_error(js, "Zero bytes in strings are not supported"); + if (x >= 0xd800 && x < 0xf900) + { + if (x < 0xdc00) + { + // High surrogate: low surrogate must follow + uint y = 0; + if (json_get_char(js) == '\\' && json_get_char(js) == 'u') + y = json_parse_hex4(js); + if (!(y >= 0xdc00 && y < 0xe000)) + json_parse_error(js, "Escaped high surrogate codepoint must be followed by a low surrogate codepoint"); + c = 0x10000 + (((x & 0x03ff) << 10) | (y & 0x03ff)); + if (c >= 0xf0000) + json_parse_error(js, "Invalid escaped private-use character"); + } + else if (x < 0xe000) + { + // Low surrogate + json_parse_error(js, "Invalid escaped surrogate codepoint"); + } + else + json_parse_error(js, "Invalid escaped private-use character"); + } + else + c = x; + break; + } + default: + json_parse_error(js, "Invalid backslash sequence in string"); + } + } + p = mp_append_utf8_32(js->pool, p, c); + c = json_get_char(js); + } + + p = mp_end_string(js->pool, p); + return json_new_string_ref(js, p); +} + +static struct json_node *json_read_token(struct json_context *js) +{ + if (unlikely(js->in_eof)) + return json_triv_token(js, JSON_EOF); + + int c = js->next_char; + if (c >= 0) + js->next_char = -1; + else + c = json_get_char(js); + + while (c == 
0x20 || c == 0x09 || c == 0x0a || c == 0x0d) + { + if (c == 0x0a) + { + js->in_line++; + js->in_column = 0; + } + c = json_get_char(js); + } + if (c < 0) + return json_triv_token(js, JSON_EOF); + + if (c >= '0' && c <= '9' || c == '-') + return json_parse_number(js, c); + + if (c >= 'a' && c <= 'z') + return json_parse_name(js, c); + + if (c == '"') + return json_parse_string(js, c); + + switch (c) + { + case '[': + return json_triv_token(js, JSON_BEGIN_ARRAY); + case ']': + return json_triv_token(js, JSON_END_ARRAY); + case '{': + return json_triv_token(js, JSON_BEGIN_OBJECT); + case '}': + return json_triv_token(js, JSON_END_OBJECT); + case ':': + return json_triv_token(js, JSON_NAME_SEP); + case ',': + return json_triv_token(js, JSON_VALUE_SEP); + case '.': + json_parse_error(js, "Numbers must start with a digit"); + case 0xfeff: + json_parse_error(js, "Misplaced byte-order mark, complain in Redmond"); + default: + json_parse_error(js, "Invalid character"); + } +} + +struct json_node *json_peek_token(struct json_context *js) +{ + if (!js->next_token) + js->next_token = json_read_token(js); + return js->next_token; +} + +struct json_node *json_next_token(struct json_context *js) +{ + struct json_node *t = js->next_token; + if (t) + { + js->next_token = NULL; + return t; + } + return json_read_token(js); +} + +struct json_node *json_next_value(struct json_context *js) +{ + struct json_node *t = json_next_token(js); + + switch (t->type) + { + case JSON_EOF: + return NULL; + + // Elementary values + case JSON_NULL: + case JSON_BOOLEAN: + case JSON_NUMBER: + case JSON_STRING: + return t; + + // Array + case JSON_BEGIN_ARRAY: + { + struct json_node *a = json_new_array(js); + if (json_peek_token(js)->type == JSON_END_ARRAY) + json_next_token(js); + else for (;;) + { + struct json_node *v = json_next_value(js); + if (!v) + json_parse_error(js, "Unterminated array"); + json_array_append(a, v); + + t = json_next_token(js); + if (t->type == JSON_END_ARRAY) + break; + if 
(t->type != JSON_VALUE_SEP) + json_parse_error(js, "Comma or right bracket expected"); + } + return a; + } + + // Object + case JSON_BEGIN_OBJECT: + { + struct json_node *o = json_new_object(js); + if (json_peek_token(js)->type == JSON_END_OBJECT) + json_next_token(js); + else for (;;) + { + struct json_node *k = json_next_value(js); + if (!k) + json_parse_error(js, "Unterminated object"); + if (k->type != JSON_STRING) + json_parse_error(js, "Object key must be a string"); + + t = json_next_token(js); + if (t->type != JSON_NAME_SEP) + json_parse_error(js, "Colon expected"); + + struct json_node *v = json_next_value(js); + if (!v) + json_parse_error(js, "Unterminated object"); + if (json_object_get(o, k->string)) // FIXME: Optimize + json_parse_error(js, "Key already set"); + json_object_set(o, k->string, v); + + t = json_next_token(js); + if (t->type == JSON_END_OBJECT) + break; + if (t->type != JSON_VALUE_SEP) + json_parse_error(js, "Comma expected"); + } + return o; + } + + // Misplaced characters + case JSON_END_ARRAY: + json_parse_error(js, "Misplaced end of array"); + case JSON_END_OBJECT: + json_parse_error(js, "Misplaced end of object"); + case JSON_NAME_SEP: + json_parse_error(js, "Misplaced colon"); + case JSON_VALUE_SEP: + json_parse_error(js, "Misplaced comma"); + default: + ASSERT(0); + } +} + +struct json_node *json_parse(struct json_context *js, struct fastbuf *fb) +{ + json_set_input(js, fb); + + struct json_node *n = json_next_value(js); + if (!n) + json_parse_error(js, "Empty input"); + + struct json_node *t = json_next_token(js); + if (t->type != JSON_EOF) + json_parse_error(js, "Only one top-level value allowed"); + + return n; +} diff --git a/libucw/ucw-xml/Makefile b/libucw/ucw-xml/Makefile new file mode 100644 index 0000000..9f339b2 --- /dev/null +++ b/libucw/ucw-xml/Makefile @@ -0,0 +1,54 @@ +# Makefile for the XML parser +# (c) 2007 Pavel Charvat + +DIRS+=ucw-xml +PROGS+=$(o)/ucw-xml/xml-test + +LIBXML_MODS=common source parse dtd ns 
+LIBXML_MOD_PATHS=$(addprefix $(o)/ucw-xml/,$(LIBXML_MODS)) +LIBXML_INCLUDES=xml.h dtd.h +LIBXML_DEPS=$(LIBUCW) $(LIBCHARSET) + +$(o)/ucw-xml/libucw-xml$(LV).a: $(addsuffix .o,$(LIBXML_MOD_PATHS)) +$(o)/ucw-xml/libucw-xml$(LV).so: $(addsuffix .oo,$(LIBXML_MOD_PATHS)) $(LIBXML_DEPS) +$(o)/ucw-xml/libucw-xml$(LV).so: SONAME_SUFFIX=.0 +$(o)/ucw-xml/libucw-xml.pc: $(LIBXML_DEPS) + +ifdef CONFIG_INSTALL_API +$(o)/ucw-xml/libucw-xml.pc: $(addprefix $(o)/ucw-xml/libucw-xml$(LV),.a .so) +endif + +$(addsuffix .o,$(LIBXML_MOD_PATHS)): $(o)/ucw-xml/unicat.h +$(addsuffix .oo,$(LIBXML_MOD_PATHS)): $(o)/ucw-xml/unicat.h +$(o)/ucw-xml/unicat.h: $(s)/ucw-xml/unicat.pl + $(M)GEN $(addprefix $(o)/ucw-xml/unicat,.h .c) + $(Q)$< $(addprefix $(o)/ucw-xml/unicat,.h .c) + $(Q)touch $@ + +TESTS+=$(o)/ucw-xml/xml-test.test +$(o)/ucw-xml/xml-test: $(o)/ucw-xml/xml-test.o $(LIBXML) $(LIBCHARSET) $(LIBUCW) +$(o)/ucw-xml/xml-test.test: $(o)/ucw-xml/xml-test + +API_LIBS+=libucw-xml +API_INCLUDES+=$(o)/ucw-xml/.include-stamp +$(o)/ucw-xml/.include-stamp: $(addprefix $(s)/ucw-xml/,$(LIBXML_INCLUDES)) +$(o)/ucw-xml/.include-stamp: IDST=ucw-xml +run/lib/pkgconfig/libucw-xml.pc: $(o)/ucw-xml/libucw-xml.pc + +INSTALL_TARGETS+=install-libucw-xml-lib +install-libucw-xml-lib: + install -d -m 755 $(DESTDIR)$(INSTALL_LIB_DIR) + install -m 644 run/lib/libucw-xml$(LV).so.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-xml$(LV).so.0.0 + ln -sf libucw-xml$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-xml$(LV).so.0 +.PHONY: install-libucw-xml-lib + +INSTALL_TARGETS+=install-libucw-xml-api +install-libucw-xml-api: + install -d -m 755 $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw-xml $(DESTDIR)$(INSTALL_LIB_DIR) $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + install -m 644 run/lib/pkgconfig/libucw-xml.pc $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + install -m 644 $(addprefix run/include/ucw-xml/,$(LIBXML_INCLUDES)) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw-xml + ln -sf libucw-xml$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw-xml$(LV).so + install -m 
644 run/lib/libucw-xml$(LV).a $(DESTDIR)$(INSTALL_LIB_DIR) +.PHONY: install-libucw-xml-api + +include $(s)/ucw-xml/doc/Makefile diff --git a/libucw/ucw-xml/TODO b/libucw/ucw-xml/TODO new file mode 100644 index 0000000..b8dbc29 --- /dev/null +++ b/libucw/ucw-xml/TODO @@ -0,0 +1,15 @@ +Non-normative / not-implemented: +-- introduce numeric error codes +-- cycle detection in internal entities (and possibly external?) +-- conditional sections in DTD +-- validation of elements (regular expressions, non-cdata) +-- validation of attributes (unfinished) +-- notations +-- URI normalization +-- support for xml:space +-- support for xml:lang +-- full support for standalone documents +-- Unicode normalization + +Optimizations: +-- detect definitions of trivial entities diff --git a/libucw/ucw-xml/common.c b/libucw/ucw-xml/common.c new file mode 100644 index 0000000..8c23ce2 --- /dev/null +++ b/libucw/ucw-xml/common.c @@ -0,0 +1,135 @@ +/* + * UCW Library -- A simple XML parser + * + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include + +#include + +/*** Error handling ***/ + +void NONRET +xml_throw(struct xml_context *ctx) +{ + ASSERT(ctx->err_code && ctx->throw_buf); + longjmp(*(jmp_buf *)ctx->throw_buf, ctx->err_code); +} + +void +xml_warn(struct xml_context *ctx, const char *format, ...) +{ + if (ctx->h_warn) + { + va_list args; + va_start(args, format); + ctx->err_msg = stk_vprintf(format, args); + ctx->err_code = XML_ERR_WARN; + va_end(args); + ctx->h_warn(ctx); + ctx->err_msg = NULL; + ctx->err_code = XML_ERR_OK; + } +} + +void +xml_error(struct xml_context *ctx, const char *format, ...) 
+{ + if (ctx->h_error) + { + va_list args; + va_start(args, format); + ctx->err_msg = stk_vprintf(format, args); + ctx->err_code = XML_ERR_ERROR; + va_end(args); + ctx->h_error(ctx); + ctx->err_msg = NULL; + ctx->err_code = XML_ERR_OK; + } +} + +void NONRET +xml_fatal(struct xml_context *ctx, const char *format, ...) +{ + va_list args; + va_start(args, format); + ctx->err_msg = mp_vprintf(ctx->stack, format, args); + ctx->err_code = XML_ERR_FATAL; + ctx->state = XML_STATE_EOF; + va_end(args); + if (ctx->h_fatal) + ctx->h_fatal(ctx); + xml_throw(ctx); +} + +/*** Memory management ***/ + +void * +xml_hash_new(struct mempool *pool, uint size) +{ + void *tab = mp_alloc_zero(pool, size + XML_HASH_HDR_SIZE); + *(void **)tab = pool; + return tab + XML_HASH_HDR_SIZE; +} + +/*** Initialization ***/ + +static struct xml_context xml_defaults = { + .flags = XML_SRC_EOF | XML_REPORT_ALL, + .state = XML_STATE_START, + .h_resolve_entity = xml_def_resolve_entity, + .chars = { + .name = "", + .spout = xml_spout_chars, + .can_overwrite_buffer = 1, + }, +}; + +void +xml_init(struct xml_context *ctx) +{ + *ctx = xml_defaults; + ctx->pool = mp_new(65536); + ctx->stack = mp_new(65536); + TRACE(ctx, "init"); +} + +void +xml_cleanup(struct xml_context *ctx) +{ + TRACE(ctx, "cleanup"); + xml_dtd_cleanup(ctx); + xml_sources_cleanup(ctx); + xml_ns_cleanup(ctx); + mp_delete(ctx->pool); + mp_delete(ctx->stack); +} + +void +xml_reset(struct xml_context *ctx) +{ + TRACE(ctx, "reset"); + struct mempool *pool = ctx->pool, *stack = ctx->stack, *ns_pool = ctx->ns_pool; + const char **ns_by_id = ctx->ns_by_id; + xml_dtd_cleanup(ctx); + xml_sources_cleanup(ctx); + mp_flush(pool); + mp_flush(stack); + *ctx = xml_defaults; + ctx->pool = pool; + ctx->stack = stack; + ctx->ns_pool = ns_pool; + ctx->ns_by_id = ns_by_id; + xml_ns_reset(ctx); +} diff --git a/libucw/ucw-xml/doc/Makefile b/libucw/ucw-xml/doc/Makefile new file mode 100644 index 0000000..2ae21aa --- /dev/null +++ b/libucw/ucw-xml/doc/Makefile @@ 
-0,0 +1,20 @@ +# Makefile for the UCW-XML documentation + +DIRS+=ucw-xml/doc + +XML_DOCS=xml index +XML_DOCS_HTML=$(addprefix $(o)/ucw-xml/doc/,$(addsuffix .html,$(XML_DOCS))) + +DOCS+=$(XML_DOCS_HTML) +DOC_MODULES+=ucw-xml +$(XML_DOCS_HTML): DOC_MODULE=ucw-xml + +ifdef CONFIG_DOC +INSTALL_TARGETS+=install-libucw-xml-docs +endif + +.PHONY: install-libucw-xml-docs + +install-libucw-xml-docs: $(XML_DOCS_HTML) + install -d -m 755 $(DESTDIR)$(INSTALL_DOC_DIR)/ucw-xml/ + install -m 644 $^ $(DESTDIR)$(INSTALL_DOC_DIR)/ucw-xml/ diff --git a/libucw/ucw-xml/doc/index.txt b/libucw/ucw-xml/doc/index.txt new file mode 100644 index 0000000..a3c8efb --- /dev/null +++ b/libucw/ucw-xml/doc/index.txt @@ -0,0 +1,34 @@ +The UCW-XML library +=================== + +This library provides a light-weight XML parser built atop <<../ucw/index:,LibUCW>>. +It is primarily intended for efficient parsing of huge data sets, where other +parsers are too slow and cumbersome. + +Its features include: + +* High speed and low memory consumption, mainly thanks to efficient LibUCW + primitives like fastbufs and mempools. +* Multiple interfaces: +** SAX-like: callback functions called on various parser events +** Pull: for each call of the parser, it returns the next node +** DOM-like: returns a data structure describing the whole tree of nodes +** Any combination of the above: For example, when given a database with millions + of records, you can pull on the top level and ask for DOM of each record + separately. +* Support of namespaces. +* Complies with W3C recommendations on XML 1.0, XML 1.1, and Namespaces in XML 1.0 + as a non-validating parser, but does not aim to support all frills of other + XML-related standards. +* Partial support for DTD-driven parsing: basic checks of document structure, + filling in default values, expanding user-defined entities. 
+ +Modules +------- +- <<xml:,XML Parser>> + +Authors +------- + +- Pavel Charvát (main author) +- Martin Mareš (minor hacking and support for namespaces) diff --git a/libucw/ucw-xml/doc/xml.txt b/libucw/ucw-xml/doc/xml.txt new file mode 100644 index 0000000..07852d3 --- /dev/null +++ b/libucw/ucw-xml/doc/xml.txt @@ -0,0 +1,12 @@ +XML Parser +========== + +ucw-xml/xml.h +------------- + +To parse a document, create a parser context (<<struct_xml_context,struct xml_context>>), +initialize it with <<xml_init()>>, fill in requested parsing mode, pointers to hooks, and +other parameters. Then call <<xml_parse()>> or <<xml_next()>> as you need. At the end, dispose +of the context by <<xml_cleanup()>> or recycle it by <<xml_reset()>>. + +!!ucw-xml/xml.h diff --git a/libucw/ucw-xml/dtd.c b/libucw/ucw-xml/dtd.c new file mode 100644 index 0000000..62badba --- /dev/null +++ b/libucw/ucw-xml/dtd.c @@ -0,0 +1,1003 @@ +/* + * UCW Library -- A simple XML parser + * + * (c) 2007--2008 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include +#include + +/* Notations */ + +#define HASH_PREFIX(x) xml_dtd_notns_##x +#define HASH_NODE struct xml_dtd_notn +#define HASH_KEY_STRING name +#define HASH_ZERO_FILL +#define HASH_TABLE_DYNAMIC +#define HASH_WANT_LOOKUP +#define HASH_WANT_FIND +#define HASH_GIVE_ALLOC +#define HASH_TABLE_ALLOC +XML_HASH_GIVE_ALLOC +#include + +struct xml_dtd_notn * +xml_dtd_find_notn(struct xml_context *ctx, char *name) +{ + struct xml_dtd *dtd = ctx->dtd; + struct xml_dtd_notn *notn = xml_dtd_notns_find(dtd->tab_notns, name); + return !notn ? NULL : (notn->flags & XML_DTD_NOTN_DECLARED) ? 
notn : NULL; +} + +/* General entities */ + +#define HASH_PREFIX(x) xml_dtd_ents_##x +#define HASH_NODE struct xml_dtd_entity +#define HASH_KEY_STRING name +#define HASH_ZERO_FILL +#define HASH_TABLE_DYNAMIC +#define HASH_WANT_FIND +#define HASH_WANT_LOOKUP +#define HASH_GIVE_ALLOC +#define HASH_TABLE_ALLOC +XML_HASH_GIVE_ALLOC +#include + +static struct xml_dtd_entity * +xml_dtd_declare_trivial_entity(struct xml_context *ctx, char *name, char *text) +{ + struct xml_dtd *dtd = ctx->dtd; + struct xml_dtd_entity *ent = xml_dtd_ents_lookup(dtd->tab_ents, name); + if (ent->flags & XML_DTD_ENTITY_DECLARED) + { + xml_warn(ctx, "Entity &%s; already declared", name); + return NULL; + } + slist_add_tail(&dtd->ents, &ent->n); + ent->flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL; + ent->text = text; + return ent; +} + +static void +xml_dtd_declare_default_entities(struct xml_context *ctx) +{ + xml_dtd_declare_trivial_entity(ctx, "lt", "<"); + xml_dtd_declare_trivial_entity(ctx, "gt", ">"); + xml_dtd_declare_trivial_entity(ctx, "amp", "&"); + xml_dtd_declare_trivial_entity(ctx, "apos", "'"); + xml_dtd_declare_trivial_entity(ctx, "quot", "\""); +} + +struct xml_dtd_entity * +xml_def_find_entity(struct xml_context *ctx UNUSED, char *name) +{ +#define ENT(n, t) ent_##n = { .name = #n, .text = t, .flags = XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_TRIVIAL } + static struct xml_dtd_entity ENT(lt, "<"), ENT(gt, ">"), ENT(amp, "&"), ENT(apos, "'"), ENT(quot, "\""); +#undef ENT + switch (name[0]) + { + case 'l': + if (!strcmp(name, "lt")) + return &ent_lt; + break; + case 'g': + if (!strcmp(name, "gt")) + return &ent_gt; + break; + case 'a': + if (!strcmp(name, "amp")) + return &ent_amp; + if (!strcmp(name, "apos")) + return &ent_apos; + break; + case 'q': + if (!strcmp(name, "quot")) + return &ent_quot; + break; + } + return NULL; +} + +struct xml_dtd_entity * +xml_dtd_find_entity(struct xml_context *ctx, char *name) +{ + struct xml_dtd *dtd = ctx->dtd; + if 
(ctx->h_find_entity) + return ctx->h_find_entity(ctx, name); + else if (dtd) + { + struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_ents, name); + return !ent ? NULL : (ent->flags & XML_DTD_ENTITY_DECLARED) ? ent : NULL; + } + else + return xml_def_find_entity(ctx, name); +} + +/* Parameter entities */ + +static struct xml_dtd_entity * +xml_dtd_find_pentity(struct xml_context *ctx, char *name) +{ + struct xml_dtd *dtd = ctx->dtd; + struct xml_dtd_entity *ent = xml_dtd_ents_find(dtd->tab_pents, name); + return !ent ? NULL : (ent->flags & XML_DTD_ENTITY_DECLARED) ? ent : NULL; +} + +/* Elements */ + +struct xml_dtd_elems_table; + +static void +xml_dtd_elems_init_data(struct xml_dtd_elems_table *tab UNUSED, struct xml_dtd_elem *e) +{ + slist_init(&e->attrs); +} + +#define HASH_PREFIX(x) xml_dtd_elems_##x +#define HASH_NODE struct xml_dtd_elem +#define HASH_KEY_STRING name +#define HASH_TABLE_DYNAMIC +#define HASH_ZERO_FILL +#define HASH_WANT_FIND +#define HASH_WANT_LOOKUP +#define HASH_GIVE_ALLOC +#define HASH_GIVE_INIT_DATA +#define HASH_TABLE_ALLOC +XML_HASH_GIVE_ALLOC +#include + +struct xml_dtd_elem * +xml_dtd_find_elem(struct xml_context *ctx, char *name) +{ + return ctx->dtd ? 
xml_dtd_elems_find(ctx->dtd->tab_elems, name) : NULL; +} + +/* Element sons */ + +struct xml_dtd_enodes_table; + +static inline uint +xml_dtd_enodes_hash(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem) +{ + return hash_pointer(parent) ^ hash_pointer(elem); +} + +static inline int +xml_dtd_enodes_eq(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *parent1, struct xml_dtd_elem *elem1, struct xml_dtd_elem_node *parent2, struct xml_dtd_elem *elem2) +{ + return (parent1 == parent2) && (elem1 == elem2); +} + +static inline void +xml_dtd_enodes_init_key(struct xml_dtd_enodes_table *tab UNUSED, struct xml_dtd_elem_node *node, struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem) +{ + node->parent = parent; + node->elem = elem; +} + +#define HASH_PREFIX(x) xml_dtd_enodes_##x +#define HASH_NODE struct xml_dtd_elem_node +#define HASH_KEY_COMPLEX(x) x parent, x elem +#define HASH_KEY_DECL struct xml_dtd_elem_node *parent, struct xml_dtd_elem *elem +#define HASH_GIVE_HASHFN +#define HASH_GIVE_EQ +#define HASH_GIVE_INIT_KEY +#define HASH_TABLE_DYNAMIC +#define HASH_ZERO_FILL +#define HASH_WANT_FIND +#define HASH_WANT_NEW +#define HASH_GIVE_ALLOC +#define HASH_TABLE_ALLOC +XML_HASH_GIVE_ALLOC +#include + +/* Element attributes */ + +struct xml_dtd_attrs_table; + +static inline uint +xml_dtd_attrs_hash(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem, char *name) +{ + return hash_pointer(elem) ^ hash_string(name); +} + +static inline int +xml_dtd_attrs_eq(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_elem *elem1, char *name1, struct xml_dtd_elem *elem2, char *name2) +{ + return (elem1 == elem2) && !strcmp(name1, name2); +} + +static void +xml_dtd_attrs_init_key(struct xml_dtd_attrs_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_elem *elem, char *name) +{ + attr->elem = elem; + attr->name = name; + slist_add_tail(&elem->attrs, &attr->n); +} + +#define 
HASH_PREFIX(x) xml_dtd_attrs_##x +#define HASH_NODE struct xml_dtd_attr +#define HASH_ZERO_FILL +#define HASH_TABLE_DYNAMIC +#define HASH_KEY_COMPLEX(x) x elem, x name +#define HASH_KEY_DECL struct xml_dtd_elem *elem, char *name +#define HASH_GIVE_HASHFN +#define HASH_GIVE_EQ +#define HASH_GIVE_INIT_KEY +#define HASH_WANT_FIND +#define HASH_WANT_NEW +#define HASH_GIVE_ALLOC +#define HASH_TABLE_ALLOC +XML_HASH_GIVE_ALLOC +#include + +struct xml_dtd_attr * +xml_dtd_find_attr(struct xml_context *ctx, struct xml_dtd_elem *elem, char *name) +{ + return ctx->dtd ? xml_dtd_attrs_find(ctx->dtd->tab_attrs, elem, name) : NULL; +} + +/* Enumerated attribute values */ + +struct xml_dtd_evals_table; + +static inline uint +xml_dtd_evals_hash(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr, char *val) +{ + return hash_pointer(attr) ^ hash_string(val); +} + +static inline int +xml_dtd_evals_eq(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_attr *attr1, char *val1, struct xml_dtd_attr *attr2, char *val2) +{ + return (attr1 == attr2) && !strcmp(val1, val2); +} + +static inline void +xml_dtd_evals_init_key(struct xml_dtd_evals_table *tab UNUSED, struct xml_dtd_eval *eval, struct xml_dtd_attr *attr, char *val) +{ + eval->attr = attr; + eval->val = val; +} + +#define HASH_PREFIX(x) xml_dtd_evals_##x +#define HASH_NODE struct xml_dtd_eval +#define HASH_TABLE_DYNAMIC +#define HASH_KEY_COMPLEX(x) x attr, x val +#define HASH_KEY_DECL struct xml_dtd_attr *attr, char *val +#define HASH_GIVE_HASHFN +#define HASH_GIVE_EQ +#define HASH_GIVE_INIT_KEY +#define HASH_WANT_FIND +#define HASH_WANT_NEW +#define HASH_GIVE_ALLOC +#define HASH_TABLE_ALLOC +XML_HASH_GIVE_ALLOC +#include + +/* Enumerated attribute notations */ + +struct xml_dtd_enotns_table; + +static inline uint +xml_dtd_enotns_hash(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn) +{ + return hash_pointer(attr) ^ hash_pointer(notn); +} + +static inline int 
+xml_dtd_enotns_eq(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_attr *attr1, struct xml_dtd_notn *notn1, struct xml_dtd_attr *attr2, struct xml_dtd_notn *notn2) +{ + return (attr1 == attr2) && (notn1 == notn2); +} + +static void +xml_dtd_enotns_init_key(struct xml_dtd_enotns_table *tab UNUSED, struct xml_dtd_enotn *enotn, struct xml_dtd_attr *attr, struct xml_dtd_notn *notn) +{ + enotn->attr = attr; + enotn->notn = notn; +} + +#define HASH_PREFIX(x) xml_dtd_enotns_##x +#define HASH_NODE struct xml_dtd_enotn +#define HASH_TABLE_DYNAMIC +#define HASH_KEY_COMPLEX(x) x attr, x notn +#define HASH_KEY_DECL struct xml_dtd_attr *attr, struct xml_dtd_notn *notn +#define HASH_GIVE_HASHFN +#define HASH_GIVE_EQ +#define HASH_GIVE_INIT_KEY +#define HASH_WANT_FIND +#define HASH_WANT_NEW +#define HASH_GIVE_ALLOC +#define HASH_TABLE_ALLOC +XML_HASH_GIVE_ALLOC +#include + +/* DTD initialization/cleanup */ + +void +xml_dtd_init(struct xml_context *ctx) +{ + if (ctx->dtd) + return; + struct mempool *pool = mp_new(4096); + struct xml_dtd *dtd = ctx->dtd = mp_alloc_zero(pool, sizeof(*ctx->dtd)); + dtd->pool = pool; + xml_dtd_ents_init(dtd->tab_ents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table))); + xml_dtd_ents_init(dtd->tab_pents = xml_hash_new(pool, sizeof(struct xml_dtd_ents_table))); + xml_dtd_notns_init(dtd->tab_notns = xml_hash_new(pool, sizeof(struct xml_dtd_notns_table))); + xml_dtd_elems_init(dtd->tab_elems = xml_hash_new(pool, sizeof(struct xml_dtd_elems_table))); + xml_dtd_enodes_init(dtd->tab_enodes = xml_hash_new(pool, sizeof(struct xml_dtd_enodes_table))); + xml_dtd_attrs_init(dtd->tab_attrs = xml_hash_new(pool, sizeof(struct xml_dtd_attrs_table))); + xml_dtd_evals_init(dtd->tab_evals = xml_hash_new(pool, sizeof(struct xml_dtd_evals_table))); + xml_dtd_enotns_init(dtd->tab_enotns = xml_hash_new(pool, sizeof(struct xml_dtd_enotns_table))); + xml_dtd_declare_default_entities(ctx); +} + +void +xml_dtd_cleanup(struct xml_context *ctx) +{ + if (!ctx->dtd) + 
return; + mp_delete(ctx->dtd->pool); + ctx->dtd = NULL; +} + +void +xml_dtd_finish(struct xml_context *ctx) +{ + if (!ctx->dtd) + return; + // FIXME: validity checks +} + +/*** Parsing functions ***/ + +/* References to parameter entities */ + +void +xml_parse_pe_ref(struct xml_context *ctx) +{ + /* PEReference ::= '%' Name ';' + * Already parsed: '%' */ + struct mempool_state state; + mp_save(ctx->stack, &state); + char *name = xml_parse_name(ctx, ctx->stack); + xml_parse_char(ctx, ';'); + struct xml_dtd_entity *ent = xml_dtd_find_pentity(ctx, name); + if (!ent) + xml_error(ctx, "Unknown entity %%%s;", name); + else + { + TRACE(ctx, "Pushed entity %%%s;", name); + mp_restore(ctx->stack, &state); + xml_dec(ctx); + xml_push_entity(ctx, ent); + return; + } + mp_restore(ctx->stack, &state); + xml_dec(ctx); +} + +static uint +xml_parse_dtd_pe(struct xml_context *ctx, uint entity_decl) +{ + /* Already parsed: '%' */ + do + { + xml_inc(ctx); + if (!~entity_decl && (xml_peek_cat(ctx) & XML_CHAR_WHITE)) + { + xml_dec(ctx); + return ~0U; + } + xml_parse_pe_ref(ctx); + while (xml_peek_cat(ctx) & XML_CHAR_WHITE) + xml_skip_char(ctx); + } + while (xml_get_char(ctx) == '%'); + xml_unget_char(ctx); + return 1; +} + +static uint +xml_parse_dtd_white(struct xml_context *ctx, uint mandatory) +{ + /* Whitespace or parameter entity, + * mandatory==~0U has a special meaning of the whitespace before the '%' character in a parameter entity declaration */ + uint cnt = 0; + while (xml_peek_cat(ctx) & XML_CHAR_WHITE) + { + xml_skip_char(ctx); + cnt = 1; + } + if (xml_peek_char(ctx) == '%') + { + xml_skip_char(ctx); + return xml_parse_dtd_pe(ctx, mandatory); + } + else if (unlikely(mandatory && !cnt)) + xml_fatal_expected_white(ctx); + return cnt; +} + +static void +xml_dtd_parse_external_id(struct xml_context *ctx, char **system_id, char **public_id, uint allow_public) +{ + struct xml_dtd *dtd = ctx->dtd; + uint c = xml_peek_char(ctx); + if (c == 'S') + { + xml_parse_seq(ctx, "SYSTEM"); + 
xml_parse_dtd_white(ctx, 1); + *public_id = NULL; + *system_id = xml_parse_system_literal(ctx, dtd->pool); + } + else if (c == 'P') + { + xml_parse_seq(ctx, "PUBLIC"); + xml_parse_dtd_white(ctx, 1); + *system_id = NULL; + *public_id = xml_parse_pubid_literal(ctx, dtd->pool); + if (xml_parse_dtd_white(ctx, !allow_public)) + if ((c = xml_peek_char(ctx)) == '\'' || c == '"' || !allow_public) + *system_id = xml_parse_system_literal(ctx, dtd->pool); + } + else + xml_fatal(ctx, "Expected an external ID"); +} + +/* DTD: */ + +void +xml_parse_notation_decl(struct xml_context *ctx) +{ + /* NotationDecl ::= '' + * Already parsed: 'dtd; + xml_parse_dtd_white(ctx, 1); + + struct xml_dtd_notn *notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool)); + xml_parse_dtd_white(ctx, 1); + char *system_id, *public_id; + xml_dtd_parse_external_id(ctx, &system_id, &public_id, 1); + xml_parse_dtd_white(ctx, 0); + xml_parse_char(ctx, '>'); + + if (notn->flags & XML_DTD_NOTN_DECLARED) + xml_warn(ctx, "Notation %s already declared", notn->name); + else + { + notn->flags = XML_DTD_NOTN_DECLARED; + notn->system_id = system_id; + notn->public_id = public_id; + slist_add_tail(&dtd->notns, ¬n->n); + } + xml_dec(ctx); +} + +/* DTD: */ + +void +xml_parse_entity_decl(struct xml_context *ctx) +{ + /* Already parsed: 'dtd; + uint flags = ~xml_parse_dtd_white(ctx, ~0U) ? 0 : XML_DTD_ENTITY_PARAMETER; + if (flags) + xml_parse_dtd_white(ctx, 1); + struct xml_dtd_entity *ent = xml_dtd_ents_lookup(flags ? dtd->tab_pents : dtd->tab_ents, xml_parse_name(ctx, dtd->pool)); + xml_parse_dtd_white(ctx, 1); + slist *list = flags ? 
&dtd->pents : &dtd->ents; + if (ent->flags & XML_DTD_ENTITY_DECLARED) + { + xml_fatal(ctx, "Entity &%s; already declared, skipping not implemented", ent->name); + // FIXME: should be only warning + } + uint c, sep = xml_get_char(ctx); + if (sep == '\'' || sep == '"') + { + /* Internal entity: + * EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" */ + char *p = mp_start_noalign(dtd->pool, 1); + while (1) + { + if ((c = xml_get_char(ctx)) == sep) + break; + if (c == '%') + { + // FIXME + ASSERT(0); + //xml_parse_parameter_ref(ctx); + continue; + } + if (c == '&') + { + xml_inc(ctx); + if (xml_peek_char(ctx) != '#') + { + /* Bypass references to general entities */ + struct mempool_state state; + mp_save(ctx->stack, &state); + char *n = xml_parse_name(ctx, ctx->stack); + xml_parse_char(ctx, ';'); + xml_dec(ctx); + uint l = strlen(n); + p = mp_spread(dtd->pool, p, 3 + l); + *p++ = '&'; + memcpy(p, n, l); + p += l; + *p++ = ';';; + mp_restore(ctx->stack, &state); + continue; + } + else + { + xml_skip_char(ctx); + c = xml_parse_char_ref(ctx); + } + } + p = mp_spread(dtd->pool, p, 5); + p = utf8_32_put(p, c); + } + *p = 0; + ent->len = p - (char *)mp_ptr(dtd->pool); + ent->text = mp_end(dtd->pool, p + 1); + slist_add_tail(list, &ent->n); + ent->flags = flags | XML_DTD_ENTITY_DECLARED; + } + else + { + /* External entity */ + struct xml_dtd_notn *notn = NULL; + char *system_id, *public_id; + xml_unget_char(ctx); + xml_dtd_parse_external_id(ctx, &system_id, &public_id, 0); + if (xml_parse_dtd_white(ctx, 0) && flags && xml_peek_char(ctx) != '>') + { + /* General external unparsed entity */ + flags |= XML_DTD_ENTITY_UNPARSED; + xml_parse_seq(ctx, "NDATA"); + xml_parse_dtd_white(ctx, 1); + notn = xml_dtd_notns_lookup(dtd->tab_notns, xml_parse_name(ctx, dtd->pool)); + } + slist_add_tail(list, &ent->n); + ent->flags = flags | XML_DTD_ENTITY_DECLARED | XML_DTD_ENTITY_EXTERNAL; + ent->system_id = system_id; + ent->public_id = 
public_id; + ent->notn = notn; + } + xml_parse_dtd_white(ctx, 0); + xml_parse_char(ctx, '>'); + xml_dec(ctx); +} + +/* DTD: */ + +void +xml_parse_element_decl(struct xml_context *ctx) +{ + /* Elementdecl ::= '' + * Already parsed: 'dtd; + xml_parse_dtd_white(ctx, 1); + char *name = xml_parse_name(ctx, dtd->pool); + xml_parse_dtd_white(ctx, 1); + struct xml_dtd_elem *elem = xml_dtd_elems_lookup(dtd->tab_elems, name); + if (elem->flags & XML_DTD_ELEM_DECLARED) + xml_fatal(ctx, "Element <%s> already declared", name); + + /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */ + uint c = xml_peek_char(ctx); + if (c == 'E') + { + xml_parse_seq(ctx, "EMPTY"); + elem->type = XML_DTD_ELEM_EMPTY; + } + else if (c == 'A') + { + xml_parse_seq(ctx, "ANY"); + elem->type = XML_DTD_ELEM_ANY; + } + else if (c == '(') + { + xml_skip_char(ctx); + xml_inc(ctx); + xml_parse_dtd_white(ctx, 0); + struct xml_dtd_elem_node *parent = elem->node = mp_alloc_zero(dtd->pool, sizeof(*parent)); + if (xml_peek_char(ctx) == '#') + { + /* Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? 
')' */ + xml_skip_char(ctx); + xml_parse_seq(ctx, "PCDATA"); + elem->type = XML_DTD_ELEM_MIXED; + parent->type = XML_DTD_ELEM_PCDATA; + while (1) + { + xml_parse_dtd_white(ctx, 0); + if ((c = xml_get_char(ctx)) == ')') + break; + else if (c != '|') + xml_fatal_expected(ctx, ')'); + xml_parse_dtd_white(ctx, 0); + struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool)); + if (xml_dtd_enodes_find(dtd->tab_enodes, parent, son_elem)) + xml_error(ctx, "Duplicate content '%s'", son_elem->name); + else + { + struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem); + slist_add_tail(&parent->sons, &son->n); + } + } + xml_dec(ctx); + if (xml_peek_char(ctx) == '*') + { + xml_skip_char(ctx); + parent->occur = XML_DTD_ELEM_OCCUR_MULT; + } + else if (!slist_head(&parent->sons)) + parent->occur = XML_DTD_ELEM_OCCUR_ONCE; + else + xml_fatal_expected(ctx, '*'); + } + else + { + /* children ::= (choice | seq) ('?' | '*' | '+')? + * cp ::= (Name | choice | seq) ('?' | '*' | '+')? + * choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' + * seq ::= '(' S? cp ( S? ',' S? cp )* S? 
')' */ + + elem->type = XML_DTD_ELEM_CHILDREN; + parent->type = XML_DTD_ELEM_PCDATA; + uint c; + goto first; + + while (1) + { + /* After name */ + xml_parse_dtd_white(ctx, 0); + if ((c = xml_get_char(ctx)) == ')') + { + xml_dec(ctx); + if (parent->type == XML_DTD_ELEM_PCDATA) + parent->type = XML_DTD_ELEM_SEQ; + if ((c = xml_get_char(ctx)) == '?') + parent->occur = XML_DTD_ELEM_OCCUR_OPT; + else if (c == '*') + parent->occur = XML_DTD_ELEM_OCCUR_MULT; + else if (c == '+') + parent->occur = XML_DTD_ELEM_OCCUR_PLUS; + else + { + xml_unget_char(ctx); + parent->occur = XML_DTD_ELEM_OCCUR_ONCE; + } + if (!parent->parent) + break; + parent = parent->parent; + continue; + } + else if (c == '|') + { + if (parent->type == XML_DTD_ELEM_PCDATA) + parent->type = XML_DTD_ELEM_OR; + else if (parent->type != XML_DTD_ELEM_OR) + xml_fatal(ctx, "Mixed operators in the list of element children"); + } + else if (c == ',') + { + if (parent->type == XML_DTD_ELEM_PCDATA) + parent->type = XML_DTD_ELEM_SEQ; + else if (parent->type != XML_DTD_ELEM_SEQ) + xml_fatal(ctx, "Mixed operators in the list of element children"); + } + else if (c == '(') + { + xml_inc(ctx); + struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son)); + son->parent = parent; + slist_add_tail(&parent->sons, &son->n); + parent = son->parent; + son->type = XML_DTD_ELEM_MIXED; + } + else + xml_unget_char(ctx); + + /* Before name */ + xml_parse_dtd_white(ctx, 0); +first:; + struct xml_dtd_elem *son_elem = xml_dtd_elems_lookup(dtd->tab_elems, xml_parse_name(ctx, dtd->pool)); + // FIXME: duplicates, occurance + //struct xml_dtd_elem_node *son = xml_dtd_enodes_new(dtd->tab_enodes, parent, son_elem); + struct xml_dtd_elem_node *son = mp_alloc_zero(dtd->pool, sizeof(*son)); + son->parent = parent; + son->elem = son_elem; + slist_add_tail(&parent->sons, &son->n); + } + } + } + else + xml_fatal(ctx, "Expected element content specification"); + + xml_parse_dtd_white(ctx, 0); + xml_parse_char(ctx, '>'); + 
xml_dec(ctx); +} + +void +xml_parse_attr_list_decl(struct xml_context *ctx) +{ + /* AttlistDecl ::= '' + * AttDef ::= S Name S AttType S DefaultDecl + * Already parsed: 'dtd; + xml_parse_dtd_white(ctx, 1); + struct xml_dtd_elem *elem = xml_dtd_elems_lookup(ctx->dtd->tab_elems, xml_parse_name(ctx, dtd->pool)); + + while (xml_parse_dtd_white(ctx, 0) && xml_peek_char(ctx) != '>') + { + char *name = xml_parse_name(ctx, dtd->pool); + struct xml_dtd_attr *attr = xml_dtd_attrs_find(dtd->tab_attrs, elem, name); + uint ignored = 0; + if (attr) + { + xml_warn(ctx, "Duplicate attribute definition"); + ignored++; + } + else + attr = xml_dtd_attrs_new(ctx->dtd->tab_attrs, elem, name); + xml_parse_dtd_white(ctx, 1); + if (xml_peek_char(ctx) == '(') + { + xml_skip_char(ctx); // FIXME: xml_inc/dec ? + if (!ignored) + attr->type = XML_ATTR_ENUM; + do + { + xml_parse_dtd_white(ctx, 0); + char *value = xml_parse_nmtoken(ctx, dtd->pool); + if (!ignored) + if (xml_dtd_evals_find(ctx->dtd->tab_evals, attr, value)) + xml_error(ctx, "Duplicate enumeration value"); + else + xml_dtd_evals_new(ctx->dtd->tab_evals, attr, value); + xml_parse_dtd_white(ctx, 0); + } + while (xml_get_char(ctx) == '|'); + xml_unget_char(ctx); + xml_parse_char(ctx, ')'); + } + else + { + char *type = xml_parse_name(ctx, dtd->pool); + enum xml_dtd_attr_type t = XML_ATTR_CDATA; + if (!strcmp(type, "CDATA")) + t = XML_ATTR_CDATA; + else if (!strcmp(type, "ID")) + t = XML_ATTR_ID; + else if (!strcmp(type, "IDREF")) + t = XML_ATTR_IDREF; + else if (!strcmp(type, "IDREFS")) + t = XML_ATTR_IDREFS; + else if (!strcmp(type, "ENTITY")) + t = XML_ATTR_ENTITY; + else if (!strcmp(type, "ENTITIES")) + t = XML_ATTR_ENTITIES; + else if (!strcmp(type, "NMTOKEN")) + t = XML_ATTR_NMTOKEN; + else if (!strcmp(type, "NMTOKENS")) + t = XML_ATTR_NMTOKENS; + else if (!strcmp(type, "NOTATION")) + { + if (elem->type == XML_DTD_ELEM_EMPTY) + xml_fatal(ctx, "Empty element must not have notation attribute"); + // FIXME: An element type MUST NOT 
have more than one NOTATION attribute specified. + t = XML_ATTR_NOTATION; + xml_parse_dtd_white(ctx, 1); + xml_parse_char(ctx, '('); + do + { + xml_parse_dtd_white(ctx, 0); + struct xml_dtd_notn *n = xml_dtd_notns_lookup(ctx->dtd->tab_notns, xml_parse_name(ctx, dtd->pool)); + if (!ignored) + if (xml_dtd_enotns_find(ctx->dtd->tab_enotns, attr, n)) + xml_error(ctx, "Duplicate enumerated notation"); + else + xml_dtd_enotns_new(ctx->dtd->tab_enotns, attr, n); + xml_parse_dtd_white(ctx, 0); + } + while (xml_get_char(ctx) == '|'); + xml_unget_char(ctx); + xml_parse_char(ctx, ')'); + } + else + xml_fatal(ctx, "Unknown attribute type"); + if (!ignored) + attr->type = t; + } + xml_parse_dtd_white(ctx, 1); + enum xml_dtd_attr_default def = XML_ATTR_NONE; + if (xml_get_char(ctx) == '#') + switch (xml_peek_char(ctx)) + { + case 'R': + xml_parse_seq(ctx, "REQUIRED"); + def = XML_ATTR_REQUIRED; + break; + case 'I': + xml_parse_seq(ctx, "IMPLIED"); + def = XML_ATTR_IMPLIED; + break; + case 'F': + xml_parse_seq(ctx, "FIXED"); + def = XML_ATTR_FIXED; + xml_parse_dtd_white(ctx, 1); + break; + default: + xml_fatal(ctx, "Expected a modifier for default attribute value"); + } + else + xml_unget_char(ctx); + if (def != XML_ATTR_REQUIRED && def != XML_ATTR_IMPLIED) + { + char *v = xml_parse_attr_value(ctx, attr); + if (!ignored) + attr->default_value = v; + } + if (!ignored) + attr->default_mode = def; + } + xml_skip_char(ctx); + xml_dec(ctx); +} + +void +xml_skip_internal_subset(struct xml_context *ctx) +{ + TRACE(ctx, "skip_internal_subset"); + /* AlreadyParsed: '[' */ + uint c; + while ((c = xml_get_char(ctx)) != ']') + { + if (c != '<') + continue; + if ((c = xml_get_char(ctx)) == '?') + { + xml_inc(ctx); + xml_skip_pi(ctx); + } + else if (c != '!') + xml_dec(ctx); + else if (xml_get_char(ctx) == '-') + { + xml_inc(ctx); + xml_skip_comment(ctx); + } + else + while ((c = xml_get_char(ctx)) != '>') + if (c == '\'' || c == '"') + while (xml_get_char(ctx) != c); + } + xml_dec(ctx); +} + 
+/*** Validation of attribute values ***/ + +static uint +xml_check_tokens(char *value, uint first_cat, uint next_cat, uint seq) +{ + char *p = value; + uint u; + while (1) + { + p = utf8_32_get(p, &u); + if (!(xml_char_cat(u) & first_cat)) + return 0; + while (*p & ~0x20) + { + p = utf8_32_get(p, &u); + if (!(xml_char_cat(u) & next_cat)) + return 0; + } + if (!*p) + return 1; + if (!seq) + return 0; + p++; + } +} + +static uint +xml_is_name(struct xml_context *ctx, char *value) +{ + /* Name ::= NameStartChar (NameChar)* */ + return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 0); +} + +static uint +xml_is_names(struct xml_context *ctx, char *value) +{ + /* Names ::= Name (#x20 Name)* */ + return xml_check_tokens(value, ctx->cat_sname, ctx->cat_name, 1); +} + +static uint +xml_is_nmtoken(struct xml_context *ctx, char *value) +{ + /* Nmtoken ::= (NameChar)+ */ + return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 0); +} + +static uint +xml_is_nmtokens(struct xml_context *ctx, char *value) +{ + /* Nmtokens ::= Nmtoken (#x20 Nmtoken)* */ + return xml_check_tokens(value, ctx->cat_name, ctx->cat_name, 1); +} + +static void +xml_err_attr_format(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *type) +{ + xml_error(ctx, "Attribute %s in <%s> does not match the production of %s", dtd->name, dtd->elem->name, type); +} + +void +xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value) +{ + if (dtd->type == XML_ATTR_CDATA) + return; + xml_normalize_white(ctx, value); + switch (dtd->type) + { + case XML_ATTR_ID: + if (!xml_is_name(ctx, value)) + xml_err_attr_format(ctx, dtd, "NAME"); + //FIXME: add to a hash table + break; + case XML_ATTR_IDREF: + if (!xml_is_name(ctx, value)) + xml_err_attr_format(ctx, dtd, "NAME"); + // FIXME: find in hash table (beware forward references) + break; + case XML_ATTR_IDREFS: + if (!xml_is_names(ctx, value)) + xml_err_attr_format(ctx, dtd, "NAMES"); + // FIXME: find + break; + case 
XML_ATTR_ENTITY: + // FIXME + break; + case XML_ATTR_ENTITIES: + // FIXME + break; + case XML_ATTR_NMTOKEN: + if (!xml_is_nmtoken(ctx, value)) + xml_err_attr_format(ctx, dtd, "NMTOKEN"); + break; + case XML_ATTR_NMTOKENS: + if (!xml_is_nmtokens(ctx, value)) + xml_err_attr_format(ctx, dtd, "NMTOKENS"); + break; + case XML_ATTR_ENUM: + if (!xml_dtd_evals_find(ctx->dtd->tab_evals, dtd, value)) + xml_error(ctx, "Attribute %s in <%s> contains an undefined enumeration value", dtd->name, dtd->elem->name); + break; + case XML_ATTR_NOTATION: + if (!xml_dtd_find_notn(ctx, value)) + xml_error(ctx, "Attribute %s in <%s> contains an undefined notation", dtd->name, dtd->elem->name); + break; + } +} diff --git a/libucw/ucw-xml/dtd.h b/libucw/ucw-xml/dtd.h new file mode 100644 index 0000000..c3e07f6 --- /dev/null +++ b/libucw/ucw-xml/dtd.h @@ -0,0 +1,178 @@ +/* + * UCW Library -- A simple XML parser + * + * (c) 2007--2008 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_XML_DTD_H +#define _UCW_XML_DTD_H + +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define xml_dtd_cleanup ucw_xml_dtd_cleanup +#define xml_dtd_find_attr ucw_xml_dtd_find_attr +#define xml_dtd_find_elem ucw_xml_dtd_find_elem +#define xml_dtd_find_entity ucw_xml_dtd_find_entity +#define xml_dtd_find_notn ucw_xml_dtd_find_notn +#define xml_dtd_finish ucw_xml_dtd_finish +#define xml_dtd_init ucw_xml_dtd_init +#endif + +struct xml_dtd { + struct mempool *pool; /* Memory pool where to allocate DTD */ + slist ents; /* Link list of general entities */ + slist pents; /* Link list of parameter entities */ + slist notns; /* Link list of notations */ + slist elems; /* Link list of elements */ + void *tab_ents; /* Hash table of general entities */ + void *tab_pents; /* Hash table of parameter entities */ + void *tab_notns; /* Hash table of notations */ + void *tab_elems; /* Hash table of elements */ + void *tab_enodes; /* Hash table of element sons */ + void *tab_attrs; /* Hash table of element attributes */ + void *tab_evals; /* Hash table of enumerated attribute values */ + void *tab_enotns; /* hash table of enumerated attribute notations */ +}; + +/* Notations */ + +enum xml_dtd_notn_flags { + XML_DTD_NOTN_DECLARED = 0x1, /* The notation has been declared (internal usage) */ +}; + +struct xml_dtd_notn { + snode n; /* Node in xml_dtd.notns */ + uint flags; /* XML_DTD_NOTN_x */ + char *name; /* Notation name */ + char *system_id; /* External ID */ + char *public_id; + void *user; /* User-defined */ +}; + +struct xml_dtd_notn *xml_dtd_find_notn(struct xml_context *ctx, char *name); + +/* Entities */ + +enum xml_dtd_entity_flags { + XML_DTD_ENTITY_DECLARED = 0x1, /* The entity has been declared (internal usage) */ + XML_DTD_ENTITY_VISITED = 0x2, /* Cycle detection (internal usage) */ + XML_DTD_ENTITY_PARAMETER = 0x4, /* Parameter entity, general otherwise */ + XML_DTD_ENTITY_EXTERNAL = 0x8, /* External entity, internal otherwise */ + XML_DTD_ENTITY_UNPARSED = 
0x10, /* Unparsed entity, parsed otherwise */ + XML_DTD_ENTITY_TRIVIAL = 0x20, /* Replacement text is a sequence of characters and character references */ +}; + +struct xml_dtd_entity { + snode n; /* Node in xml_dtd.[gp]ents */ + uint flags; /* XML_DTD_ENT_x */ + char *name; /* Entity name */ + char *text; /* Replacement text / expanded replacement text (XML_DTD_ENT_TRIVIAL) */ + uint len; /* Text length */ + char *system_id; /* External ID */ + char *public_id; + struct xml_dtd_notn *notn; /* Notation (XML_DTD_ENT_UNPARSED only) */ + void *user; /* User-defined */ +}; + +struct xml_dtd_entity *xml_dtd_find_entity(struct xml_context *ctx, char *name); + +/* Elements */ + +enum xml_dtd_elem_flags { + XML_DTD_ELEM_DECLARED = 0x1, /* The element has been declared (internal usage) */ +}; + +enum xml_dtd_elem_type { + XML_DTD_ELEM_EMPTY, + XML_DTD_ELEM_ANY, + XML_DTD_ELEM_MIXED, + XML_DTD_ELEM_CHILDREN, +}; + +struct xml_dtd_elem { + snode n; + uint flags; + uint type; + char *name; + struct xml_dtd_elem_node *node; + slist attrs; + void *user; /* User-defined */ +}; + +struct xml_dtd_elem_node { + snode n; + struct xml_dtd_elem_node *parent; + struct xml_dtd_elem *elem; + slist sons; + uint type; + uint occur; + void *user; /* User-defined */ +}; + +enum xml_dtd_elem_node_type { + XML_DTD_ELEM_PCDATA, + XML_DTD_ELEM_SEQ, + XML_DTD_ELEM_OR, +}; + +enum xml_dtd_elem_node_occur { + XML_DTD_ELEM_OCCUR_ONCE, + XML_DTD_ELEM_OCCUR_OPT, + XML_DTD_ELEM_OCCUR_MULT, + XML_DTD_ELEM_OCCUR_PLUS, +}; + +struct xml_dtd_elem *xml_dtd_find_elem(struct xml_context *ctx, char *name); + +/* Attributes */ + +enum xml_dtd_attr_default { + XML_ATTR_NONE, + XML_ATTR_REQUIRED, + XML_ATTR_IMPLIED, + XML_ATTR_FIXED, +}; + +enum xml_dtd_attr_type { + XML_ATTR_CDATA, + XML_ATTR_ID, + XML_ATTR_IDREF, + XML_ATTR_IDREFS, + XML_ATTR_ENTITY, + XML_ATTR_ENTITIES, + XML_ATTR_NMTOKEN, + XML_ATTR_NMTOKENS, + XML_ATTR_ENUM, + XML_ATTR_NOTATION, +}; + +struct xml_dtd_attr { + snode n; + char *name; /* 
Attribute name */ + struct xml_dtd_elem *elem; /* Owner element */ + uint type; /* See enum xml_dtd_attr_type */ + uint default_mode; /* See enum xml_dtd_attr_default */ + char *default_value; /* The default value defined in DTD (or NULL) */ +}; + +struct xml_dtd_eval { + struct xml_dtd_attr *attr; + char *val; +}; + +struct xml_dtd_enotn { + struct xml_dtd_attr *attr; + struct xml_dtd_notn *notn; +}; + +void xml_dtd_init(struct xml_context *ctx); +void xml_dtd_cleanup(struct xml_context *ctx); +void xml_dtd_finish(struct xml_context *ctx); + +struct xml_dtd_attr *xml_dtd_find_attr(struct xml_context *ctx, struct xml_dtd_elem *elem, char *name); + +#endif diff --git a/libucw/ucw-xml/internals.h b/libucw/ucw-xml/internals.h new file mode 100644 index 0000000..0e9dc2d --- /dev/null +++ b/libucw/ucw-xml/internals.h @@ -0,0 +1,275 @@ +/* + * UCW Library -- A simple XML parser + * + * (c) 2007--2008 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_XML_INTERNALS_H +#define _UCW_XML_INTERNALS_H + +#include +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define xml_do_pop ucw_xml_do_pop +#define xml_do_push ucw_xml_do_push +#define xml_fatal_expected ucw_xml_fatal_expected +#define xml_fatal_expected_quot ucw_xml_fatal_expected_quot +#define xml_fatal_expected_white ucw_xml_fatal_expected_white +#define xml_fatal_nested ucw_xml_fatal_nested +#define xml_hash_new ucw_xml_hash_new +#define xml_ns_cleanup ucw_xml_ns_cleanup +#define xml_ns_pop_element ucw_xml_ns_pop_element +#define xml_ns_push_element ucw_xml_ns_push_element +#define xml_ns_reset ucw_xml_ns_reset +#define xml_parse_attr_list_decl ucw_xml_parse_attr_list_decl +#define xml_parse_attr_value ucw_xml_parse_attr_value +#define xml_parse_char_ref ucw_xml_parse_char_ref +#define xml_parse_element_decl ucw_xml_parse_element_decl +#define xml_parse_entity_decl ucw_xml_parse_entity_decl +#define xml_parse_eq ucw_xml_parse_eq +#define xml_parse_name ucw_xml_parse_name +#define xml_parse_nmtoken ucw_xml_parse_nmtoken +#define xml_parse_notation_decl ucw_xml_parse_notation_decl +#define xml_parse_pe_ref ucw_xml_parse_pe_ref +#define xml_parse_pubid_literal ucw_xml_parse_pubid_literal +#define xml_parse_system_literal ucw_xml_parse_system_literal +#define xml_parse_white ucw_xml_parse_white +#define xml_pop_comment ucw_xml_pop_comment +#define xml_pop_dom ucw_xml_pop_dom +#define xml_pop_pi ucw_xml_pop_pi +#define xml_push_comment ucw_xml_push_comment +#define xml_push_dom ucw_xml_push_dom +#define xml_push_entity ucw_xml_push_entity +#define xml_push_pi ucw_xml_push_pi +#define xml_push_source ucw_xml_push_source +#define xml_refill ucw_xml_refill +#define xml_skip_comment ucw_xml_skip_comment +#define xml_skip_internal_subset ucw_xml_skip_internal_subset +#define xml_skip_name ucw_xml_skip_name +#define xml_skip_pi ucw_xml_skip_pi +#define xml_sources_cleanup ucw_xml_sources_cleanup +#define xml_spout_chars ucw_xml_spout_chars +#define 
xml_throw ucw_xml_throw +#define xml_validate_attr ucw_xml_validate_attr +#endif + +/*** Debugging ***/ + +#ifdef LOCAL_DEBUG +#define TRACE(c, f, p...) do { DBG("XML %u: " f, xml_row(c), ##p); } while(0) +#else +#define TRACE(c, f, p...) do {} while(0) +#endif + +/*** Error handling ***/ + +void NONRET xml_throw(struct xml_context *ctx); + +/*** Memory management ***/ + +struct xml_stack { + struct xml_stack *next; + struct mempool_state state; + uint flags; +}; + +void *xml_do_push(struct xml_context *ctx, uint size); +void xml_do_pop(struct xml_context *ctx, struct xml_stack *s); + +static inline void xml_push(struct xml_context *ctx) +{ + TRACE(ctx, "push"); + xml_do_push(ctx, sizeof(struct xml_stack)); +} + +static inline void xml_pop(struct xml_context *ctx) +{ + TRACE(ctx, "pop"); + ASSERT(ctx->stack_list); + xml_do_pop(ctx, ctx->stack_list); +} + +struct xml_dom_stack { + struct xml_stack stack; + struct mempool_state state; +}; + +struct xml_node *xml_push_dom(struct xml_context *ctx, struct mempool_state *state); +void xml_pop_dom(struct xml_context *ctx, uint free); + +#define XML_HASH_HDR_SIZE ALIGN_TO(sizeof(void *), CPU_STRUCT_ALIGN) +#define XML_HASH_GIVE_ALLOC struct HASH_PREFIX(table); \ + static inline void *HASH_PREFIX(alloc)(struct HASH_PREFIX(table) *t, uint size) \ + { return mp_alloc(*(void **)((void *)t - XML_HASH_HDR_SIZE), size); } \ + static inline void HASH_PREFIX(free)(struct HASH_PREFIX(table) *t UNUSED, void *p UNUSED) {} + +void *xml_hash_new(struct mempool *pool, uint size); + +void xml_spout_chars(struct fastbuf *fb); + +/*** Reading of document/external entities ***/ + +void NONRET xml_fatal_nested(struct xml_context *ctx); + +static inline void xml_inc(struct xml_context *ctx) +{ + /* Called after the first character of a block */ + TRACE(ctx, "inc"); + ctx->depth++; +} + +static inline void xml_dec(struct xml_context *ctx) +{ + /* Called after the last character of a block */ + TRACE(ctx, "dec"); + if (unlikely(!ctx->depth--)) + 
xml_fatal_nested(ctx); +} + +#include "obj/ucw-xml/unicat.h" + +static inline uint xml_char_cat(uint c) +{ + if (c < 0x10000) + return 1U << ucw_xml_char_tab1[(c & 0xff) + ucw_xml_char_tab2[c >> 8]]; + else if (likely(c < 0x110000)) + return 1U << ucw_xml_char_tab3[c >> 16]; + else + return 1; +} + +static inline uint xml_ascii_cat(uint c) +{ + return ucw_xml_char_tab1[c]; +} + +struct xml_source *xml_push_source(struct xml_context *ctx); +void xml_push_entity(struct xml_context *ctx, struct xml_dtd_entity *ent); + +void xml_refill(struct xml_context *ctx); + +static inline uint xml_peek_char(struct xml_context *ctx) +{ + if (ctx->bptr == ctx->bstop) + xml_refill(ctx); + return ctx->bptr[0]; +} + +static inline uint xml_peek_cat(struct xml_context *ctx) +{ + if (ctx->bptr == ctx->bstop) + xml_refill(ctx); + return ctx->bptr[1]; +} + +static inline uint xml_get_char(struct xml_context *ctx) +{ + uint c = xml_peek_char(ctx); + ctx->bptr += 2; + return c; +} + +static inline uint xml_get_cat(struct xml_context *ctx) +{ + uint c = xml_peek_cat(ctx); + ctx->bptr += 2; + return c; +} + +static inline uint xml_last_char(struct xml_context *ctx) +{ + return ctx->bptr[-2]; +} + +static inline uint xml_last_cat(struct xml_context *ctx) +{ + return ctx->bptr[-1]; +} + +static inline uint xml_skip_char(struct xml_context *ctx) +{ + uint c = ctx->bptr[0]; + ctx->bptr += 2; + return c; +} + +static inline uint xml_unget_char(struct xml_context *ctx) +{ + return *(ctx->bptr -= 2); +} + +void xml_sources_cleanup(struct xml_context *ctx); + +/*** Parsing ***/ + +void NONRET xml_fatal_expected(struct xml_context *ctx, uint c); +void NONRET xml_fatal_expected_white(struct xml_context *ctx); +void NONRET xml_fatal_expected_quot(struct xml_context *ctx); + +uint xml_parse_white(struct xml_context *ctx, uint mandatory); + +static inline void xml_parse_char(struct xml_context *ctx, uint c) +{ + /* Consumes a given Unicode character */ + if (unlikely(c != xml_get_char(ctx))) + 
xml_fatal_expected(ctx, c); +} + +static inline void xml_parse_seq(struct xml_context *ctx, const char *seq) +{ + /* Consumes a given sequence of ASCII characters */ + while (*seq) + xml_parse_char(ctx, *seq++); +} + +void xml_parse_eq(struct xml_context *ctx); + +static inline uint xml_parse_quote(struct xml_context *ctx) +{ + /* "'" | '"' */ + uint c = xml_get_char(ctx); + if (unlikely(c != '\'' && c != '\"')) + xml_fatal_expected_quot(ctx); + return c; +} + +char *xml_parse_name(struct xml_context *ctx, struct mempool *pool); +void xml_skip_name(struct xml_context *ctx); +char *xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool); + +char *xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool); +char *xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool); + +uint xml_parse_char_ref(struct xml_context *ctx); +void xml_parse_pe_ref(struct xml_context *ctx); + +char *xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr); + +void xml_skip_internal_subset(struct xml_context *ctx); +void xml_parse_notation_decl(struct xml_context *ctx); +void xml_parse_entity_decl(struct xml_context *ctx); +void xml_parse_element_decl(struct xml_context *ctx); +void xml_parse_attr_list_decl(struct xml_context *ctx); + +void xml_push_comment(struct xml_context *ctx); +void xml_pop_comment(struct xml_context *ctx); +void xml_skip_comment(struct xml_context *ctx); + +void xml_push_pi(struct xml_context *ctx); +void xml_pop_pi(struct xml_context *ctx); +void xml_skip_pi(struct xml_context *ctx); + +void xml_validate_attr(struct xml_context *ctx, struct xml_dtd_attr *dtd, char *value); + +/*** Namespaces ***/ + +void xml_ns_cleanup(struct xml_context *ctx); +void xml_ns_reset(struct xml_context *ctx); +void xml_ns_push_element(struct xml_context *ctx); +void xml_ns_pop_element(struct xml_context *ctx); + +#endif diff --git a/libucw/ucw-xml/libucw-xml.pc b/libucw/ucw-xml/libucw-xml.pc new file mode 100644 index 
0000000..5c02e99 --- /dev/null +++ b/libucw/ucw-xml/libucw-xml.pc @@ -0,0 +1,11 @@ +# pkg-config metadata for libucw-xml + +libdir=@LIBDIR@ +incdir=. + +Name: libucw-xml +Description: XML parser for LibUCW project +Version: @UCW_VERSION@ +Cflags: -I${incdir} +Libs: -L${libdir} @SO_LINK_PATH@ -lucw-xml@UCW_ABI_SUFFIX@ +Requires.private: @DEPS@ diff --git a/libucw/ucw-xml/ns.c b/libucw/ucw-xml/ns.c new file mode 100644 index 0000000..18412dd --- /dev/null +++ b/libucw/ucw-xml/ns.c @@ -0,0 +1,248 @@ +/* + * UCW Library -- A simple XML parser -- Namespaces + * + * (c) 2015 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include + +/* + * This is an implementation of XML namespaces according to + * http://www.w3.org/TR/REC-xml-names/. + * + * Currently, we assume that the document does not contain a plethora + * of namespaces and prefixes. So we keep them in memory until the + * document ends. 
+ */ + +struct ns_hash_entry { + uint ns; + char name[1]; +}; + +#define HASH_NODE struct ns_hash_entry +#define HASH_PREFIX(x) ns_hash_##x +#define HASH_KEY_ENDSTRING name +#define HASH_WANT_FIND +#define HASH_WANT_LOOKUP +#define HASH_TABLE_DYNAMIC +#define HASH_TABLE_ALLOC +#define HASH_LOOKUP_DETECT_NEW +#define HASH_GIVE_ALLOC +XML_HASH_GIVE_ALLOC +#include + +struct xml_ns_prefix { + struct xml_ns_prefix *prev; + struct xml_node *e; /* Which element defined this prefix */ + struct ns_hash_entry *he; /* NULL if changing default NS */ + uint prev_ns; /* Previous NS ID assigned to this prefix */ +}; + +static bool +ns_enabled(struct xml_context *ctx) +{ + return (ctx->flags & XML_NAMESPACES); +} + +void +xml_ns_enable(struct xml_context *ctx) +{ + if (ns_enabled(ctx)) + return; + + TRACE(ctx, "NS: Enabling"); + ASSERT(!ctx->depth); + ctx->flags |= XML_NAMESPACES; + if (!ctx->ns_pool) + { + // CAVEAT: xml_reset() must handle everything we allocate here + TRACE(ctx, "NS: Allocating data structures"); + ctx->ns_pool = mp_new(4096); + GARY_INIT(ctx->ns_by_id, 16); + } + + ctx->ns_by_name = xml_hash_new(ctx->ns_pool, sizeof(struct ns_hash_table)); + ns_hash_init(ctx->ns_by_name); + + ctx->ns_by_prefix = xml_hash_new(ctx->ns_pool, sizeof(struct ns_hash_table)); + ns_hash_init(ctx->ns_by_prefix); + + /* Intern well-known namespaces */ + GARY_RESIZE(ctx->ns_by_id, 0); + uint none_ns = xml_ns_by_name(ctx, ""); + uint xmlns_ns = xml_ns_by_name(ctx, "http://www.w3.org/2000/xmlns/"); + uint xml_ns = xml_ns_by_name(ctx, "http://www.w3.org/XML/1998/namespace"); + ASSERT(none_ns == XML_NS_NONE && xmlns_ns == XML_NS_XMLNS && xml_ns == XML_NS_XML); + + /* Intern standard prefixes */ + int new_xmlns, new_xml; + ns_hash_lookup(ctx->ns_by_prefix, "xmlns", &new_xmlns)->ns = xmlns_ns; + ns_hash_lookup(ctx->ns_by_prefix, "xml", &new_xml)->ns = xml_ns; + ASSERT(new_xmlns && new_xml); +} + +void +xml_ns_cleanup(struct xml_context *ctx) +{ + if (!ctx->ns_pool) + return; + + TRACE(ctx, 
"NS: Cleanup"); + GARY_FREE(ctx->ns_by_id); + mp_delete(ctx->ns_pool); +} + +void +xml_ns_reset(struct xml_context *ctx) +{ + if (!ns_enabled(ctx)) + return; + + TRACE(ctx, "NS: Reset"); + GARY_RESIZE(ctx->ns_by_id, 0); + mp_flush(ctx->ns_pool); +} + +const char * +xml_ns_by_id(struct xml_context *ctx, uint ns) +{ + if (!ns) // This should work even if namespaces are disabled + return ""; + ASSERT(ns_enabled(ctx) && ns < GARY_SIZE(ctx->ns_by_id)); + return ctx->ns_by_id[ns]; +} + +uint +xml_ns_by_name(struct xml_context *ctx, const char *name) +{ + ASSERT(ns_enabled(ctx)); + int new_p; + struct ns_hash_entry *he = ns_hash_lookup(ctx->ns_by_name, (char *) name, &new_p); + if (new_p) + { + he->ns = GARY_SIZE(ctx->ns_by_id); + ASSERT(he->ns < ~0U); + *GARY_PUSH(ctx->ns_by_id) = he->name; + TRACE(ctx, "NS: New namespace <%s> with ID %u", he->name, he->ns); + } + return he->ns; +} + +static struct xml_ns_prefix * +ns_push_prefix(struct xml_context *ctx) +{ + struct xml_ns_prefix *px = mp_alloc(ctx->stack, sizeof(*px)); + px->prev = ctx->ns_prefix_stack; + ctx->ns_prefix_stack = px; + px->e = ctx->node; + return px; +} + +static uint +ns_resolve(struct xml_context *ctx, char **namep, uint default_ns) +{ + char *name = *namep; + char *colon = strchr(name, ':'); + if (colon) + { + *colon = 0; + struct ns_hash_entry *he = ns_hash_find(ctx->ns_by_prefix, name); + *colon = ':'; + if (he && he->ns) + { + *namep = colon + 1; + return he->ns; + } + else + { + xml_error(ctx, "Unknown namespace prefix for %s", name); + return 0; + } + } + else + return default_ns; +} + +void xml_ns_push_element(struct xml_context *ctx) +{ + struct xml_node *e = ctx->node; + if (!ns_enabled(ctx)) + { + e->ns = 0; + return; + } + + /* Scan attributes for prefix definitions */ + XML_ATTR_FOR_EACH(a, e) + if (str_has_prefix(a->name, "xmlns")) + { + uint ns = xml_ns_by_name(ctx, a->val); + if (a->name[5] == ':') + { + if (!ns) + xml_error(ctx, "Namespace prefixes must not be undeclared"); + else if 
(a->name[6]) + { + /* New NS prefix */ + int new_p; + struct ns_hash_entry *he = ns_hash_lookup(ctx->ns_by_prefix, a->name + 6, &new_p); + if (new_p) + he->ns = 0; + struct xml_ns_prefix *px = ns_push_prefix(ctx); + px->he = he; + px->prev_ns = he->ns; + he->ns = ns; + TRACE(ctx, "NS: New prefix <%s> -> ID %u", he->name, he->ns); + } + else + xml_error(ctx, "Invalid namespace prefix"); + } + else if (!a->name[5]) + { + /* New default NS */ + struct xml_ns_prefix *px = ns_push_prefix(ctx); + px->he = NULL; + px->prev_ns = ctx->ns_default; + ctx->ns_default = ns; + TRACE(ctx, "New default NS -> ID %u", ns); + } + } + + /* Resolve namespaces */ + e->ns = ns_resolve(ctx, &e->name, ctx->ns_default); + XML_ATTR_FOR_EACH(a, e) + a->ns = ns_resolve(ctx, &a->name, 0); +} + +void xml_ns_pop_element(struct xml_context *ctx) +{ + if (!ns_enabled(ctx)) + return; + + struct xml_ns_prefix *px; + while ((px = ctx->ns_prefix_stack) && px->e == ctx->node) + { + struct ns_hash_entry *he = px->he; + if (he) + { + TRACE(ctx, "NS: Restoring prefix <%s> -> ID %u", he->name, px->prev_ns); + he->ns = px->prev_ns; + } + else + { + TRACE(ctx, "NS: Restoring default NS -> ID %u", px->prev_ns); + ctx->ns_default = px->prev_ns; + } + ctx->ns_prefix_stack = px->prev; + } +} diff --git a/libucw/ucw-xml/parse.c b/libucw/ucw-xml/parse.c new file mode 100644 index 0000000..532095d --- /dev/null +++ b/libucw/ucw-xml/parse.c @@ -0,0 +1,1405 @@ +/* + * UCW Library -- A simple XML parser + * + * (c) 2007--2008 Pavel Charvat + * (c) 2015 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/*** Basic parsing ***/ + +void NONRET +xml_fatal_expected(struct xml_context *ctx, uint c) +{ + if (c >= 32 && c < 127) + xml_fatal(ctx, "Expected '%c'", c); + else + xml_fatal(ctx, "Expected U+%04x", c); +} + +void NONRET +xml_fatal_expected_white(struct xml_context *ctx) +{ + xml_fatal(ctx, "Expected a white space"); +} + +void NONRET +xml_fatal_expected_quot(struct xml_context *ctx) +{ + xml_fatal(ctx, "Expected a quotation mark"); +} + +void +xml_parse_eq(struct xml_context *ctx) +{ + /* Eq ::= S? '=' S? */ + xml_parse_white(ctx, 0); + xml_parse_char(ctx, '='); + xml_parse_white(ctx, 0); +} + +/*** Memory management ***/ + +void *xml_do_push(struct xml_context *ctx, uint size) +{ + /* Saves ctx->stack and ctx->flags state */ + struct mempool_state state; + mp_save(ctx->stack, &state); + struct xml_stack *s = mp_alloc(ctx->stack, size); + s->state = state; + s->flags = ctx->flags; + s->next = ctx->stack_list; + ctx->stack_list = s; + return s; +} + +void xml_do_pop(struct xml_context *ctx, struct xml_stack *s) +{ + /* Restore ctx->stack and ctx->flags state */ + ctx->stack_list = s->next; + ctx->flags = s->flags; + mp_restore(ctx->stack, &s->state); +} + +struct xml_node *xml_push_dom(struct xml_context *ctx, struct mempool_state *state) +{ + /* Create a new DOM node */ + TRACE(ctx, "push_dom"); + struct xml_dom_stack *s = xml_do_push(ctx, sizeof(*s)); + if (state) + s->state = *state; + else + mp_save(ctx->pool, &s->state); + struct xml_node *n = mp_alloc(ctx->pool, sizeof(*n)); + n->user = NULL; + if (n->parent = ctx->node) + clist_add_tail(&n->parent->sons, &n->n); + return ctx->node = n; +} + +void xml_pop_dom(struct xml_context *ctx, uint free) +{ + /* Leave DOM subtree */ + TRACE(ctx, "pop_dom"); + ASSERT(ctx->node); + struct xml_node *p = ctx->node->parent; + struct xml_dom_stack *s = (void *)ctx->stack_list; + if (free) + 
{ + /* See xml_pop_element() for cleanup of attribute hash table */ + if (p) + clist_remove(&ctx->node->n); + mp_restore(ctx->pool, &s->state); + } + ctx->node = p; + xml_do_pop(ctx, &s->stack); +} + +/*** Basics ***/ + +uint xml_parse_white(struct xml_context *ctx, uint mandatory) +{ + /* mandatory=1 -> S ::= (#x20 | #x9 | #xD | #xA)+ + * mandatory=0 -> S? */ + uint cnt = 0; + while (xml_peek_cat(ctx) & XML_CHAR_WHITE) + { + xml_skip_char(ctx); + cnt++; + } + if (unlikely(mandatory && !cnt)) + xml_fatal_expected_white(ctx); + return cnt; +} + +/*** Names and nmtokens ***/ + +static char * +xml_parse_string(struct xml_context *ctx, struct mempool *pool, uint first_cat, uint next_cat, char *err) +{ + char *p = mp_start_noalign(pool, 2); + *p++ = '<'; /* We always prepend a '<', so we can seek backwards in the string */ + if (unlikely(!(xml_peek_cat(ctx) & first_cat))) + xml_fatal(ctx, "%s", err); + do + { + p = mp_spread(pool, p, 5); + p = utf8_32_put(p, xml_skip_char(ctx)); + } + while (xml_peek_cat(ctx) & next_cat); + *p++ = 0; + return mp_end(pool, p) + 1; +} + +static void +xml_skip_string(struct xml_context *ctx, uint first_cat, uint next_cat, char *err) +{ + if (unlikely(!(xml_get_cat(ctx) & first_cat))) + xml_fatal(ctx, "%s", err); + while (xml_peek_cat(ctx) & next_cat) + xml_skip_char(ctx); +} + +char * +xml_parse_name(struct xml_context *ctx, struct mempool *pool) +{ + /* Name ::= NameStartChar (NameChar)* */ + return xml_parse_string(ctx, pool, ctx->cat_sname, ctx->cat_name, "Expected a name"); +} + +void +xml_skip_name(struct xml_context *ctx) +{ + xml_skip_string(ctx, ctx->cat_sname, ctx->cat_name, "Expected a name"); +} + +char * +xml_parse_nmtoken(struct xml_context *ctx, struct mempool *pool) +{ + /* Nmtoken ::= (NameChar)+ */ + return xml_parse_string(ctx, pool, ctx->cat_name, ctx->cat_name, "Expected a nmtoken"); +} + +/*** Simple literals ***/ + +char * +xml_parse_system_literal(struct xml_context *ctx, struct mempool *pool) +{ + /* SystemLiteral 
::= ('"' [^"]* '"') | ("'" [^']* "'") */ + char *p = mp_start_noalign(pool, 1); + uint q = xml_parse_quote(ctx), c; + while ((c = xml_get_char(ctx)) != q) + { + p = mp_spread(pool, p, 5); + p = utf8_32_put(p, c); + } + *p++ = 0; + return mp_end(pool, p); +} + +char * +xml_parse_pubid_literal(struct xml_context *ctx, struct mempool *pool) +{ + /* PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */ + char *p = mp_start_noalign(pool, 1); + uint q = xml_parse_quote(ctx), c; + while ((c = xml_get_char(ctx)) != q) + { + if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_PUBID))) + xml_fatal(ctx, "Expected a pubid character"); + p = mp_spread(pool, p, 2); + *p++ = c; + } + *p++ = 0; + return mp_end(pool, p); +} + +/*** Comments ***/ + +void +xml_push_comment(struct xml_context *ctx) +{ + TRACE(ctx, "push_comment"); + /* Comment ::= '' + * Already parsed: 'type = XML_NODE_COMMENT; + char *p = mp_start_noalign(ctx->pool, 6); + while (1) + { + if (xml_get_char(ctx) == '-') + if (xml_get_char(ctx) == '-') + break; + else + *p++ = '-'; + p = utf8_32_put(p, xml_last_char(ctx)); + p = mp_spread(ctx->pool, p, 6); + } + xml_parse_char(ctx, '>'); + *p = 0; + n->len = p - (char *)mp_ptr(ctx->pool); + n->text = mp_end(ctx->pool, p + 1); + if ((ctx->flags & XML_REPORT_COMMENTS) && ctx->h_comment) + ctx->h_comment(ctx); +} + +void +xml_pop_comment(struct xml_context *ctx) +{ + xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_COMMENTS)); + xml_dec(ctx); + TRACE(ctx, "pop_comment"); +} + +void +xml_skip_comment(struct xml_context *ctx) +{ + TRACE(ctx, "skip_comment"); + xml_parse_char(ctx, '-'); + while (xml_get_char(ctx) != '-' || xml_get_char(ctx) != '-'); + xml_parse_char(ctx, '>'); + xml_dec(ctx); +} + +/*** Processing instructions ***/ + +void +xml_push_pi(struct xml_context *ctx) +{ + TRACE(ctx, "push_pi"); + /* Parses a PI to ctx->value and ctx->name: + * PI ::= '' Char*)))? 
'?>' + * PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) + * Already parsed: 'type = XML_NODE_PI; + n->name = xml_parse_name(ctx, ctx->pool); + if (unlikely(!strcasecmp(n->name, "xml"))) + xml_error(ctx, "Reserved PI target"); + char *p = mp_start_noalign(ctx->pool, 5); + if (!xml_parse_white(ctx, 0)) + xml_parse_seq(ctx, "?>"); + else + while (1) + { + if (xml_get_char(ctx) == '?') + if (xml_peek_char(ctx) == '>') + { + xml_skip_char(ctx); + break; + } + else + *p++ = '?'; + else + p = utf8_32_put(p, xml_last_char(ctx)); + p = mp_spread(ctx->pool, p, 5); + } + *p = 0; + n->len = p - (char *)mp_ptr(ctx->pool); + n->text = mp_end(ctx->pool, p + 1); + if ((ctx->flags & XML_REPORT_PIS) && ctx->h_pi) + ctx->h_pi(ctx); +} + +void +xml_pop_pi(struct xml_context *ctx) +{ + xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_PIS)); + xml_dec(ctx); + TRACE(ctx, "pop_pi"); +} + +void +xml_skip_pi(struct xml_context *ctx) +{ + TRACE(ctx, "skip_pi"); + if (ctx->flags & XML_VALIDATING) + { + struct mempool_state state; + mp_save(ctx->stack, &state); + if (unlikely(!strcasecmp(xml_parse_name(ctx, ctx->stack), "xml"))) + xml_error(ctx, "Reserved PI target"); + mp_restore(ctx->stack, &state); + if (!xml_parse_white(ctx, 0)) + { + xml_parse_seq(ctx, "?>"); + xml_dec(ctx); + return; + } + } + while (1) + if (xml_get_char(ctx) == '?') + if (xml_peek_char(ctx) == '>') + break; + xml_skip_char(ctx); + xml_dec(ctx); +} + +/*** Character references ***/ + +uint +xml_parse_char_ref(struct xml_context *ctx) +{ + TRACE(ctx, "parse_char_ref"); + /* CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' + * Already parsed: '&#' */ + uint v = 0; + if (xml_get_char(ctx) == 'x') + { + if (!(xml_get_cat(ctx) & XML_CHAR_XDIGIT)) + { + xml_error(ctx, "Expected a hexadecimal value of character reference"); + goto recover; + } + do + { + v = (v << 4) + Cxvalue(xml_last_char(ctx)); + } + while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_XDIGIT)); + } + else + { + if (!(xml_last_cat(ctx) & 
XML_CHAR_DIGIT)) + { + xml_error(ctx, "Expected a numeric value of character reference"); + goto recover; + } + do + { + v = v * 10 + xml_last_char(ctx) - '0'; + } + while (v < 0x110000 && (xml_get_cat(ctx) & XML_CHAR_DIGIT)); + } + uint cat = xml_char_cat(v); + if (!(cat & ctx->cat_unrestricted)) + { + xml_error(ctx, "Character reference out of range"); + goto recover; + } + if (xml_last_char(ctx) == ';') + { + xml_dec(ctx); + return v; + } + xml_error(ctx, "Expected ';'"); +recover: + while (xml_last_char(ctx) != ';') + xml_get_char(ctx); + xml_dec(ctx); + return UNI_REPLACEMENT; +} + +/*** References to general entities ***/ + +static void +xml_parse_ref(struct xml_context *ctx) +{ + /* Reference ::= EntityRef | CharRef + * EntityRef ::= '&' Name ';' + * Already parsed: '&' */ + struct fastbuf *out = &ctx->chars; + if (xml_peek_char(ctx) == '#') + { + xml_skip_char(ctx); + bput_utf8_32(out, xml_parse_char_ref(ctx)); + } + else + { + TRACE(ctx, "parse_ge_ref"); + struct mempool_state state; + mp_save(ctx->stack, &state); + char *name = xml_parse_name(ctx, ctx->stack); + xml_parse_char(ctx, ';'); + struct xml_dtd_entity *ent = xml_dtd_find_entity(ctx, name); + if (!ent) + { + xml_error(ctx, "Unknown entity &%s;", name); + bputc(out, '&'); + bputs(out, name); + bputc(out, ';'); + } + else if (ent->flags & XML_DTD_ENTITY_TRIVIAL) + { + TRACE(ctx, "Trivial entity &%s;", name); + bputs(out, ent->text); + } + else + { + TRACE(ctx, "Pushed entity &%s;", name); + mp_restore(ctx->stack, &state); + xml_dec(ctx); + xml_push_entity(ctx, ent); + return; + } + mp_restore(ctx->stack, &state); + xml_dec(ctx); + } +} + +/*** Character data ***/ + +void +xml_spout_chars(struct fastbuf *fb) +{ + if (fb->bptr < fb->bufend) + return; + struct xml_context *ctx = SKIP_BACK(struct xml_context, chars, fb); + struct mempool *pool = ctx->pool; + if (fb->bufend != fb->buffer) + { + TRACE(ctx, "growing chars"); + uint len = fb->bufend - fb->buffer; + uint reported = fb->bstop - fb->buffer; + 
fb->buffer = mp_expand(pool); + fb->bufend = fb->buffer + mp_avail(pool); + fb->bptr = fb->buffer + len; + fb->bstop = fb->buffer + reported; + } + else + { + TRACE(ctx, "starting chars"); + mp_save(pool, &ctx->chars_state); + fb->bptr = fb->buffer = fb->bstop = mp_start_noalign(pool, 2); + fb->bufend = fb->buffer + mp_avail(pool) - 1; + } +} + +static uint +xml_end_chars(struct xml_context *ctx, char **out) +{ + struct fastbuf *fb = &ctx->chars; + uint len = fb->bptr - fb->buffer; + if (len) + { + TRACE(ctx, "ending chars"); + *fb->bptr = 0; + *out = mp_end(ctx->pool, fb->bptr + 1); + fb->bufend = fb->bstop = fb->bptr = fb->buffer; + } + return len; +} + +static uint +xml_report_chars(struct xml_context *ctx, char **out) +{ + struct fastbuf *fb = &ctx->chars; + uint len = fb->bptr - fb->buffer; + if (len) + { + *fb->bptr = 0; + *out = fb->bstop; + fb->bstop = fb->bptr; + } + return len; +} + +static uint +xml_flush_chars(struct xml_context *ctx) +{ + char *text, *rtext; + uint len = xml_end_chars(ctx, &text), rlen; + if (len) + { + if (ctx->flags & XML_NO_CHARS) + { + if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_ignorable) + ctx->h_ignorable(ctx, text, len); + mp_restore(ctx->pool, &ctx->chars_state); + return 0; + } + if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_block && (rlen = xml_report_chars(ctx, &rtext))) + ctx->h_block(ctx, rtext, rlen); + if (!(ctx->flags & XML_ALLOC_CHARS) && !(ctx->flags & XML_REPORT_CHARS)) + { + mp_restore(ctx->pool, &ctx->chars_state); + return 0; + } + struct xml_node *n = xml_push_dom(ctx, &ctx->chars_state); + n->type = XML_NODE_CHARS; + n->text = text; + n->len = len; + if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_chars) + ctx->h_chars(ctx); + } + return len; +} + +static void +xml_pop_chars(struct xml_context *ctx) +{ + xml_pop_dom(ctx, !(ctx->flags & XML_ALLOC_CHARS)); + TRACE(ctx, "pop_chars"); +} + +static void +xml_append_chars(struct xml_context *ctx) +{ + TRACE(ctx, "append_chars"); + struct fastbuf *out = &ctx->chars; + 
if (ctx->flags & XML_NO_CHARS) + while (xml_get_char(ctx) != '<') + if (xml_last_cat(ctx) & XML_CHAR_WHITE) + bput_utf8_32(out, xml_last_char(ctx)); + else + { + xml_error(ctx, "This element must not contain character data"); + while (xml_get_char(ctx) != '<'); + break; + } + else + while (xml_get_char(ctx) != '<') + if (xml_last_char(ctx) == '&') + { + xml_inc(ctx); + xml_parse_ref(ctx); + } + else + bput_utf8_32(out, xml_last_char(ctx)); + xml_unget_char(ctx); +} + +/*** CDATA sections ***/ + +static void +xml_skip_cdata(struct xml_context *ctx) +{ + TRACE(ctx, "skip_cdata"); + xml_parse_seq(ctx, "CDATA["); + while (xml_get_char(ctx) != ']' || xml_get_char(ctx) != ']' || xml_get_char(ctx) != '>'); + xml_dec(ctx); +} + +static void +xml_append_cdata(struct xml_context *ctx) +{ + /* CDSect :== '' Char*)) ']]>' + * Already parsed: 'flags & XML_NO_CHARS) + { + xml_error(ctx, "This element must not contain CDATA"); + xml_skip_cdata(ctx); + return; + } + xml_parse_seq(ctx, "CDATA["); + struct fastbuf *out = &ctx->chars; + uint rlen; + char *rtext; + if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_block && (rlen = xml_report_chars(ctx, &rtext))) + ctx->h_block(ctx, rtext, rlen); + while (1) + { + if (xml_get_char(ctx) == ']') + { + if (xml_get_char(ctx) == ']') + if (xml_get_char(ctx) == '>') + break; + else + bputc(out, ']'); + bputc(out, ']'); + } + bput_utf8_32(out, xml_last_char(ctx)); + } + if ((ctx->flags & XML_REPORT_CHARS) && ctx->h_cdata && (rlen = xml_report_chars(ctx, &rtext))) + ctx->h_cdata(ctx, rtext, rlen); + xml_dec(ctx); +} + +/*** Attribute values ***/ + +char * +xml_parse_attr_value(struct xml_context *ctx, struct xml_dtd_attr *attr UNUSED) +{ + TRACE(ctx, "parse_attr_value"); + /* AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */ + /* FIXME: -- check value constrains / normalize leading/trailing WS and repeated WS */ + struct mempool_state state; + uint quote = xml_parse_quote(ctx); + mp_save(ctx->stack, &state); + struct 
fastbuf *out = &ctx->chars; + struct xml_source *src = ctx->src; + while (1) + { + uint c = xml_get_char(ctx); + if (c == '&') + { + xml_inc(ctx); + xml_parse_ref(ctx); + } + else if (c == quote && src == ctx->src) + break; + else if (c == '<') + xml_error(ctx, "Attribute value must not contain '<'"); + else if (xml_last_cat(ctx) & XML_CHAR_WHITE) + bputc(out, ' '); + else + bput_utf8_32(out, c); + } + mp_restore(ctx->stack, &state); + char *text; + return xml_end_chars(ctx, &text) ? text : ""; +} + +uint +xml_normalize_white(struct xml_context *ctx UNUSED, char *text) +{ + char *s = text, *d = text; + while (*s == 0x20) + s++; + while (1) + { + while (*s & ~0x20) + *d++ = *s++; + if (!*s) + break; + while (*++s == 0x20); + *d++ = 0x20; + } + if (d != text && d[-1] == 0x20) + d--; + *d = 0; + return d - text; +} + +/*** Attributes ***/ + +static void +xml_raw_add_attr(struct xml_context *ctx, struct xml_node *e, char *name, char *value) +{ + struct xml_attr *a = mp_alloc(ctx->pool, sizeof(*a)); + a->elem = e; + a->ns = 0; /* Namespaces will be resolved later */ + a->name = name; + a->val = value; + a->dtd = NULL; + a->user = NULL; + /* a->hash will be calculated later */ + slist_add_tail(&e->attrs, &a->n); +} + +static inline uint +xml_attr_hash(uint ns, char *name) +{ + return hash_string(name) ^ hash_u32(ns); +} + +static void +xml_parse_attr(struct xml_context *ctx) +{ + TRACE(ctx, "parse_attr"); + /* Attribute ::= Name Eq AttValue */ + struct xml_node *e = ctx->node; + char *n = xml_parse_name(ctx, ctx->pool); + xml_parse_eq(ctx); + char *v = xml_parse_attr_value(ctx, NULL); + xml_raw_add_attr(ctx, e, n, v); +} + +static void +xml_process_attr(struct xml_context *ctx, struct xml_attr *a) +{ + struct xml_node *e = a->elem; + a->hash = xml_attr_hash(a->ns, a->name); + + XML_ATTR_FOR_EACH(a2, e) + { + if (a2 == a) + break; + if (a2->hash == a->hash && a2->ns == a->ns && !strcmp(a2->name, a->name)) + xml_error(ctx, "Attribute %s is not unique in element <%s>", 
xml_attr_qname(ctx, a), xml_node_qname(ctx, e)); + } +} + +struct xml_attr * +xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name) +{ + return xml_attr_find_ns(ctx, node, 0, name); +} + +struct xml_attr * +xml_attr_find_ns(struct xml_context *ctx UNUSED, struct xml_node *node, uint ns, char *name) +{ + ASSERT(node->type == XML_NODE_ELEM); + uint hash = xml_attr_hash(ns, name); + XML_ATTR_FOR_EACH(a, node) + if (a->hash == hash && a->ns == ns && !strcmp(a->name, name)) + return a; + return NULL; +} + +char * +xml_attr_value_ns(struct xml_context *ctx, struct xml_node *node, uint ns, char *name) +{ + struct xml_attr *attr = xml_attr_find_ns(ctx, node, ns, name); + if (attr) + return attr->val; + if (!node->dtd) + return NULL; + if (ns) /* So far, our DTD support is not namespace-aware */ + return NULL; + struct xml_dtd_attr *dtd = xml_dtd_find_attr(ctx, node->dtd, name); + return dtd ? dtd->default_value : NULL; +} + +char * +xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name) +{ + return xml_attr_value_ns(ctx, node, 0, name); +} + +char * +xml_attr_qname(struct xml_context *ctx UNUSED, struct xml_attr *attr) +{ + char *n = attr->name; + while (n[-1] != '<') + n--; + return n; +} + +/*** Elements ***/ + +static uint +xml_validate_element(struct xml_dtd_elem_node *root, struct xml_dtd_elem *elem) +{ + if (root->elem) + return elem == root->elem; + else + SLIST_FOR_EACH(struct xml_dtd_elem_node *, son, root->sons) + if (xml_validate_element(son, elem)) + return 1; + return 0; +} + +static void +xml_push_element(struct xml_context *ctx) +{ + TRACE(ctx, "push_element"); + /* EmptyElemTag | STag + * EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' + * STag ::= '<' Name (S Attribute)* S? 
'>' + * Already parsed: '<' */ + struct xml_node *e = xml_push_dom(ctx, NULL); + clist_init(&e->sons); + e->type = XML_NODE_ELEM; + e->name = xml_parse_name(ctx, ctx->pool); + slist_init(&e->attrs); + + if (!e->parent) + { + ctx->dom = e; + if (ctx->doctype && strcmp(e->name, ctx->doctype)) + xml_error(ctx, "The root element <%s> does not match the document type <%s>", e->name, ctx->doctype); + } + + if (!ctx->dtd) + e->dtd = NULL; + else if (!(e->dtd = xml_dtd_find_elem(ctx, e->name))) + xml_error(ctx, "Undefined element <%s>", e->name); + else + { + struct xml_dtd_elem *dtd = e->dtd, *parent_dtd = e->parent ? e->parent->dtd : NULL; + if (dtd->type == XML_DTD_ELEM_MIXED) + ctx->flags &= ~XML_NO_CHARS; + else + ctx->flags |= XML_NO_CHARS; + if (parent_dtd) + if (parent_dtd->type == XML_DTD_ELEM_EMPTY) + xml_error(ctx, "Empty element must not contain children"); + else if (parent_dtd->type != XML_DTD_ELEM_ANY) + { + // FIXME: validate regular expressions + if (!xml_validate_element(parent_dtd->node, dtd)) + xml_error(ctx, "Unexpected element <%s>", e->name); + } + } + + /* Parse attributes */ + while (1) + { + uint white = xml_parse_white(ctx, 0); + uint c = xml_get_char(ctx); + if (c == '/') + { + xml_parse_char(ctx, '>'); + ctx->flags |= XML_EMPTY_ELEM_TAG; + break; + } + else if (c == '>') + break; + else if (!white) + xml_fatal_expected_white(ctx); + xml_unget_char(ctx); + xml_parse_attr(ctx); + } + + /* Resolve namespaces */ + xml_ns_push_element(ctx); + + /* Once we have namespaces, hash attribute names */ + XML_ATTR_FOR_EACH(a, e) + xml_process_attr(ctx, a); + + /* FIXME: DTD logic is not namespace-aware */ + if (e->dtd) + { + XML_ATTR_FOR_EACH(a, e) + { + if (!(a->dtd = xml_dtd_find_attr(ctx, e->dtd, a->name))) + xml_error(ctx, "Undefined attribute %s in element <%s>", a->name, e->name); + else + xml_validate_attr(ctx, a->dtd, a->val); + } + SLIST_FOR_EACH(struct xml_dtd_attr *, a, e->dtd->attrs) + { + if (a->default_mode == XML_ATTR_REQUIRED) + { + if 
(!xml_attr_find(ctx, e, a->name)) + xml_error(ctx, "Missing required attribute %s in element <%s>", a->name, e->name); + } + else if (a->default_mode != XML_ATTR_IMPLIED && ctx->flags & XML_ALLOC_DEFAULT_ATTRS) + { + if (!xml_attr_find(ctx, e, a->name)) + xml_raw_add_attr(ctx, e, a->name, a->default_value); + } + } + } + + if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_stag) + ctx->h_stag(ctx); +} + +static void +xml_pop_element(struct xml_context *ctx) +{ + TRACE(ctx, "pop_element"); + if ((ctx->flags & XML_REPORT_TAGS) && ctx->h_etag) + ctx->h_etag(ctx); + + xml_ns_pop_element(ctx); + + struct xml_node *e = ctx->node; + uint free = !(ctx->flags & XML_ALLOC_TAGS); + if (free) + { + if (!e->parent) + ctx->dom = NULL; +#if 0 + /* + * With the current data structures, freeing of attributes is not necessary, + * but it might be if we switch to a global hash table of large elements. + */ + SLIST_FOR_EACH(struct xml_attr *, a, e->attrs) + xml_attrs_remove(ctx->tab_attrs, a); + struct xml_node *n; + while (n = clist_head(&e->sons)) + { + if (n->type == XML_NODE_ELEM) + { + SLIST_FOR_EACH(struct xml_attr *, a, n->attrs) + xml_attrs_remove(ctx->tab_attrs, a); + clist_insert_list_after(&n->sons, &n->n); + } + clist_remove(&n->n); + } +#endif + } + + xml_pop_dom(ctx, free); + xml_dec(ctx); +} + +static void +xml_parse_etag(struct xml_context *ctx) +{ + /* ETag ::= '' + * Already parsed: '<' */ + struct xml_node *e = ctx->node; + ASSERT(e); + char *n = xml_node_qname(ctx, e); + while (*n) + { + uint c; + n = utf8_32_get(n, &c); + if (xml_get_char(ctx) != c) + goto recover; + } + xml_parse_white(ctx, 0); + if (xml_get_char(ctx) != '>') + { +recover: + xml_error(ctx, "Invalid ETag, expected ", xml_node_qname(ctx, e)); + while (xml_get_char(ctx) != '>'); + } + xml_dec(ctx); +} + +char * +xml_node_qname(struct xml_context *ctx UNUSED, struct xml_node *node) +{ + ASSERT(node->type == XML_NODE_ELEM); + char *n = node->name; + while (n[-1] != '<') + n--; + return n; +} + +/*** 
Document type declaration ***/ + +static void +xml_parse_doctype_decl(struct xml_context *ctx) +{ + TRACE(ctx, "parse_doctype_decl"); + /* doctypedecl ::= '' + * Already parsed: '' */ + if (ctx->doctype) + xml_fatal(ctx, "Multiple document types not allowed"); + xml_parse_seq(ctx, "DOCTYPE"); + xml_parse_white(ctx, 1); + ctx->doctype = xml_parse_name(ctx, ctx->pool); + TRACE(ctx, "doctype=%s", ctx->doctype); + uint c; + if (xml_parse_white(ctx, 0) && ((c = xml_peek_char(ctx)) == 'S' || c == 'P')) + { + if (c == 'S') + { + xml_parse_seq(ctx, "SYSTEM"); + xml_parse_white(ctx, 1); + ctx->system_id = xml_parse_system_literal(ctx, ctx->pool); + } + else + { + xml_parse_seq(ctx, "PUBLIC"); + xml_parse_white(ctx, 1); + ctx->public_id = xml_parse_pubid_literal(ctx, ctx->pool); + xml_parse_white(ctx, 1); + ctx->system_id = xml_parse_system_literal(ctx, ctx->pool); + } + xml_parse_white(ctx, 0); + ctx->flags |= XML_HAS_EXTERNAL_SUBSET; + } + if (xml_peek_char(ctx) == '[') + { + ctx->flags |= XML_HAS_INTERNAL_SUBSET; + xml_skip_char(ctx); + xml_inc(ctx); + } + if (ctx->h_doctype_decl) + ctx->h_doctype_decl(ctx); +} + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/* DTD: Internal subset */ + +static void +xml_parse_subset(struct xml_context *ctx, uint external) +{ + // FIXME: + // -- comments/pi have no parent + // -- conditional sections in external subset + // -- check corectness of parameter entities + + /* '[' intSubset ']' + * intSubset :== (markupdecl | DeclSep) + * Already parsed: '[' + * + * extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)* + */ + while (1) + { + xml_parse_white(ctx, 0); + uint c = xml_get_char(ctx); + xml_inc(ctx); + if (c == '<') + if ((c = xml_get_char(ctx)) == '!') + switch (c = xml_get_char(ctx)) + { + case '-': + xml_push_comment(ctx); + xml_pop_comment(ctx); + break; + case 'N': + xml_parse_seq(ctx, "OTATION"); + xml_parse_notation_decl(ctx); + break; + case 'E': + 
if ((c = xml_get_char(ctx)) == 'N') + { + xml_parse_seq(ctx, "TITY"); + xml_parse_entity_decl(ctx); + } + else if (c == 'L') + { + xml_parse_seq(ctx, "EMENT"); + xml_parse_element_decl(ctx); + } + else + goto invalid_markup; + break; + case 'A': + xml_parse_seq(ctx, "TTLIST"); + xml_parse_attr_list_decl(ctx); + break; + default: + goto invalid_markup; + } + else if (c == '?') + { + xml_push_pi(ctx); + xml_pop_pi(ctx); + } + else + goto invalid_markup; + else if (c == '%') + xml_parse_pe_ref(ctx); + else if (c == ']' && !external) + { + break; + } + else if (c == '>' && external) + { + break; + } + else + goto invalid_markup; + } + xml_dec(ctx); + return; +invalid_markup: ; + xml_fatal(ctx, "Invalid markup in the %s subset", external ? "external" : "internal"); +} + +/*** The State Machine ***/ + +uint +xml_next(struct xml_context *ctx) +{ + /* A nasty state machine */ + +#define PULL(x) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##x; /* fall-thru */ case XML_STATE_##x: ; } while (0) +#define PULL_STATE(x, s) do { if (ctx->pull & XML_PULL_##x) return ctx->state = XML_STATE_##s, XML_STATE_##x; /* fall-thru */ case XML_STATE_##s: ; } while (0) + + TRACE(ctx, "xml_next (state=%u)", ctx->state); + jmp_buf throw_buf; + ctx->throw_buf = &throw_buf; + if (setjmp(throw_buf)) + { +error: + if (ctx->err_code == XML_ERR_EOF && ctx->h_fatal) + ctx->h_fatal(ctx); + TRACE(ctx, "raised fatal error"); + return ctx->state = XML_STATE_EOF; + } + uint c; + switch (ctx->state) + { + case XML_STATE_START: + TRACE(ctx, "entering prolog"); + ctx->flags |= XML_SRC_DOCUMENT | XML_SRC_EXPECTED_DECL; + if (ctx->h_document_start) + ctx->h_document_start(ctx); + /* XMLDecl */ + xml_refill(ctx); + if (ctx->h_xml_decl) + ctx->h_xml_decl(ctx); + PULL(XML_DECL); + + /* Misc* (doctypedecl Misc*)? 
*/ + while (1) + { + xml_parse_white(ctx, 0); + xml_parse_char(ctx, '<'); + xml_inc(ctx); + if ((c = xml_get_char(ctx)) == '?') + /* Processing intruction */ + if (!(ctx->flags & XML_REPORT_PIS)) + xml_skip_pi(ctx); + else + { + xml_push_pi(ctx); + PULL_STATE(PI, PROLOG_PI); + xml_pop_pi(ctx); + } + else if (c != '!') + { + /* Found the root tag */ + xml_unget_char(ctx); + goto first_tag; + } + else if (xml_get_char(ctx) == '-') + if (!(ctx->flags & XML_REPORT_COMMENTS)) + xml_skip_comment(ctx); + else + { + xml_push_comment(ctx); + PULL_STATE(COMMENT, PROLOG_COMMENT); + xml_pop_comment(ctx); + } + else + { + /* DocTypeDecl */ + xml_unget_char(ctx); + xml_parse_doctype_decl(ctx); + PULL(DOCTYPE_DECL); + if (ctx->flags & XML_HAS_DTD) + if (ctx->flags & XML_PARSE_DTD) + { + xml_dtd_init(ctx); + if (ctx->h_dtd_start) + ctx->h_dtd_start(ctx); + if (ctx->flags & XML_HAS_INTERNAL_SUBSET) + { + xml_parse_subset(ctx, 0); + xml_dec(ctx); + } + if (ctx->flags & XML_HAS_EXTERNAL_SUBSET) + { + struct xml_dtd_entity ent = { + .system_id = ctx->system_id, + .public_id = ctx->public_id, + }; + xml_parse_white(ctx, 0); + xml_parse_char(ctx, '>'); + xml_unget_char(ctx); + ASSERT(ctx->h_resolve_entity); + ctx->h_resolve_entity(ctx, &ent); + ctx->flags |= XML_SRC_EXPECTED_DECL; + xml_parse_subset(ctx, 1); + xml_unget_char(ctx);; + } + if (ctx->h_dtd_end) + ctx->h_dtd_end(ctx); + } + else if (ctx->flags & XML_HAS_INTERNAL_SUBSET) + xml_skip_internal_subset(ctx); + xml_parse_white(ctx, 0); + xml_parse_char(ctx, '>'); + xml_dec(ctx); + } + } + + case XML_STATE_CHARS: + + while (1) + { + if (xml_peek_char(ctx) != '<') + { + /* CharData */ + xml_append_chars(ctx); + continue; + } + else + xml_skip_char(ctx); + xml_inc(ctx); +first_tag: + + if ((c = xml_get_char(ctx)) == '?') + { + /* PI */ + if (!(ctx->flags & (XML_REPORT_PIS | XML_ALLOC_PIS))) + xml_skip_pi(ctx); + else + { + if (xml_flush_chars(ctx)) + { + PULL_STATE(CHARS, CHARS_BEFORE_PI); + xml_pop_chars(ctx); + } + xml_push_pi(ctx); 
+ PULL(PI); + xml_pop_pi(ctx); + } + } + + else if (c == '!') + if ((c = xml_get_char(ctx)) == '-') + { + /* Comment */ + if (!(ctx->flags & (XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS))) + xml_skip_comment(ctx); + else + { + if (xml_flush_chars(ctx)) + { + PULL_STATE(CHARS, CHARS_BEFORE_COMMENT); + xml_pop_chars(ctx); + } + xml_push_comment(ctx); + PULL(COMMENT); + xml_pop_comment(ctx); + } + } + else if (c == '[') + { + /* CDATA */ + xml_append_cdata(ctx); + } + else + xml_fatal(ctx, "Unexpected character after 'flags & XML_EMPTY_ELEM_TAG) + goto pop_element; + } + + else + { + /* ETag */ + if (xml_flush_chars(ctx)) + { + PULL_STATE(CHARS, CHARS_BEFORE_ETAG); + xml_pop_chars(ctx); + } + + xml_parse_etag(ctx); +pop_element: + PULL(ETAG); + xml_pop_element(ctx); + if (!ctx->node) + goto epilog; + } + } + +epilog: + /* Misc* */ + TRACE(ctx, "entering epilog"); + while (1) + { + /* Epilog whitespace is the only place, where a valid document can reach EOF */ + if (setjmp(throw_buf)) + if (ctx->err_code == XML_ERR_EOF) + { + TRACE(ctx, "reached EOF"); + ctx->state = XML_STATE_EOF; + if (ctx->h_document_end) + ctx->h_document_end(ctx); + case XML_STATE_EOF: + ctx->err_code = 0; + ctx->err_msg = NULL; + return XML_STATE_EOF; + } + else + goto error; + xml_parse_white(ctx, 0); + if (setjmp(throw_buf)) + goto error; + + /* Misc */ + xml_parse_char(ctx, '<'); + xml_inc(ctx); + if ((c = xml_get_char(ctx)) == '?') + /* Processing instruction */ + if (!(ctx->flags & XML_REPORT_PIS)) + xml_skip_pi(ctx); + else + { + xml_push_pi(ctx); + PULL_STATE(PI, EPILOG_PI); + xml_pop_pi(ctx); + } + else if (c == '!') + { + xml_parse_char(ctx, '-'); + /* Comment */ + if (!(ctx->flags & XML_REPORT_COMMENTS)) + xml_skip_comment(ctx); + else + { + xml_push_comment(ctx); + PULL_STATE(COMMENT, EPILOG_COMMENT); + xml_pop_comment(ctx); + } + } + else + xml_fatal(ctx, "Syntax error in the epilog"); + } + + } + ASSERT(0); +} + +uint +xml_next_state(struct xml_context *ctx, uint pull) +{ + uint saved = 
ctx->pull; + ctx->pull = pull; + uint res = xml_next(ctx); + ctx->pull = saved; + return res; +} + +uint +xml_skip_element(struct xml_context *ctx) +{ + ASSERT(ctx->state == XML_STATE_STAG); + struct xml_node *node = ctx->node; + uint saved = ctx->pull, res; + ctx->pull = XML_PULL_ETAG; + while ((res = xml_next(ctx)) && ctx->node != node); + ctx->pull = saved; + return res; +} + +uint +xml_parse(struct xml_context *ctx) +{ + /* This cycle should run only once unless the user overrides the value of ctx->pull in a SAX handler */ + do + { + ctx->pull = 0; + } + while (xml_next(ctx)); + return ctx->err_code; +} + +char * +xml_merge_chars(struct xml_context *ctx UNUSED, struct xml_node *node, struct mempool *pool) +{ + ASSERT(node->type == XML_NODE_ELEM); + char *p = mp_start_noalign(pool, 1); + XML_NODE_FOR_EACH(son, node) + if (son->type == XML_NODE_CHARS) + { + p = mp_spread(pool, p, son->len + 1); + memcpy(p, son->text, son->len); + p += son->len; + } + *p++ = 0; + return mp_end(pool, p); +} + +static char * +xml_append_dom_chars(char *p, struct mempool *pool, struct xml_node *node) +{ + XML_NODE_FOR_EACH(son, node) + if (son->type == XML_NODE_CHARS) + { + p = mp_spread(pool, p, son->len + 1); + memcpy(p, son->text, son->len); + p += son->len; + } + else if (son->type == XML_NODE_ELEM) + p = xml_append_dom_chars(p, pool, son); + return p; +} + +char * +xml_merge_dom_chars(struct xml_context *ctx UNUSED, struct xml_node *node, struct mempool *pool) +{ + ASSERT(node->type == XML_NODE_ELEM); + char *p = mp_start_noalign(pool, 1); + p = xml_append_dom_chars(p, pool, node); + *p++ = 0; + return mp_end(pool, p); +} diff --git a/libucw/ucw-xml/source.c b/libucw/ucw-xml/source.c new file mode 100644 index 0000000..5396c50 --- /dev/null +++ b/libucw/ucw-xml/source.c @@ -0,0 +1,486 @@ +/* + * UCW Library -- A simple XML parser + * + * (c) 2007--2008 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General 
Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include + +/*** Character categorization ***/ + +#include "obj/ucw-xml/unicat.c" + +static void +xml_init_cats(struct xml_context *ctx) +{ + if (!(ctx->flags & XML_VERSION_1_1)) + { + ctx->cat_chars = XML_CHAR_VALID_1_0; + ctx->cat_unrestricted = XML_CHAR_VALID_1_0; + ctx->cat_new_line = XML_CHAR_NEW_LINE_1_0; + ctx->cat_name = XML_CHAR_NAME_1_0; + ctx->cat_sname = XML_CHAR_SNAME_1_0; + } + else + { + ctx->cat_chars = XML_CHAR_VALID_1_1; + ctx->cat_unrestricted = XML_CHAR_UNRESTRICTED_1_1; + ctx->cat_new_line = XML_CHAR_NEW_LINE_1_1; + ctx->cat_name = XML_CHAR_NAME_1_1; + ctx->cat_sname = XML_CHAR_SNAME_1_1; + } +} + +/*** Reading of document/external entities ***/ + +static void NONRET +xml_eof(struct xml_context *ctx) +{ + ctx->err_msg = "Unexpected EOF"; + ctx->err_code = XML_ERR_EOF; + xml_throw(ctx); +} + +void NONRET +xml_fatal_nested(struct xml_context *ctx) +{ + xml_fatal(ctx, "Entity is not nested correctly"); +} + +static inline void +xml_add_char(u32 **bstop, uint c) +{ + *(*bstop)++ = c; + *(*bstop)++ = xml_char_cat(c); +} + +struct xml_source * +xml_push_source(struct xml_context *ctx) +{ + xml_push(ctx); + struct xml_source *src = ctx->src; + if (src) + { + src->bptr = ctx->bptr; + src->bstop = ctx->bstop; + } + src = mp_alloc_zero(ctx->stack, sizeof(*src)); + src->next = ctx->src; + src->saved_depth = ctx->depth; + ctx->src = src; + ctx->flags &= ~(XML_SRC_EOF | XML_SRC_EXPECTED_DECL | XML_SRC_DOCUMENT); + ctx->bstop = ctx->bptr = src->buf; + ctx->depth = 0; + return src; +} + +struct xml_source * +xml_push_fastbuf(struct xml_context *ctx, struct fastbuf *fb) +{ + struct xml_source *src = xml_push_source(ctx); + src->fb = fb; + return src; +} + +static void +xml_close_source(struct xml_source *src) +{ + bclose(src->fb); + if (src->wrapped_fb) + bclose(src->wrapped_fb); +} + +static void +xml_pop_source(struct xml_context *ctx) +{ + 
TRACE(ctx, "pop_source"); + if (unlikely(ctx->depth != 0)) + xml_fatal(ctx, "Unexpected end of entity"); + struct xml_source *src = ctx->src; + if (!src) + xml_fatal(ctx, "Undefined source"); + xml_close_source(src); + ctx->depth = src->saved_depth; + ctx->src = src = src->next; + if (src) + { + ctx->bptr = src->bptr; + ctx->bstop = src->bstop; + } + xml_pop(ctx); + if (unlikely(!src)) + xml_eof(ctx); +} + +void +xml_sources_cleanup(struct xml_context *ctx) +{ + struct xml_source *s; + while (s = ctx->src) + { + ctx->src = s->next; + xml_close_source(s); + } +} + +static void xml_refill_utf8(struct xml_context *ctx); + +void +xml_def_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *ent UNUSED) +{ + xml_error(ctx, "References to external entities are not supported"); +} + +void +xml_push_entity(struct xml_context *ctx, struct xml_dtd_entity *ent) +{ + TRACE(ctx, "xml_push_entity"); + struct xml_source *src; + if (ent->flags & XML_DTD_ENTITY_EXTERNAL) + { + ASSERT(ctx->h_resolve_entity); + ctx->h_resolve_entity(ctx, ent); + ctx->flags |= XML_SRC_EXPECTED_DECL; + src = ctx->src; + } + else + { + src = xml_push_source(ctx); + fbbuf_init_read(src->fb = &src->wrap_fb, ent->text, strlen(ent->text), 0); + } + src->refill = xml_refill_utf8; + src->refill_cat1 = ctx->cat_unrestricted & ~ctx->cat_new_line; + src->refill_cat2 = ctx->cat_new_line; +} + +static uint +xml_error_restricted(struct xml_context *ctx, uint c) +{ + if (c == ~1U) + xml_error(ctx, "Corrupted encoding"); + else + xml_error(ctx, "Restricted char U+%04X", c); + return UNI_REPLACEMENT; +} + +static void xml_parse_decl(struct xml_context *ctx); + +#define REFILL(ctx, func, params...) 
\ + struct xml_source *src = ctx->src; \ + struct fastbuf *fb = src->fb; \ + if (ctx->bptr == ctx->bstop) \ + ctx->bptr = ctx->bstop = src->buf; \ + uint c, t1 = src->refill_cat1, t2 = src->refill_cat2, row = src->row; \ + u32 *bend = src->buf + ARRAY_SIZE(src->buf), *bstop = ctx->bstop, \ + *last_0xd = src->pending_0xd ? bstop : NULL; \ + do \ + { \ + c = func(fb, ##params); \ + uint t = xml_char_cat(c); \ + if (t & t1) \ + /* Typical branch */ \ + *bstop++ = c, *bstop++ = t; \ + else if (t & t2) \ + { \ + /* New line */ \ + /* XML 1.0: 0xA | 0xD | 0xD 0xA */ \ + /* XML 1.1: 0xA | 0xD | 0xD 0xA | 0x85 | 0xD 0x85 | 0x2028 */ \ + if (c == 0xd) \ + last_0xd = bstop + 2; \ + else if (c != 0x2028 && last_0xd == bstop) \ + { \ + last_0xd = NULL; \ + continue; \ + } \ + xml_add_char(&bstop, 0xa), row++; \ + } \ + else if (c == '>') \ + { \ + /* Used only in XML/TextDecl to switch the encoding */ \ + *bstop++ = c, *bstop++ = t; \ + break; \ + } \ + else if (~c) \ + /* Restricted character */ \ + xml_add_char(&bstop, xml_error_restricted(ctx, c)); \ + else \ + { \ + /* EOF */ \ + ctx->flags |= XML_SRC_EOF; \ + break; \ + } \ + } \ + while (bstop < bend); \ + src->pending_0xd = (last_0xd == bstop); \ + ctx->bstop = bstop; \ + src->row = row; + +static void +xml_refill_utf8(struct xml_context *ctx) +{ + REFILL(ctx, bget_utf8_repl, ~1U); +} + +static void +xml_refill_utf16_le(struct xml_context *ctx) +{ + REFILL(ctx, bget_utf16_le_repl, ~1U); +} + +static void +xml_refill_utf16_be(struct xml_context *ctx) +{ + REFILL(ctx, bget_utf16_be_repl, ~1U); +} + +#undef REFILL + +void +xml_refill(struct xml_context *ctx) +{ + do + { + if (ctx->flags & XML_SRC_EOF) + xml_pop_source(ctx); + else if (ctx->flags & XML_SRC_EXPECTED_DECL) + xml_parse_decl(ctx); + else + { + ctx->src->refill(ctx); + TRACE(ctx, "refilled %u characters", (uint)((ctx->bstop - ctx->bptr) / 2)); + } + } + while (ctx->bptr == ctx->bstop); +} + +static uint +xml_source_row(struct xml_context *ctx, struct xml_source 
*src) +{ + uint row = src->row; + for (u32 *p = ctx->bstop; p != ctx->bptr; p -= 2) + if (p[-1] & src->refill_cat2) + row--; + return row + 1; +} + +uint +xml_row(struct xml_context *ctx) +{ + return ctx->src ? xml_source_row(ctx, ctx->src) : 0; +} + +/* Document/external entity header */ + +static char * +xml_parse_encoding_name(struct xml_context *ctx) +{ + /* EncName ::= '"' [A-Za-z] ([A-Za-z0-9._] | '-')* '"' | "'" [A-Za-z] ([A-Za-z0-9._] | '-')* "'" */ + char *p = mp_start_noalign(ctx->pool, 1); + uint q = xml_parse_quote(ctx); + if (unlikely(!(xml_get_cat(ctx) & XML_CHAR_ENC_SNAME))) + xml_fatal(ctx, "Invalid character in the encoding name"); + while (1) + { + p = mp_spread(ctx->pool, p, 2); + *p++ = xml_last_char(ctx); + if (xml_get_char(ctx) == q) + break; + if (unlikely(!(xml_last_cat(ctx) & XML_CHAR_ENC_NAME))) + xml_fatal(ctx, "Invalid character in the encoding name"); + } + *p++ = 0; + return mp_end(ctx->pool, p); +} + +static void +xml_init_charconv(struct xml_context *ctx, int cs) +{ + // XXX: with a direct access to libucw-charset tables could be faster + struct xml_source *src = ctx->src; + TRACE(ctx, "wrapping charset %s", charset_name(cs)); + src->wrapped_fb = src->fb; + src->fb = fb_wrap_charconv_in(src->fb, cs, CONV_CHARSET_UTF8); +} + +static void +xml_parse_decl(struct xml_context *ctx) +{ + TRACE(ctx, "xml_parse_decl"); + struct xml_source *src = ctx->src; + ctx->flags &= ~XML_SRC_EXPECTED_DECL; + uint doc = ctx->flags & XML_SRC_DOCUMENT; + + /* Setup valid Unicode ranges and force the reader to abort refill() after each '>', where we can switch encoding or XML version */ + if (doc) + xml_init_cats(ctx); + src->refill_cat1 = ctx->cat_unrestricted & ~ctx->cat_new_line & ~XML_CHAR_GT; + src->refill_cat2 = ctx->cat_new_line; + + /* Initialize the supplied charset (if any) or try to guess it */ + char *expected_encoding = src->expected_encoding; + src->refill = xml_refill_utf8; + int bom = bpeekc(src->fb); + if (bom < 0) + ctx->flags |= 
XML_SRC_EOF; + if (!src->fb_encoding) + { + if (bom == 0xfe) + src->refill = xml_refill_utf16_be; + else if (bom == 0xff) + src->refill = xml_refill_utf16_le; + } + else + { + int cs = find_charset_by_name(src->fb_encoding); + if (cs == CONV_CHARSET_UTF8) + {} + else if (cs >= 0) + { + xml_init_charconv(ctx, cs); + bom = 0; + } + else if (strcasecmp(src->fb_encoding, "UTF-16")) + { + src->refill = xml_refill_utf16_be; + if (bom == 0xff) + src->refill = xml_refill_utf16_le; + } + else if (strcasecmp(src->fb_encoding, "UTF-16BE")) + src->refill = xml_refill_utf16_be; + else if (strcasecmp(src->fb_encoding, "UTF-16LE")) + src->refill = xml_refill_utf16_le; + else + { + xml_error(ctx, "Unknown encoding '%s'", src->fb_encoding); + expected_encoding = NULL; + } + } + uint utf16 = src->refill == xml_refill_utf16_le || src->refill == xml_refill_utf16_be; + if (utf16) + src->fb_encoding = (src->refill == xml_refill_utf16_be) ? "UTF-16BE" : "UTF-16LE"; + if (!expected_encoding) + expected_encoding = src->fb_encoding; + if (bom > 0 && xml_peek_char(ctx) == 0xfeff) + xml_skip_char(ctx); + else if (utf16) + xml_error(ctx, "Missing or corrupted BOM"); + TRACE(ctx, "Initial encoding=%s", src->fb_encoding ? : "?"); + + /* Look ahead for presence of XMLDecl or optional TextDecl */ + if (!(ctx->flags & XML_SRC_EOF) && ctx->bstop != src->buf + ARRAY_SIZE(src->buf)) + xml_refill(ctx); + u32 *bptr = ctx->bptr; + uint have_decl = (12 <= ctx->bstop - ctx->bptr && (bptr[11] & XML_CHAR_WHITE) && + bptr[0] == '<' && bptr[2] == '?' 
&& (bptr[4] & 0xdf) == 'X' && (bptr[6] & 0xdf) == 'M' && (bptr[8] & 0xdf) == 'L'); + if (!have_decl) + { + if (doc) + xml_fatal(ctx, "Missing or corrupted XML header"); + else if (expected_encoding && strcasecmp(src->expected_encoding, "UTF-8") && !utf16) + xml_error(ctx, "Missing or corrupted entity header"); + goto exit; + } + ctx->bptr = bptr + 12; + xml_parse_white(ctx, 0); + + /* Parse version string (mandatory in XMLDecl, optional in TextDecl) */ + if (xml_peek_char(ctx) == 'v') + { + xml_parse_seq(ctx, "version"); + xml_parse_eq(ctx); + char *version = xml_parse_pubid_literal(ctx, ctx->pool); + TRACE(ctx, "version=%s", version); + uint v = 0; + if (!strcmp(version, "1.1")) + v = XML_VERSION_1_1; + else if (strcmp(version, "1.0")) + { + xml_error(ctx, "Unknown XML version string '%s'", version); + version = "1.0"; + } + if (doc) + { + ctx->version_str = version; + ctx->flags |= v; + } + else if (v > (ctx->flags & XML_VERSION_1_1)) + xml_error(ctx, "XML 1.1 external entity included from XML 1.0 document"); + if (!xml_parse_white(ctx, !doc)) + goto end; + } + else if (doc) + { + xml_error(ctx, "Expected XML version"); + ctx->version_str = "1.0"; + } + + /* Parse encoding string (optional in XMLDecl, mandatory in TextDecl) */ + if (xml_peek_char(ctx) == 'e') + { + xml_parse_seq(ctx, "encoding"); + xml_parse_eq(ctx); + src->decl_encoding = xml_parse_encoding_name(ctx); + TRACE(ctx, "encoding=%s", src->decl_encoding); + if (!xml_parse_white(ctx, 0)) + goto end; + } + else if (!doc) + xml_error(ctx, "Expected XML encoding"); + + /* Parse whether the document is standalone (optional in XMLDecl) */ + if (doc && xml_peek_char(ctx) == 's') + { + xml_parse_seq(ctx, "standalone"); + xml_parse_eq(ctx); + uint c = xml_parse_quote(ctx); + if (ctx->standalone = (xml_peek_char(ctx) == 'y')) + xml_parse_seq(ctx, "yes"); + else + xml_parse_seq(ctx, "no"); + xml_parse_char(ctx, c); + TRACE(ctx, "standalone=%d", ctx->standalone); + xml_parse_white(ctx, 0); + } +end: + 
xml_parse_seq(ctx, "?>"); + + /* Switch to the final encoding */ + if (src->decl_encoding) + { + int cs = find_charset_by_name(src->decl_encoding); + if (cs < 0 && !expected_encoding) + xml_error(ctx, "Unknown encoding '%s'", src->decl_encoding); + else if (!src->fb_encoding && cs >= 0 && cs != CONV_CHARSET_UTF8) + { + xml_init_charconv(ctx, cs); + src->fb_encoding = src->decl_encoding; + } + else if (expected_encoding && strcasecmp(src->decl_encoding, expected_encoding) && (!utf16 || + !(!strcasecmp(src->decl_encoding, "UTF-16") || + (!strcasecmp(src->decl_encoding, "UTF-16BE") && strcasecmp(expected_encoding, "UTF-16LE")) || + (!strcasecmp(src->decl_encoding, "UTF-16LE") && strcasecmp(expected_encoding, "UTF-16BE"))))) + xml_error(ctx, "The header contains encoding '%s' instead of expected '%s'", src->decl_encoding, expected_encoding); + } + if (!src->fb_encoding) + src->fb_encoding = "UTF-8"; + TRACE(ctx, "Final encoding=%s", src->fb_encoding); + +exit: + /* Update valid Unicode ranges */ + if (doc) + xml_init_cats(ctx); + src->refill_cat1 = ctx->cat_unrestricted & ~ctx->cat_new_line; + src->refill_cat2 = ctx->cat_new_line; +} diff --git a/libucw/ucw-xml/unicat.pl b/libucw/ucw-xml/unicat.pl new file mode 100755 index 0000000..c1bc442 --- /dev/null +++ b/libucw/ucw-xml/unicat.pl @@ -0,0 +1,165 @@ +#!/usr/bin/perl +# +# UCW Library -- Character map for the XML parser +# +# (c) 2007 Pavel Charvat +# +# This software may be freely distributed and used according to the terms +# of the GNU Lesser General Public License. 
+# + +my @cat = (); +my @lcat = (); +my %ids = (); +my %cls = (); +for (my $i = 0; $i < 0x10000; $i++) { $cat[$i] = 0; } +for (my $i = 0; $i < 0x11; $i++) { $lcat[$i] = 0; } + +my @white = (0x9, 0xA, 0xD, 0x20); +my @base_char_1_0 = ( + [0x0041,0x005A], [0x0061,0x007A], [0x00C0,0x00D6], [0x00D8,0x00F6], [0x00F8,0x00FF], [0x0100,0x0131], + [0x0134,0x013E], [0x0141,0x0148], [0x014A,0x017E], [0x0180,0x01C3], [0x01CD,0x01F0], [0x01F4,0x01F5], + [0x01FA,0x0217], [0x0250,0x02A8], [0x02BB,0x02C1], 0x0386, [0x0388,0x038A], 0x038C, [0x038E,0x03A1], + [0x03A3,0x03CE], [0x03D0,0x03D6], 0x03DA, 0x03DC, 0x03DE, 0x03E0, [0x03E2,0x03F3], [0x0401,0x040C], + [0x040E,0x044F], [0x0451,0x045C], [0x045E,0x0481], [0x0490,0x04C4], [0x04C7,0x04C8], [0x04CB,0x04CC], + [0x04D0,0x04EB], [0x04EE,0x04F5], [0x04F8,0x04F9], [0x0531,0x0556], 0x0559, [0x0561,0x0586], [0x05D0,0x05EA], + [0x05F0,0x05F2], [0x0621,0x063A], [0x0641,0x064A], [0x0671,0x06B7], [0x06BA,0x06BE], [0x06C0,0x06CE], + [0x06D0,0x06D3], 0x06D5, [0x06E5,0x06E6], [0x0905,0x0939], 0x093D, [0x0958,0x0961], [0x0985,0x098C], + [0x098F,0x0990], [0x0993,0x09A8], [0x09AA,0x09B0], 0x09B2, [0x09B6,0x09B9], [0x09DC,0x09DD], [0x09DF,0x09E1], + [0x09F0,0x09F1], [0x0A05,0x0A0A], [0x0A0F,0x0A10], [0x0A13,0x0A28], [0x0A2A,0x0A30], [0x0A32,0x0A33], + [0x0A35,0x0A36], [0x0A38,0x0A39], [0x0A59,0x0A5C], 0x0A5E, [0x0A72,0x0A74], [0x0A85,0x0A8B], 0x0A8D, + [0x0A8F,0x0A91], [0x0A93,0x0AA8], [0x0AAA,0x0AB0], [0x0AB2,0x0AB3], [0x0AB5,0x0AB9], 0x0ABD, 0x0AE0, + [0x0B05,0x0B0C], [0x0B0F,0x0B10], [0x0B13,0x0B28], [0x0B2A,0x0B30], [0x0B32,0x0B33], [0x0B36,0x0B39], + 0x0B3D, [0x0B5C,0x0B5D], [0x0B5F,0x0B61], [0x0B85,0x0B8A], [0x0B8E,0x0B90], [0x0B92,0x0B95], [0x0B99,0x0B9A], + 0x0B9C, [0x0B9E,0x0B9F], [0x0BA3,0x0BA4], [0x0BA8,0x0BAA], [0x0BAE,0x0BB5], [0x0BB7,0x0BB9], [0x0C05,0x0C0C], + [0x0C0E,0x0C10], [0x0C12,0x0C28], [0x0C2A,0x0C33], [0x0C35,0x0C39], [0x0C60,0x0C61], [0x0C85,0x0C8C], + [0x0C8E,0x0C90], [0x0C92,0x0CA8], [0x0CAA,0x0CB3], [0x0CB5,0x0CB9], 
0x0CDE, [0x0CE0,0x0CE1], [0x0D05,0x0D0C], + [0x0D0E,0x0D10], [0x0D12,0x0D28], [0x0D2A,0x0D39], [0x0D60,0x0D61], [0x0E01,0x0E2E], 0x0E30, [0x0E32,0x0E33], + [0x0E40,0x0E45], [0x0E81,0x0E82], 0x0E84, [0x0E87,0x0E88], 0x0E8A, 0x0E8D, [0x0E94,0x0E97], [0x0E99,0x0E9F], + [0x0EA1,0x0EA3], 0x0EA5, 0x0EA7, [0x0EAA,0x0EAB], [0x0EAD,0x0EAE], 0x0EB0, [0x0EB2,0x0EB3], 0x0EBD, + [0x0EC0,0x0EC4], [0x0F40,0x0F47], [0x0F49,0x0F69], [0x10A0,0x10C5], [0x10D0,0x10F6], 0x1100, [0x1102,0x1103], + [0x1105,0x1107], 0x1109, [0x110B,0x110C], [0x110E,0x1112], 0x113C, 0x113E, 0x1140, 0x114C, 0x114E, 0x1150, + [0x1154,0x1155], 0x1159, [0x115F,0x1161], 0x1163, 0x1165, 0x1167, 0x1169, [0x116D,0x116E], [0x1172,0x1173], + 0x1175, 0x119E, 0x11A8, 0x11AB, [0x11AE,0x11AF], [0x11B7,0x11B8], 0x11BA, [0x11BC,0x11C2], 0x11EB, 0x11F0, + 0x11F9, [0x1E00,0x1E9B], [0x1EA0,0x1EF9], [0x1F00,0x1F15], [0x1F18,0x1F1D], [0x1F20,0x1F45], [0x1F48,0x1F4D], + [0x1F50,0x1F57], 0x1F59, 0x1F5B, 0x1F5D, [0x1F5F,0x1F7D], [0x1F80,0x1FB4], [0x1FB6,0x1FBC], 0x1FBE, + [0x1FC2,0x1FC4], [0x1FC6,0x1FCC], [0x1FD0,0x1FD3], [0x1FD6,0x1FDB], [0x1FE0,0x1FEC], [0x1FF2,0x1FF4], + [0x1FF6,0x1FFC], 0x2126, [0x212A,0x212B], 0x212E, [0x2180,0x2182], [0x3041,0x3094], [0x30A1,0x30FA], + [0x3105,0x312C], [0xAC00,0xD7A3]); +my @ideographic_1_0 = ([0x4E00,0x9FA5], 0x3007, [0x3021,0x3029]); +my @combining_char_1_0 = ( + [0x0300,0x0345], [0x0360,0x0361], [0x0483,0x0486], [0x0591,0x05A1], [0x05A3,0x05B9], [0x05BB,0x05BD], + 0x05BF, [0x05C1,0x05C2], 0x05C4, [0x064B,0x0652], 0x0670, [0x06D6,0x06DC], [0x06DD,0x06DF], [0x06E0,0x06E4], + [0x06E7,0x06E8], [0x06EA,0x06ED], [0x0901,0x0903], 0x093C, [0x093E,0x094C], 0x094D, [0x0951,0x0954], + [0x0962,0x0963], [0x0981,0x0983], 0x09BC, 0x09BE, 0x09BF, [0x09C0,0x09C4], [0x09C7,0x09C8], [0x09CB,0x09CD], + 0x09D7, [0x09E2,0x09E3], 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, [0x0A40,0x0A42], [0x0A47,0x0A48], [0x0A4B,0x0A4D], + [0x0A70,0x0A71], [0x0A81,0x0A83], 0x0ABC, [0x0ABE,0x0AC5], [0x0AC7,0x0AC9], [0x0ACB,0x0ACD], 
[0x0B01,0x0B03], + 0x0B3C, [0x0B3E,0x0B43], [0x0B47,0x0B48], [0x0B4B,0x0B4D], [0x0B56,0x0B57], [0x0B82,0x0B83], [0x0BBE,0x0BC2], + [0x0BC6,0x0BC8], [0x0BCA,0x0BCD], 0x0BD7, [0x0C01,0x0C03], [0x0C3E,0x0C44], [0x0C46,0x0C48], [0x0C4A,0x0C4D], + [0x0C55,0x0C56], [0x0C82,0x0C83], [0x0CBE,0x0CC4], [0x0CC6,0x0CC8], [0x0CCA,0x0CCD], [0x0CD5,0x0CD6], + [0x0D02,0x0D03], [0x0D3E,0x0D43], [0x0D46,0x0D48], [0x0D4A,0x0D4D], 0x0D57, 0x0E31, [0x0E34,0x0E3A], + [0x0E47,0x0E4E], 0x0EB1, [0x0EB4,0x0EB9], [0x0EBB,0x0EBC], [0x0EC8,0x0ECD], [0x0F18,0x0F19], 0x0F35, + 0x0F37, 0x0F39, 0x0F3E, 0x0F3F, [0x0F71,0x0F84], [0x0F86,0x0F8B], [0x0F90,0x0F95], 0x0F97, [0x0F99,0x0FAD], + [0x0FB1,0x0FB7], 0x0FB9, [0x20D0,0x20DC], 0x20E1, [0x302A,0x302F], 0x3099, 0x309A); +my @digit_1_0 = ( + [0x0030,0x0039], [0x0660,0x0669], [0x06F0,0x06F9], [0x0966,0x096F], [0x09E6,0x09EF], [0x0A66,0x0A6F], + [0x0AE6,0x0AEF], [0x0B66,0x0B6F], [0x0BE7,0x0BEF], [0x0C66,0x0C6F], [0x0CE6,0x0CEF], [0x0D66,0x0D6F], + [0x0E50,0x0E59], [0x0ED0,0x0ED9], [0x0F20,0x0F29]); +my @extender_1_0 = ( + 0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005, [0x3031,0x3035], [0x309D,0x309E], [0x30FC,0x30FE]); +my @sname_1_1 = ( + "[:A-Z_a-z]", [0xC0,0xD6], [0xD8,0xF6], [0xF8,0x2FF], [0x370,0x37D], [0x37F,0x1FFF], + [0x200C,0x200D], [0x2070,0x218F], [0x2C00,0x2FEF], [0x3001,0xD7FF], [0xF900,0xFDCF], [0xFDF0,0xFFFD], [0x10000,0xEFFFF]); + +set("WHITE", @white); +set("NEW_LINE_1_0", 0xA, 0xD); +set("NEW_LINE_1_1", 0xA, 0xD, 0x85, 0x2028); +set("DIGIT", "[0-9]"); +set("XDIGIT", "[0-9a-fA-F]"); +set("VALID_1_0", @white, [0x20,0xD7FF], [0xE000,0xFFFD], [0x10000,0x10FFFF]); +set("VALID_1_1", [0x1,0xD7FF], [0xE000,0xFFFD], [0x10000,0x10FFFF]); +set("UNRESTRICTED_1_1", @white, [0x20,0x7E], 0x85, [0xA0,0xD7FF], [0xE000,0xFFFD], [0x10000,0x10FFFF]); +set("PUBID", 0x20, 0xD, 0xA, "[-a-zA-Z0-9'()+,./:=?:!*#@\$_%]"); +set("ENC_SNAME", "[a-zA-Z]"); +set("ENC_NAME", "[-a-zA-Z0-9._]"); +set("SNAME_1_0", "[_:]", @base_char_1_0, 
@ideographic_1_0); +set("NAME_1_0", "[-_:.]", @base_char_1_0, @ideographic_1_0, @combining_char_1_0, @digit_1_0, @extender_1_0); +set("SNAME_1_1", @sname_1_1); +set("NAME_1_1", @sname_1_1, "[-.0-9]", 0xB7, [0x0300,0x036F], [0x203F,0x2040]); +set("GT", "[>]"); + +($ARGV[0] eq "" || $ARGV[1] eq "") && die("Invalid usage"); +find_cls(); +open(H, ">", $ARGV[0]) or die("Cannot create $ARGV[0]"); +open(C, ">", $ARGV[1]) or die("Cannot create $ARGV[1]"); +gen_enum(); +gen_tabs(); +close(H); +close(C); + +sub set { + my $id = shift; + $ids{$id} = scalar keys(%ids) if !defined($ids{$id}); + my $mask = 1 << $ids{$id}; + foreach my $i (@_) { + if (ref($i) eq "ARRAY") { + my $j = $i->[0]; + for (; $j <= $i->[1] && $j < 0x10000; $j++) { $cat[$j] |= $mask; } + for (; $j <= $i->[1]; $j += 0x10000) { $lcat[$j >> 16] |= $mask; } + } + elsif ($i =~ /^\[/) { for (my $j=0; $j < 128; $j++) { if (chr($j) =~ /$i/) { $cat[$j] |= $mask; } } } + else { $cat[$i] |= $mask; } + } +} + +sub find_cls { + foreach (my $i=0; $i<@cat; $i++) { $cls{$cat[$i]} = scalar keys(%cls) if !defined($cls{$cat[$i]}); } + foreach (my $i=0; $i<@lcat; $i++) { $cls{$lcat[$i]} = scalar keys(%cls) if !defined($cls{$lcat[$i]}); } +} + +sub gen_enum { + print H "enum xml_char_type {\n"; + foreach my $id (sort keys %ids) { + my $mask = 0; + foreach my $i (keys %cls) { + $mask |= 1 << $cls{$i} if $cls{$i} && ($i & (1 << $ids{$id})); + } + printf H " XML_CHAR_%-20s = 0x%08x,\n", $id, $mask; + } + print H "};\n\n"; +} + +sub gen_tabs { + my @tab = (); + my %hash = (); + + print H "extern const byte ucw_xml_char_tab1[];\n"; + print H "extern const uint ucw_xml_char_tab2[];\n"; + print H "extern const byte ucw_xml_char_tab3[];\n"; + + print C "const uint ucw_xml_char_tab2[] = {\n "; + for (my $t=0; $t<256; $t++) { + my $i = $t * 256; + my @x = (); + for (my $j=0; $j<256; $j += 32) { + push @x, join(",", map($cls{$_}, @cat[$i+$j..$i+$j+31])); + } + my $sub = " " . 
join(",\n ", @x); + if (!defined($hash{$sub})) { + $hash{$sub} = 256 * scalar @tab; + push @tab, $sub; + } + printf C "0x%x", $hash{$sub}; + print C ((~$t & 15) ? "," : ($t < 255) ? ",\n " : "\n};\n\n"); + } + + print C "const byte ucw_xml_char_tab1[] = {\n"; + print C join(",\n\n", @tab); + print C "\n};\n\n"; + + my @l = (); + for (my $i=0; $i<0x11; $i++) { + push @l, sprintf("%d", $cls{$lcat[$i]}); + } + print C "const byte ucw_xml_char_tab3[] = {" . join(",", @l) . "};\n"; +} diff --git a/libucw/ucw-xml/xml-test.c b/libucw/ucw-xml/xml-test.c new file mode 100644 index 0000000..7b7fcd1 --- /dev/null +++ b/libucw/ucw-xml/xml-test.c @@ -0,0 +1,394 @@ +/* + * UCW Library -- A simple XML parser + * + * (c) 2007--2008 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +enum { + WANT_FIRST = 0x100, + WANT_HIDE_ERRORS, + WANT_IGNORE_COMMENTS, + WANT_IGNORE_PIS, + WANT_REPORT_BLOCKS, + WANT_REPORT_IGNORABLE, + WANT_FILE_ENTITIES, + WANT_QNAMES, +}; + +static char *shortopts = "spdtn" CF_SHORT_OPTS; +static struct option longopts[] = { + CF_LONG_OPTS + { "sax", 0, 0, 's' }, + { "pull", 0, 0, 'p' }, + { "dom", 0, 0, 't' }, + { "dtd", 0, 0, 'd' }, + { "namespaces", 0, 0, 'n' }, + { "hide-errors", 0, 0, WANT_HIDE_ERRORS }, + { "ignore-comments", 0, 0, WANT_IGNORE_COMMENTS }, + { "ignore-pis", 0, 0, WANT_IGNORE_PIS }, + { "report-blocks", 0, 0, WANT_REPORT_BLOCKS }, + { "report-ignorable", 0, 0, WANT_REPORT_IGNORABLE }, + { "file-entities", 0, 0, WANT_FILE_ENTITIES }, + { "qnames", 0, 0, WANT_QNAMES }, + { NULL, 0, 0, 0 } +}; + +static void NONRET +usage(void) +{ + fputs("\ +Usage: xml-test [options] < input.xml\n\ +\n\ +Options:\n" +CF_USAGE +"\ +-p, --pull Test PULL interface\n\ +-s, --sax Test SAX interface\n\ +-t, --dom Test DOM interface\n\ +-d, --dtd Enable parsing of DTD\n\ +-n, 
--namespaces Resolve namespaces\n\ + --hide-errors Hide warnings and error messages\n\ + --ignore-comments Ignore comments\n\ + --ignore-pis Ignore processing instructions\n\ + --report-blocks Report blocks or characters and CDATA sections\n\ + --report-ignorable Report ignorable whitespace\n\ + --file-entities Resolve file external entities (not fully normative)\n\ + --qnames Display qualified names including namespace prefixes\n\ +\n", stderr); + exit(1); +} + +static uint want_sax; +static uint want_pull; +static uint want_dom; +static uint want_ns; +static uint want_parse_dtd; +static uint want_hide_errors; +static uint want_ignore_comments; +static uint want_ignore_pis; +static uint want_report_blocks; +static uint want_report_ignorable; +static uint want_file_entities; +static uint want_qnames; + +static struct fastbuf *out; + +static char * +node_type(struct xml_node *node) +{ + switch (node->type) + { + case XML_NODE_ELEM: return "element"; + case XML_NODE_COMMENT: return "comment"; + case XML_NODE_PI: return "pi"; + case XML_NODE_CHARS: return "chars"; + default: return "unknown"; + } +} + +static void +show_node(struct xml_context *ctx, struct xml_node *node) +{ + switch (node->type) + { + case XML_NODE_ELEM: + if (want_ns) + bprintf(out, " (ns%u)<%s>", node->ns, (want_qnames ? xml_node_qname(ctx, node) : node->name)); + else + bprintf(out, " <%s>", node->name); + XML_ATTR_FOR_EACH(a, node) + if (want_ns) + bprintf(out, " (ns%u)%s='%s'", a->ns, (want_qnames ? 
xml_attr_qname(ctx, a) : a->name), a->val); + else + bprintf(out, " %s='%s'", a->name, a->val); + bputc(out, '\n'); + break; + case XML_NODE_COMMENT: + bprintf(out, " text='%s'\n", node->text); + break; + case XML_NODE_PI: + bprintf(out, " target=%s text='%s'\n", node->name, node->text); + break; + case XML_NODE_CHARS: + bprintf(out, " text='%s'\n", node->text); + break; + default: + bputc(out, '\n'); + } +} + +static void +show_tree(struct xml_context *ctx, struct xml_node *node, uint level) +{ + if (!node) + return; + bputs(out, "DOM: "); + for (uint i = 0; i < level; i++) + bputs(out, " "); + bputs(out, node_type(node)); + show_node(ctx, node); + if (node->type == XML_NODE_ELEM) + XML_NODE_FOR_EACH(son, node) + show_tree(ctx, son, level + 1); +} + +static void +h_error(struct xml_context *ctx) +{ + bprintf(out, "SAX: %s at %u: %s\n", (ctx->err_code < XML_ERR_ERROR) ? "warn" : "error", xml_row(ctx), ctx->err_msg); +} + +static void +h_document_start(struct xml_context *ctx UNUSED) +{ + bputs(out, "SAX: document_start\n"); +} + +static void +h_document_end(struct xml_context *ctx UNUSED) +{ + bputs(out, "SAX: document_end\n"); +} + +static void +h_xml_decl(struct xml_context *ctx) +{ + bprintf(out, "SAX: xml_decl version=%s standalone=%d fb_encoding=%s\n", ctx->version_str, ctx->standalone, ctx->src->fb_encoding); +} + +static void +h_doctype_decl(struct xml_context *ctx) +{ + bprintf(out, "SAX: doctype_decl type=%s public='%s' system='%s' extsub=%d intsub=%d\n", + ctx->doctype, ctx->public_id ? : "", ctx->system_id ? 
: "", + !!(ctx->flags & XML_HAS_EXTERNAL_SUBSET), !!(ctx->flags & XML_HAS_INTERNAL_SUBSET)); +} + +static void +h_comment(struct xml_context *ctx) +{ + bputs(out, "SAX: comment"); + show_node(ctx, ctx->node); +} + +static void +h_pi(struct xml_context *ctx) +{ + bputs(out, "SAX: pi"); + show_node(ctx, ctx->node); +} + +static void +h_stag(struct xml_context *ctx) +{ + bputs(out, "SAX: stag"); + show_node(ctx, ctx->node); +} + +static void +h_etag(struct xml_context *ctx) +{ + bprintf(out, "SAX: etag \n", ctx->node->name); +} + +static void +h_chars(struct xml_context *ctx) +{ + bputs(out, "SAX: chars"); + show_node(ctx, ctx->node); +} + +static void +h_block(struct xml_context *ctx UNUSED, char *text, uint len UNUSED) +{ + bprintf(out, "SAX: block text='%s'\n", text); +} + +static void +h_cdata(struct xml_context *ctx UNUSED, char *text, uint len UNUSED) +{ + bprintf(out, "SAX: cdata text='%s'\n", text); +} + +static void +h_ignorable(struct xml_context *ctx UNUSED, char *text, uint len UNUSED) +{ + bprintf(out, "SAX: ignorable text='%s'\n", text); +} + +static void +h_dtd_start(struct xml_context *ctx UNUSED) +{ + bputs(out, "SAX: dtd_start\n"); +} + +static void +h_dtd_end(struct xml_context *ctx UNUSED) +{ + bputs(out, "SAX: dtd_end\n"); +} + +static void +h_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *e) +{ + xml_push_fastbuf(ctx, bopen(e->system_id, O_RDONLY, 4096)); +} + +int +main(int argc, char **argv) +{ + int opt; + cf_def_file = NULL; + log_init(argv[0]); + while ((opt = cf_getopt(argc, argv, shortopts, longopts, NULL)) >= 0) + switch (opt) + { + case 's': + want_sax++; + break; + case 'p': + want_pull++; + break; + case 't': + want_dom++; + break; + case 'd': + want_parse_dtd++; + break; + case 'n': + want_ns++; + break; + case WANT_HIDE_ERRORS: + want_hide_errors++; + break; + case WANT_IGNORE_COMMENTS: + want_ignore_comments++; + break; + case WANT_IGNORE_PIS: + want_ignore_pis++; + break; + case WANT_REPORT_BLOCKS: + 
want_report_blocks++; + break; + case WANT_REPORT_IGNORABLE: + want_report_ignorable++; + break; + case WANT_FILE_ENTITIES: + want_file_entities++; + break; + case WANT_QNAMES: + want_qnames++; + break; + default: + usage(); + } + if (optind != argc) + usage(); + + out = bfdopen_shared(1, 4096); + struct xml_context ctx; + xml_init(&ctx); + if (!want_hide_errors) + ctx.h_warn = ctx.h_error = ctx.h_fatal = h_error; + if (want_sax) + { + ctx.h_document_start = h_document_start; + ctx.h_document_end = h_document_end; + ctx.h_xml_decl = h_xml_decl; + ctx.h_doctype_decl = h_doctype_decl; + ctx.h_comment = h_comment; + ctx.h_pi = h_pi; + ctx.h_stag = h_stag; + ctx.h_etag = h_etag; + ctx.h_chars = h_chars; + if (want_report_blocks) + { + ctx.h_block = h_block; + ctx.h_cdata = h_cdata; + } + if (want_report_ignorable) + ctx.h_ignorable = h_ignorable; + ctx.h_dtd_start = h_dtd_start; + ctx.h_dtd_end = h_dtd_end; + } + if (want_dom) + ctx.flags |= XML_ALLOC_ALL; + if (want_parse_dtd) + ctx.flags |= XML_PARSE_DTD; + if (want_ignore_comments) + ctx.flags &= ~(XML_REPORT_COMMENTS | XML_ALLOC_COMMENTS); + if (want_ignore_pis) + ctx.flags &= ~(XML_REPORT_PIS | XML_ALLOC_PIS); + if (want_file_entities) + ctx.h_resolve_entity = h_resolve_entity; + if (want_ns) + xml_ns_enable(&ctx); + xml_push_fastbuf(&ctx, bfdopen_shared(0, 4096)); + bputs(out, "PULL: start\n"); + if (want_pull) + { + ctx.pull = XML_PULL_CHARS | XML_PULL_STAG | XML_PULL_ETAG | XML_PULL_COMMENT | XML_PULL_PI; + uint state; + while (state = xml_next(&ctx)) + switch (state) + { + case XML_STATE_CHARS: + bputs(out, "PULL: chars"); + show_node(&ctx, ctx.node); + break; + case XML_STATE_STAG: + bputs(out, "PULL: stag"); + show_node(&ctx, ctx.node); + break; + case XML_STATE_ETAG: + bprintf(out, "PULL: etag \n", ctx.node->name); + break; + case XML_STATE_COMMENT: + bputs(out, "PULL: comment"); + show_node(&ctx, ctx.node); + break; + case XML_STATE_PI: + bputs(out, "PULL: pi"); + show_node(&ctx, ctx.node); + break; + 
default: + bputs(out, "PULL: unknown\n"); + break; + } + } + else + xml_parse(&ctx); + if (ctx.err_code) + bprintf(out, "PULL: fatal error at %u: %s\n", xml_row(&ctx), ctx.err_msg); + else + { + bputs(out, "PULL: eof\n"); + if (want_dom) + show_tree(&ctx, ctx.dom, 0); + } + + if (want_ns) + { + bputs(out, "Known namespaces:\n"); + for (uns i=0; i < GARY_SIZE(ctx.ns_by_id); i++) + bprintf(out, "%u\t%s\n", i, ctx.ns_by_id[i]); + } + + xml_cleanup(&ctx); + bclose(out); + return 0; +} diff --git a/libucw/ucw-xml/xml-test.t b/libucw/ucw-xml/xml-test.t new file mode 100644 index 0000000..d48fd40 --- /dev/null +++ b/libucw/ucw-xml/xml-test.t @@ -0,0 +1,58 @@ +# Tests for the XML parser +# (c) 2008 Pavel Charvat + +Run: ../obj/ucw-xml/xml-test +In: + +Out: PULL: start + PULL: eof + +Run: ../obj/ucw-xml/xml-test -s +In: + text1&amp;<text2 +Out: PULL: start + SAX: document_start + SAX: xml_decl version=1.0 standalone=0 fb_encoding=ISO-8859-1 + SAX: stag + SAX: stag a1='val1' a2='val2' + SAX: chars text='text1&<' + SAX: etag + SAX: chars text='text2' + SAX: etag + SAX: document_end + PULL: eof + +Run: ../obj/ucw-xml/xml-test -sptd +In: + + "> + %pe1; + + + ]> + &e1;&e2; +Out: PULL: start + SAX: document_start + SAX: xml_decl version=1.0 standalone=0 fb_encoding=UTF-8 + SAX: doctype_decl type=root public='' system='' extsub=0 intsub=1 + SAX: dtd_start + SAX: dtd_end + SAX: stag + PULL: stag + SAX: chars text='text' + PULL: chars text='text' + SAX: stag + PULL: stag + SAX: chars text='' + PULL: chars text='' + PULL: etag + SAX: etag + PULL: etag + SAX: etag + SAX: document_end + PULL: eof + DOM: element + DOM: chars text='text' + DOM: element + DOM: chars text='' diff --git a/libucw/ucw-xml/xml.h b/libucw/ucw-xml/xml.h new file mode 100644 index 0000000..1712607 --- /dev/null +++ b/libucw/ucw-xml/xml.h @@ -0,0 +1,404 @@ +/* + * UCW Library -- A simple XML parser + * + * (c) 2007--2008 Pavel Charvat + * (c) 2015 Martin Mares + * + * This software may be freely distributed and 
used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_XML_XML_H +#define _UCW_XML_XML_H + +#include +#include +#include +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define xml_attr_find ucw_xml_attr_find +#define xml_attr_find_ns ucw_xml_attr_find_ns +#define xml_attr_qname ucw_xml_attr_qname +#define xml_attr_value ucw_xml_attr_value +#define xml_attr_value_ns ucw_xml_attr_value_ns +#define xml_cleanup ucw_xml_cleanup +#define xml_def_find_entity ucw_xml_def_find_entity +#define xml_def_resolve_entity ucw_xml_def_resolve_entity +#define xml_error ucw_xml_error +#define xml_fatal ucw_xml_fatal +#define xml_init ucw_xml_init +#define xml_merge_chars ucw_xml_merge_chars +#define xml_merge_dom_chars ucw_xml_merge_dom_chars +#define xml_next ucw_xml_next +#define xml_next_state ucw_xml_next_state +#define xml_node_qname ucw_xml_node_qname +#define xml_normalize_white ucw_xml_normalize_white +#define xml_ns_by_id ucw_xml_ns_by_id +#define xml_ns_by_name ucw_xml_ns_by_name +#define xml_ns_enable ucw_xml_ns_enable +#define xml_parse ucw_xml_parse +#define xml_push_fastbuf ucw_xml_push_fastbuf +#define xml_reset ucw_xml_reset +#define xml_row ucw_xml_row +#define xml_skip_element ucw_xml_skip_element +#define xml_warn ucw_xml_warn +#endif + +/*** + * === Constants + ***/ + +struct xml_context; +struct xml_dtd_entity; + +/** Error code reported by the parser. So far, only the basic error classes are recognized. **/ +enum xml_error { + XML_ERR_OK = 0, + XML_ERR_WARN = 1000, /* Warning */ + XML_ERR_ERROR = 2000, /* Recoverable error */ + XML_ERR_FATAL = 3000, /* Unrecoverable error */ + XML_ERR_EOF, +}; + +/** Parser state. A pull parser returns one of these to indicate the type of the current node. 
**/ +enum xml_state { + XML_STATE_EOF, /* EOF or a fatal error */ + XML_STATE_START, /* Initial state */ + XML_STATE_XML_DECL, /* XML_PULL_XML_DECL */ + XML_STATE_DOCTYPE_DECL, /* XML_PULL_DOCTYPE_DECL */ + XML_STATE_CHARS, /* XML_PULL_CHARS */ + XML_STATE_STAG, /* XML_PULL_STAG */ + XML_STATE_ETAG, /* XML_PULL_ETAG */ + XML_STATE_COMMENT, /* XML_PULL_COMMENT */ + XML_STATE_PI, /* XML_PULL_PI */ + + /* Internal states */ + XML_STATE_CHARS_BEFORE_STAG, + XML_STATE_CHARS_BEFORE_ETAG, + XML_STATE_CHARS_BEFORE_CDATA, + XML_STATE_CHARS_BEFORE_COMMENT, + XML_STATE_CHARS_BEFORE_PI, + XML_STATE_PROLOG_COMMENT, + XML_STATE_PROLOG_PI, + XML_STATE_EPILOG_COMMENT, + XML_STATE_EPILOG_PI, +}; + +/** Pull requests: a bit mask of node types you want to return. The other nodes are silently skipped. **/ +enum xml_pull { + XML_PULL_XML_DECL = 0x00000001, /* Stop after the XML declaration */ + XML_PULL_DOCTYPE_DECL = 0x00000002, /* Stop in the doctype declaration (before optional internal subset) */ + XML_PULL_CHARS = 0x00000004, + XML_PULL_STAG = 0x00000008, + XML_PULL_ETAG = 0x00000010, + XML_PULL_COMMENT = 0x00000020, + XML_PULL_PI = 0x00000040, + XML_PULL_ALL = 0xffffffff, +}; + +/** Parser mode flags. 
**/ +enum xml_flags { + /* Enable reporting of various events via SAX and/or PULL interface */ + XML_REPORT_COMMENTS = 0x00000001, /* Report comments */ + XML_REPORT_PIS = 0x00000002, /* Report processing instructions */ + XML_REPORT_CHARS = 0x00000004, /* Report characters */ + XML_REPORT_TAGS = 0x00000008, /* Report element starts/ends */ + XML_REPORT_MISC = XML_REPORT_COMMENTS | XML_REPORT_PIS, + XML_REPORT_ALL = XML_REPORT_MISC | XML_REPORT_CHARS | XML_REPORT_TAGS, + + /* Enable construction of DOM for these types */ + XML_ALLOC_COMMENTS = 0x00000010, /* Create comment nodes */ + XML_ALLOC_PIS = 0x00000020, /* Create processing instruction nodes */ + XML_ALLOC_CHARS = 0x00000040, /* Create character nodes */ + XML_ALLOC_TAGS = 0x00000080, /* Create element nodes */ + XML_ALLOC_MISC = XML_ALLOC_COMMENTS | XML_ALLOC_PIS, + XML_ALLOC_ALL = XML_ALLOC_MISC | XML_ALLOC_CHARS | XML_ALLOC_TAGS, + + /* Other parameters */ + XML_VALIDATING = 0x00000100, /* Validate everything (not fully implemented!) */ + XML_PARSE_DTD = 0x00000200, /* Enable parsing of DTD */ + XML_NO_CHARS = 0x00000400, /* The current element must not contain character data (filled automatically if using DTD) */ + XML_ALLOC_DEFAULT_ATTRS = 0x00000800, /* Allocate default attribute values so they can be found by XML_ATTR_FOR_EACH */ + XML_NAMESPACES = 0x00001000, /* Parse namespaces, use xml_ns_enable() to set this */ + + /* Internals, do not change! 
*/ + XML_EMPTY_ELEM_TAG = 0x00010000, /* The current element matches EmptyElemTag */ + XML_VERSION_1_1 = 0x00020000, /* XML version is 1.1, otherwise 1.0 */ + XML_HAS_EXTERNAL_SUBSET = 0x00040000, /* The document contains a reference to external DTD subset */ + XML_HAS_INTERNAL_SUBSET = 0x00080000, /* The document contains an internal subset */ + XML_HAS_DTD = XML_HAS_EXTERNAL_SUBSET | XML_HAS_INTERNAL_SUBSET, + XML_SRC_EOF = 0x00100000, /* EOF reached */ + XML_SRC_EXPECTED_DECL = 0x00200000, /* Just before optional or required XMLDecl/TextDecl */ + XML_SRC_DOCUMENT = 0x00400000, /* The document entity */ + XML_SRC_EXTERNAL = 0x00800000, /* An external entity */ +}; + +/*** + * === Internal representation of DOM + * + * All DOM nodes are allocated within temporary memory pools and they are not + * guaranteed to survive when the parser leaves the element. Upon <>, + * all remaining nodes are always freed. + ***/ + +/** Node types **/ +enum xml_node_type { + XML_NODE_ELEM, /* Element */ + XML_NODE_COMMENT, /* Comment */ + XML_NODE_CHARS, /* Character data */ + XML_NODE_PI, /* Processing instruction */ +}; + +/** Iterate over all children of a node. **/ +#define XML_NODE_FOR_EACH(var, node) CLIST_FOR_EACH(struct xml_node *, var, (node)->sons) + +/** Iterate over all attributes of a node. **/ +#define XML_ATTR_FOR_EACH(var, node) SLIST_FOR_EACH(struct xml_attr *, var, (node)->attrs) + +/** A single DOM node. **/ +struct xml_node { + cnode n; /* Node for list of parent's sons */ + uint type; /* XML_NODE_x */ + struct xml_node *parent; /* Parent node */ + /* + * If namespaces are enabled, node->name points to the local part of the name + * and node->ns is the resolved namespace ID. + * + * However, the namespace prefix is kept in memory just before the local part, + * so you can use xml_node_qname() to find out the full qualified name. + * The same applies to attributes, but the function is xml_attr_qname(). 
+ */ + char *name; /* Element name / PI target */ + clist sons; /* Children nodes */ + union { + struct { + char *text; /* PI text / Comment / CDATA */ + uint len; /* Text length in bytes */ + }; + struct { + uint ns; /* Namespace ID */ + struct xml_dtd_elem *dtd; /* Element DTD */ + slist attrs; /* Link list of element attributes */ + }; + }; + void *user; /* User-defined (initialized to NULL) */ +}; + +/** A single attribute. **/ +struct xml_attr { + snode n; /* Node for elem->attrs */ + uint hash; /* Internal hash of ns + name */ + struct xml_node *elem; /* Parent element */ + struct xml_dtd_attr *dtd; /* Attribute DTD */ + uint ns; /* Namespace ID */ + char *name; /* Attribute name without NS prefix */ + char *val; /* Attribute value */ + void *user; /* User-defined (initialized to NULL) */ +}; + +#define XML_BUF_SIZE 32 /* At least 8 -- hardcoded */ + +struct xml_source { + struct xml_source *next; /* Link list of pending fastbufs (xml_context.sources) */ + struct fastbuf *fb; /* Source fastbuf */ + struct fastbuf *wrapped_fb; /* Original wrapped fastbuf (needed for cleanup) */ + struct fastbuf wrap_fb; /* Fbmem wrapper */ + u32 buf[2 * XML_BUF_SIZE]; /* Read buffer with Unicode values and categories */ + u32 *bptr, *bstop; /* Current state of the buffer */ + uint row; /* File position */ + char *expected_encoding; /* Initial encoding before any transformation has been made (expected in XMLDecl/TextDecl) */ + char *fb_encoding; /* Encoding of the source fastbuf */ + char *decl_encoding; /* Encoding read from the XMLDecl/TextDecl */ + uint refill_cat1; /* Character categories, which should be directly passed to the buffer */ + uint refill_cat2; /* Character categories, which should be processed as newlines (possibly in some built-in + sequences) */ + void (*refill)(struct xml_context *ctx); /* Callback to decode source characters to the buffer */ + unsigned short *refill_in_to_x; /* Libucw-charset input table */ + uint saved_depth; /* Saved ctx->depth */ + uint 
pending_0xd; /* The last read character is 0xD */ +}; + +/** Finds a qualified name (including namespace prefix) of a given element node. **/ +char *xml_node_qname(struct xml_context *ctx, struct xml_node *node); + +/** Finds a qualified name (including namespace prefix) of a given attribute. **/ +char *xml_attr_qname(struct xml_context *ctx, struct xml_attr *node); + +/** Finds a given attribute value in a `XML_NODE_ELEM` node **/ +struct xml_attr *xml_attr_find(struct xml_context *ctx, struct xml_node *node, char *name); + +/** The same, but namespace-aware **/ +struct xml_attr *xml_attr_find_ns(struct xml_context *ctx, struct xml_node *node, uint ns, char *name); + +/** Similar to xml_attr_find, but it deals also with default values **/ +char *xml_attr_value(struct xml_context *ctx, struct xml_node *node, char *name); + +/** The same, but namespace-aware **/ +char *xml_attr_value_ns(struct xml_context *ctx, struct xml_node *node, uint ns, char *name); + +/** Remove leading/trailing spaces and replaces sequences of spaces to a single space character (non-CDATA attribute normalization) **/ +uint xml_normalize_white(struct xml_context *ctx, char *value); + +/** Merge character contents of a given element to a single string (not recursive) **/ +char *xml_merge_chars(struct xml_context *ctx, struct xml_node *node, struct mempool *pool); + +/** Merge character contents of a given subtree to a single string **/ +char *xml_merge_dom_chars(struct xml_context *ctx, struct xml_node *node, struct mempool *pool); + +/*** + * === Parser context + ***/ + +/** + * The state of the parser is kept in this structure. There are some + * user-accessible parts (like pointers to various hooks), but the + * majority of fields is private. 
+ **/ +struct xml_context { + /* Error handling */ + char *err_msg; /* Last error message */ + enum xml_error err_code; /* Last error code */ + void *throw_buf; /* Where to jump on error */ + void (*h_warn)(struct xml_context *ctx); /* Warning callback */ + void (*h_error)(struct xml_context *ctx); /* Recoverable error callback */ + void (*h_fatal)(struct xml_context *ctx); /* Unrecoverable error callback */ + + /* Memory management (private) */ + struct mempool *pool; /* DOM pool */ + struct mempool *stack; /* Stack pool (freed as soon as possible) */ + struct xml_stack *stack_list; /* See xml_push(), xml_pop() */ + uint flags; /* XML_FLAG_x (restored on xml_pop()) */ + uint depth; /* Nesting level (for checking of valid source nesting -> valid pushes/pops on memory pools) */ + struct fastbuf chars; /* Character data / attribute value */ + struct mempool_state chars_state; /* Mempool state before the current character block has started */ + char *chars_trivial; /* If not empty, it will be appended to chars */ + + /* Input (private) */ + struct xml_source *src; /* Current source */ + u32 *bptr, *bstop; /* Buffer with preprocessed characters (validated UCS-4 + category flags) */ + uint cat_chars; /* Unicode range of supported characters (cdata, attribute values, ...) 
*/ + uint cat_unrestricted; /* Unrestricted characters (may appear in document/external entities) */ + uint cat_new_line; /* New line characters */ + uint cat_name; /* Characters that may appear in names */ + uint cat_sname; /* Characters that may begin a name */ + + /* SAX-like interface */ + void (*h_document_start)(struct xml_context *ctx); /* Called before entering prolog */ + void (*h_document_end)(struct xml_context *ctx); /* Called after leaving epilog */ + void (*h_xml_decl)(struct xml_context *ctx); /* Called after the XML declaration */ + void (*h_doctype_decl)(struct xml_context *ctx); /* Called in the doctype declaration (before optional internal subset) */ + void (*h_comment)(struct xml_context *ctx); /* Called after a comment (only with XML_REPORT_COMMENTS) */ + void (*h_pi)(struct xml_context *ctx); /* Called after a processing instruction (only with XML_REPORT_PIS) */ + void (*h_stag)(struct xml_context *ctx); /* Called after STag or EmptyElemTag (only with XML_REPORT_TAGS) */ + void (*h_etag)(struct xml_context *ctx); /* Called before ETag or after EmptyElemTag (only with XML_REPORT_TAGS) */ + void (*h_chars)(struct xml_context *ctx); /* Called after some characters (only with XML_REPORT_CHARS) */ + void (*h_block)(struct xml_context *ctx, char *text, uint len); /* Called for each continuous block of characters not reported by h_cdata() (only with XML_REPORT_CHARS) */ + void (*h_cdata)(struct xml_context *ctx, char *text, uint len); /* Called for each CDATA section (only with XML_REPORT_CHARS) */ + void (*h_ignorable)(struct xml_context *ctx, char *text, uint len); /* Called for ignorable whitespace (content in tags without #PCDATA) */ + void (*h_dtd_start)(struct xml_context *ctx); /* Called just after the DTD structure is initialized */ + void (*h_dtd_end)(struct xml_context *ctx); /* Called after DTD subsets */ + struct xml_dtd_entity *(*h_find_entity)(struct xml_context *ctx, char *name); /* Called when needed to resolve a general 
entity */ + void (*h_resolve_entity)(struct xml_context *ctx, struct xml_dtd_entity *ent); /* User should push source fastbuf for a parsed external entity (either general or parameter) */ + + /* DOM */ + struct xml_node *dom; /* DOM root */ + struct xml_node *node; /* Current DOM node */ + + /* Namespaces (private) */ + struct mempool *ns_pool; /* Memory pool for NS definitions */ + const char **ns_by_id; /* A growing array translating NS IDs to their names */ + void *ns_by_name; /* Hash table translating NS names to their IDs */ + void *ns_by_prefix; /* Hash table translating current prefixes to NS IDs, allocated from xml->stack */ + struct xml_ns_prefix *ns_prefix_stack; /* A stack of prefix definitions, allocated from xml->stack */ + uint ns_default; /* Current default namespace */ + + /* Other stuff */ + char *version_str; + uint standalone; + char *doctype; /* The document type (or NULL if unknown) */ + char *system_id; /* DTD external id */ + char *public_id; /* DTD public id */ + struct xml_dtd *dtd; /* The DTD structure (or NULL) */ + uint state; /* Current state for the PULL interface (XML_STATE_x) */ + uint pull; /* Parameters for the PULL interface (XML_PULL_x) */ +}; + +/** Initialize XML context **/ +void xml_init(struct xml_context *ctx); + +/** Clean up all internal structures **/ +void xml_cleanup(struct xml_context *ctx); + +/** Reuse XML context, equivalent to xml_cleanup() and xml_init(), but faster **/ +void xml_reset(struct xml_context *ctx); + +/** Add XML source (fastbuf will be automatically closed) **/ +struct xml_source *xml_push_fastbuf(struct xml_context *ctx, struct fastbuf *fb); + +/** Parse the whole document without the PULL interface, return `XML_ERR_x` code (zero on success) **/ +uint xml_parse(struct xml_context *ctx); + +/** Parse with the PULL interface, return `XML_STATE_x` (zero on EOF or fatal error) **/ +uint xml_next(struct xml_context *ctx); + +/** Equivalent to xml_next, but with temporarily changed ctx->pull value **/ 
+uint xml_next_state(struct xml_context *ctx, uint pull); + +/** May be called on XML_STATE_STAG to skip its content; can return `XML_STATE_ETAG` or `XML_STATE_EOF` on fatal error **/ +uint xml_skip_element(struct xml_context *ctx); + +/** Returns the current row (line) number in the document entity **/ +uint xml_row(struct xml_context *ctx); + +/* The default value of h_find_entity(), knows <, >, &, ' and " */ +struct xml_dtd_entity *xml_def_find_entity(struct xml_context *ctx, char *name); + +/* The default value of h_resolve_entity(), throws an error */ +void xml_def_resolve_entity(struct xml_context *ctx, struct xml_dtd_entity *ent); + +/** Throw a warning at the current node **/ +void xml_warn(struct xml_context *ctx, const char *format, ...); + +/** Throw an error at the current node **/ +void xml_error(struct xml_context *ctx, const char *format, ...); + +/** Throw a fatal error, aborting parsing. This can be called only from SAX hooks (and from parser internals). **/ +void NONRET xml_fatal(struct xml_context *ctx, const char *format, ...); + +/*** + * === Namespaces + * + * When namespace-aware parsing is requested by calling xml_ns_enable(), + * all namespaces are collected and assigned integer identifiers. Names of + * elements and attributes then always contain a namespace ID and a local + * name within the namespace. An ID of zero corresponds to an unspecified + * namespace. + * + * Once an ID is assigned, it is never changed, even if the namespace + * goes out of scope temporarily. + */ + +/** Request processing of namespaces (must be called before the first node is parsed). **/ +void xml_ns_enable(struct xml_context *ctx); + +/** + * Looks up namespace by its ID, dies on an invalid ID. Returns a pointer + * which remains valid until the context is cleaned up or reset. + **/ +const char *xml_ns_by_id(struct xml_context *ctx, uint ns); + +/** + * Looks up namespace by its name and returns its ID. Assigns a new ID if necessary. 
+ * When this function returns, @name is not referenced any more. + **/ +uint xml_ns_by_name(struct xml_context *ctx, const char *name); + +/** Well-known namespaces. **/ +enum xml_ns_id { + XML_NS_NONE = 0, /* This element has no namespace */ + XML_NS_XMLNS = 1, /* xmlns: */ + XML_NS_XML = 2, /* xml: */ +}; + +#endif diff --git a/libucw/ucw/Makefile b/libucw/ucw/Makefile new file mode 100644 index 0000000..767cef6 --- /dev/null +++ b/libucw/ucw/Makefile @@ -0,0 +1,240 @@ +# Makefile for the UCW Library (c) 1997--2015 Martin Mares + +DIRS+=ucw +LIBUCW=$(o)/ucw/libucw.pc + +ifdef CONFIG_UCW_UTILS +include $(s)/ucw/utils/Makefile +endif + +LIBUCW_MODS= \ + threads \ + alloc alloc_str alloc-std \ + bigalloc mempool mempool-str mempool-fmt eltpool \ + partmap hashfunc \ + slists simple-lists bitsig \ + log log-stream log-file log-syslog log-conf tbf \ + conf-context conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section conf-getopt \ + ipaccess \ + fastbuf ff-binary ff-string ff-printf ff-unicode ff-varint ff-stkstring \ + fb-file fb-mem fb-temp tempfile fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param fb-socket fb-multi fb-null \ + fw-hex \ + char-cat char-upper char-lower unicode varint stkstring \ + wildmatch regex \ + prime primetable \ + random-legacy random-fast random-strong \ + time-stamp time-timer time-conf \ + bit-ffs bit-fls bit-array \ + url \ + mainloop main-block main-rec \ + proctitle exitstatus runcmd \ + lizard lizard-safe adler32 sighandler \ + md5 sha1 sha1-hmac crc crc-tables \ + base64 base224 \ + io-careful io-sync io-mmap io-size \ + string str-esc str-split str-match str-imatch str-hex str-fix \ + bbuf gary \ + getopt \ + strtonum \ + resource trans res-fd res-mem res-subpool res-mempool res-eltpool \ + daemon daemon-ctrl \ + signames \ + opt opt-help opt-conf \ + table xtypes xtypes-basic xtypes-extra + +LIBUCW_MAIN_INCLUDES= \ + lib.h log.h tbf.h threads.h time.h \ + alloc.h mempool.h eltpool.h \ + 
clists.h slists.h simple-lists.h \ + string.h stkstring.h unicode.h varint.h chartype.h regex.h \ + wildmatch.h \ + unaligned.h \ + bbuf.h gbuf.h gary.h bitarray.h bitsig.h \ + hashfunc.h hashtable.h \ + heap.h binheap.h binheap-node.h \ + redblack.h \ + prime.h \ + random.h \ + bitops.h \ + conf.h getopt.h ipaccess.h \ + fastbuf.h io.h ff-unicode.h ff-varint.h ff-binary.h fb-socket.h fw-hex.h \ + url.h \ + mainloop.h \ + process.h \ + lizard.h \ + md5.h sha1.h crc.h \ + base64.h base224.h \ + kmp.h kmp-search.h trie.h binsearch.h \ + partmap.h \ + strtonum.h \ + resource.h trans.h \ + daemon.h \ + signames.h \ + sighandler.h \ + opt.h \ + table.h xtypes-extra.h \ + xtypes.h + +ifdef CONFIG_UCW_THREADS +# Some modules require threading +LIBUCW_MODS+=threads-conf workqueue asio +LIBUCW_MAIN_INCLUDES+=workqueue.h semaphore.h asio.h +endif + +ifdef CONFIG_UCW_FB_DIRECT +LIBUCW_MODS+=fb-direct +endif + +ifdef CONFIG_UCW_OWN_GETOPT +include $(s)/ucw/getopt/Makefile +endif + +LIBUCW_INCLUDES=$(LIBUCW_MAIN_INCLUDES) + +include $(s)/ucw/sorter/Makefile +include $(s)/ucw/doc/Makefile + +LIBUCW_MOD_PATHS=$(addprefix $(o)/ucw/,$(LIBUCW_MODS)) + +export LIBUCW_LIBS=-lm +ifdef CONFIG_UCW_THREADS +LIBUCW_LIBS+=-lpthread +endif +ifdef CONFIG_UCW_PCRE +LIBUCW_LIBS+=-lpcre +endif +ifdef CONFIG_UCW_MONOTONIC_CLOCK +LIBUCW_LIBS+=-lrt +endif + +$(o)/ucw/libucw$(LV).a: $(addsuffix .o,$(LIBUCW_MOD_PATHS)) +$(o)/ucw/libucw$(LV).so: $(addsuffix .oo,$(LIBUCW_MOD_PATHS)) +$(o)/ucw/libucw$(LV).so: SONAME_SUFFIX=.0 +$(o)/ucw/libucw$(LV).so: LIBS+=$(LIBUCW_LIBS) + +ifdef CONFIG_INSTALL_API +$(o)/ucw/libucw.pc: $(addprefix $(o)/ucw/libucw$(LV),.a) +endif + +$(o)/ucw/hashfunc.o $(o)/ucw/hashfunc.oo: CFLAGS += -funroll-loops +$(o)/ucw/lizard.o: CFLAGS += $(COPT2) -funroll-loops + +$(o)/ucw/ff-varint-t: $(LIBUCW) +$(o)/ucw/varint-t: $(LIBUCW) +$(o)/ucw/conf-test: $(o)/ucw/conf-test.o $(LIBUCW) +$(o)/ucw/hash-test: $(o)/ucw/hash-test.o $(LIBUCW) +$(o)/ucw/hashfunc-test: $(o)/ucw/hashfunc-test.o 
$(LIBUCW) +$(o)/ucw/asort-test: $(o)/ucw/asort-test.o $(LIBUCW) +$(o)/ucw/redblack-test: $(o)/ucw/redblack-test.o $(LIBUCW) +$(o)/ucw/binheap-test: $(o)/ucw/binheap-test.o $(LIBUCW) +$(o)/ucw/lizard-test: $(o)/ucw/lizard-test.o $(LIBUCW) +$(o)/ucw/kmp-test: $(o)/ucw/kmp-test.o $(LIBUCW) +$(o)/ucw/strtonum-test: $(o)/ucw/strtonum-test.o $(LIBUCW) +ifdef CONFIG_CHARSET +$(o)/ucw/kmp-test: $(LIBCHARSET) +endif +$(o)/ucw/ipaccess-test: $(o)/ucw/ipaccess-test.o $(LIBUCW) +$(o)/ucw/trie-test: $(o)/ucw/trie-test.o $(LIBUCW) +$(o)/ucw/opt-test: $(o)/ucw/opt-test.o $(LIBUCW) +$(o)/ucw/table-test: $(o)/ucw/table-test.o $(LIBUCW) +$(o)/ucw/table-test-2: $(o)/ucw/table-test-2.o $(LIBUCW) +$(o)/ucw/table-test-align: $(o)/ucw/table-test-align.o $(LIBUCW) +$(o)/ucw/xtypes-test: $(o)/ucw/xtypes-test.o $(LIBUCW) +$(o)/ucw/random-test: $(o)/ucw/random-test.o $(LIBUCW) + +TESTS+=$(addprefix $(o)/ucw/,regex.test unicode.test hash-test.test mempool.test stkstring.test \ + slists.test bbuf.test kmp-test.test getopt.test ff-unicode.test eltpool.test \ + trie-test.test string.test sha1.test asort-test.test binheap-test.test \ + fb-file.test fb-socket.test fb-grow.test fb-pool.test fb-atomic.test fb-limfd.test fb-temp.test \ + fb-mem.test fb-buffer.test fb-mmap.test fb-multi.test fb-null.test \ + redblack-test.test url.test strtonum-test.test \ + gary.test time.test crc.test signames.test md5.test bitops.test opt.test \ + table.test table-test.test table-test-2.test table-test-align.test xtypes-test.test random-test.test) + +$(o)/ucw/varint.test: $(o)/ucw/varint-t +$(o)/ucw/regex.test: $(o)/ucw/regex-t +$(o)/ucw/unicode.test: $(o)/ucw/unicode-t +$(o)/ucw/hash-test.test: $(o)/ucw/hash-test +$(o)/ucw/mempool.test: $(o)/ucw/mempool-t $(o)/ucw/mempool-fmt-t $(o)/ucw/mempool-str-t +$(o)/ucw/stkstring.test: $(o)/ucw/stkstring-t +$(o)/ucw/bitops.test: $(o)/ucw/bit-ffs-t $(o)/ucw/bit-fls-t +$(o)/ucw/slists.test: $(o)/ucw/slists-t +$(o)/ucw/kmp-test.test: $(o)/ucw/kmp-test +$(o)/ucw/bbuf.test: 
$(o)/ucw/bbuf-t +$(o)/ucw/getopt.test: $(o)/ucw/getopt-t +$(o)/ucw/ff-unicode.test: $(o)/ucw/ff-unicode-t +$(o)/ucw/ff-varint.test: $(o)/ucw/ff-varint-t +$(o)/ucw/eltpool.test: $(o)/ucw/eltpool-t +$(o)/ucw/string.test: $(o)/ucw/str-hex-t $(o)/ucw/str-esc-t $(o)/ucw/str-fix-t +$(o)/ucw/sha1.test: $(o)/ucw/sha1-t $(o)/ucw/sha1-hmac-t +$(o)/ucw/trie-test.test: $(o)/ucw/trie-test +$(o)/ucw/asort-test.test: $(o)/ucw/asort-test +$(o)/ucw/binheap-test.test: $(o)/ucw/binheap-test +$(o)/ucw/redblack-test.test: $(o)/ucw/redblack-test +$(o)/ucw/strtonum-test.test: $(o)/ucw/strtonum-test +$(addprefix $(o)/ucw/fb-,file.test grow.test pool.test socket.test atomic.test \ + limfd.test temp.test mem.test buffer.test mmap.test multi.test null.test): %.test: %-t +$(o)/ucw/url.test: $(o)/ucw/url-t +$(o)/ucw/gary.test: $(o)/ucw/gary-t +$(o)/ucw/time.test: $(o)/ucw/time-conf-t +$(o)/ucw/crc.test: $(o)/ucw/crc-t +$(o)/ucw/signames.test: $(o)/ucw/signames-t +$(o)/ucw/md5.test: $(o)/ucw/md5-t +$(o)/ucw/opt.test: $(o)/ucw/opt-test +$(o)/ucw/table.test: $(o)/ucw/table-t +$(o)/ucw/table-test.test: $(o)/ucw/table-test +$(o)/ucw/table-test-2.test: $(o)/ucw/table-test-2 +$(o)/ucw/table-test-align.test: $(o)/ucw/table-test-align +$(o)/ucw/xtypes-test.test: $(o)/ucw/xtypes-test +$(o)/ucw/random-test.test: $(o)/ucw/random-test + +ifdef CONFIG_UCW_THREADS +TESTS+=$(addprefix $(o)/ucw/,asio.test) +$(o)/ucw/asio.test: $(o)/ucw/asio-t +endif + +# The version of autoconf.h that is a part of the public API needs to have +# the internal symbols filtered out, so we generate ucw/autoconf.h in the +# configure script and let the public config.h refer to +# instead of plain "autoconf.h". 
+ +API_LIBS+=libucw +API_INCLUDES+=$(o)/ucw/.include-stamp +$(o)/ucw/.include-stamp: $(addprefix $(s)/ucw/,$(LIBUCW_INCLUDES)) $(o)/ucw/autoconf.h + $(Q)$(BUILDSYS)/install-includes $(/' <$(s)/ucw/config.h >run/include/ucw/config.h + $(Q)touch $@ +run/lib/pkgconfig/libucw.pc: $(o)/ucw/libucw.pc + +ifdef CONFIG_UCW_PERL +include $(s)/ucw/perl/Makefile +endif + +ifdef CONFIG_UCW_SHELL_UTILS +include $(s)/ucw/shell/Makefile +endif + +CONFIGS+=libucw + +INSTALL_TARGETS+=install-libucw-lib +install-libucw-lib: + install -d -m 755 $(DESTDIR)$(INSTALL_LIB_DIR) + install -m 644 run/lib/libucw$(LV).so.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw$(LV).so.0.0 + ln -sf libucw$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw$(LV).so.0 +.PHONY: install-libucw-lib + +INSTALL_TARGETS+=install-libucw-api +install-libucw-api: install-ucw-sorter-api + install -d -m 755 $(DESTDIR)$(INSTALL_LIB_DIR) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw/ $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + install -m 644 $(addprefix run/include/ucw/,$(LIBUCW_MAIN_INCLUDES) autoconf.h config.h) $(DESTDIR)$(INSTALL_INCLUDE_DIR)/ucw/ + install -m 644 run/lib/pkgconfig/libucw.pc $(DESTDIR)$(INSTALL_PKGCONFIG_DIR) + ln -sf libucw$(LV).so.0.0 $(DESTDIR)$(INSTALL_LIB_DIR)/libucw$(LV).so + install -m 644 run/lib/libucw$(LV).a $(DESTDIR)$(INSTALL_LIB_DIR) +.PHONY: install-libucw-api + +INSTALL_TARGETS+=install-libucw-config +install-libucw-config: + install -d -m 755 $(DESTDIR)$(INSTALL_CONFIG_DIR) + install -m 644 run/$(CONFIG_DIR)/libucw $(DESTDIR)$(INSTALL_CONFIG_DIR) +.PHONY: install-libucw-config diff --git a/libucw/ucw/adler32.c b/libucw/ucw/adler32.c new file mode 100644 index 0000000..5a7c47c --- /dev/null +++ b/libucw/ucw/adler32.c @@ -0,0 +1,48 @@ +/* + * adler32.c -- compute the Adler-32 checksum of a data stream + * + * Copyright (C) 1995--2003 Mark Adler + * + * Taken from zlib-1.2.1 and adjusted by Robert Spalek. For conditions of + * distribution and use, see copyright notice in zlib.h. 
+ */ + +#include +#include + +#define BASE 65521UL /* largest prime smaller than 65536 */ +#define NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); +#define MOD(a) a %= BASE + +uint +adler32_update(uint adler, const byte *buf, uint len) +{ + uint s1 = adler & 0xffff; + uint s2 = (adler >> 16) & 0xffff; + int k; + + if (!buf) return 1L; + + while (len > 0) { + k = len < NMAX ? (int)len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + buf += 16; + k -= 16; + } + if (k != 0) do { + s1 += *buf++; + s2 += s1; + } while (--k); + MOD(s1); + MOD(s2); + } + return (s2 << 16) | s1; +} diff --git a/libucw/ucw/alloc-std.c b/libucw/ucw/alloc-std.c new file mode 100644 index 0000000..f3b1d46 --- /dev/null +++ b/libucw/ucw/alloc-std.c @@ -0,0 +1,57 @@ +/* + * UCW Library -- Generic Allocator Using Malloc + * + * (c) 2014 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include + +#include + +/* Default allocator */ + +static void *ucw_std_alloc(struct ucw_allocator *a UNUSED, size_t size) +{ + return xmalloc(size); +} + +static void *ucw_std_realloc(struct ucw_allocator *a UNUSED, void *ptr, size_t old_size UNUSED, size_t new_size) +{ + return xrealloc(ptr, new_size); +} + +static void ucw_std_free(struct ucw_allocator *a UNUSED, void *ptr) +{ + xfree(ptr); +} + +struct ucw_allocator ucw_allocator_std = { + .alloc = ucw_std_alloc, + .realloc = ucw_std_realloc, + .free = ucw_std_free, +}; + +/* Zeroing allocator */ + +static void *ucw_zeroed_alloc(struct ucw_allocator *a UNUSED, size_t size) +{ + return xmalloc_zero(size); +} + +static void *ucw_zeroed_realloc(struct ucw_allocator *a UNUSED, void *ptr, size_t old_size, size_t new_size) +{ + ptr = xrealloc(ptr, new_size); + if (old_size < new_size) + bzero((byte *) ptr + old_size, new_size - old_size); + return ptr; +} + +struct ucw_allocator ucw_allocator_zeroed = { + .alloc = ucw_zeroed_alloc, + .realloc = ucw_zeroed_realloc, + .free = ucw_std_free, +}; diff --git a/libucw/ucw/alloc.c b/libucw/ucw/alloc.c new file mode 100644 index 0000000..816da2a --- /dev/null +++ b/libucw/ucw/alloc.c @@ -0,0 +1,51 @@ +/* + * UCW Library -- Memory Allocation + * + * (c) 2000 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include + +#include +#include + +void * +xmalloc(size_t size) +{ + void *x = malloc(size); + if (!x) + die("Cannot allocate %zu bytes of memory", size); + return x; +} + +void * +xmalloc_zero(size_t size) +{ + void *x = xmalloc(size); + bzero(x, size); + return x; +} + +void +xfree(void *ptr) +{ + /* + * Maybe it is a little waste of resources to make this a function instead + * of a macro, but xmalloc() is not used for anything critical anyway, + * so let's prefer simplicity. 
+ */ + free(ptr); +} + +void * +xrealloc(void *old, size_t size) +{ + /* We assume that realloc(NULL, x) works like malloc(x), which is true with the glibc. */ + void *x = realloc(old, size); + if (!x && size) + die("Cannot reallocate %zu bytes of memory", size); + return x; +} diff --git a/libucw/ucw/alloc.h b/libucw/ucw/alloc.h new file mode 100644 index 0000000..668c707 --- /dev/null +++ b/libucw/ucw/alloc.h @@ -0,0 +1,36 @@ +/* + * UCW Library -- Generic allocators + * + * (c) 2014 Martin Mares + */ + +#ifndef _UCW_ALLOC_H +#define _UCW_ALLOC_H + +/** + * This structure describes a generic allocator. It provides pointers + * to three functions, which handle the actual (re)allocations. + **/ +struct ucw_allocator { + void * (*alloc)(struct ucw_allocator *alloc, size_t size); + void * (*realloc)(struct ucw_allocator *alloc, void *ptr, size_t old_size, size_t new_size); + void (*free)(struct ucw_allocator *alloc, void *ptr); +}; + +/* alloc-std.c */ + +/** + * [[std]] + * This allocator uses xmalloc(), xrealloc() and xfree(). The memory + * it allocates is left uninitialized. + **/ +extern struct ucw_allocator ucw_allocator_std; + +/** + * [[zeroing]] + * This allocator uses xmalloc_zero(), xrealloc() and xfree(). All memory + * is zeroed upon allocation. + **/ +extern struct ucw_allocator ucw_allocator_zeroed; + +#endif diff --git a/libucw/ucw/alloc_str.c b/libucw/ucw/alloc_str.c new file mode 100644 index 0000000..90bc747 --- /dev/null +++ b/libucw/ucw/alloc_str.c @@ -0,0 +1,21 @@ +/* + * UCW Library -- String Allocation + * + * (c) 1997 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include + +#include + +char * +xstrdup(const char *s) +{ + if (!s) /* A NULL input yields NULL instead of crashing in strlen() */ + return NULL; + size_t l = strlen(s) + 1; /* Length including the terminating NUL; kept as size_t so the value returned by strlen() is never narrowed */ + return memcpy(xmalloc(l), s, l); +} diff --git a/libucw/ucw/asio.c b/libucw/ucw/asio.c new file mode 100644 index 0000000..d20a6d2 --- /dev/null +++ b/libucw/ucw/asio.c @@ -0,0 +1,289 @@ +/* + * UCW Library -- Asynchronous I/O + * + * (c) 2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include +#include + +#include +#include +#include + +static uint asio_num_users; +static struct worker_pool asio_wpool; + +static void +asio_init_unlocked(void) +{ + if (asio_num_users++) + return; + + DBG("ASIO: INIT"); + asio_wpool.num_threads = 1; + worker_pool_init(&asio_wpool); +} + +static void +asio_cleanup_unlocked(void) +{ + if (--asio_num_users) + return; + + DBG("ASIO: CLEANUP"); + worker_pool_cleanup(&asio_wpool); +} + +void +asio_init_queue(struct asio_queue *q) +{ + ucwlib_lock(); + asio_init_unlocked(); + ucwlib_unlock(); + + DBG("ASIO: New queue %p", q); + ASSERT(q->buffer_size); + q->allocated_requests = 0; + q->running_requests = 0; + q->running_writebacks = 0; + q->use_count = 0; + clist_init(&q->idle_list); + clist_init(&q->done_list); + work_queue_init(&asio_wpool, &q->queue); +} + +void +asio_cleanup_queue(struct asio_queue *q) +{ + DBG("ASIO: Removing queue %p", q); + ASSERT(!q->running_requests); + ASSERT(!q->running_writebacks); + ASSERT(!q->allocated_requests); + ASSERT(clist_empty(&q->done_list)); + + struct asio_request *r; + while (r = clist_remove_head(&q->idle_list)) + { + big_free(r->buffer, q->buffer_size); + xfree(r); + } + + work_queue_cleanup(&q->queue); + + ucwlib_lock(); + asio_cleanup_unlocked(); + ucwlib_unlock(); +} + +struct asio_request * +asio_get(struct asio_queue *q) +{ + q->allocated_requests++; + struct asio_request *r = clist_head(&q->idle_list); + if (!r) + { + r = 
xmalloc_zero(sizeof(*r)); + r->queue = q; + r->buffer = big_alloc(q->buffer_size); + DBG("ASIO: Got %p (new)", r); + } + else + { + clist_remove(&r->work.n); + DBG("ASIO: Got %p", r); + } + r->op = ASIO_FREE; + r->fd = -1; + r->len = 0; + r->status = -1; + r->returned_errno = -1; + r->submitted = 0; + return r; +} + +static int +asio_raw_wait(struct asio_queue *q) +{ + struct asio_request *r = (struct asio_request *) work_wait(&q->queue); + if (!r) + return 0; + r->submitted = 0; + q->running_requests--; + if (r->op == ASIO_WRITE_BACK) + { + DBG("ASIO: Finished writeback %p", r); + if (r->status < 0) + die("Asynchronous write to fd %d failed: %s", r->fd, strerror(r->returned_errno)); + if (r->status != (int)r->len) + die("Asynchronous write to fd %d wrote only %d bytes out of %u", r->fd, r->status, r->len); /* r->len is declared uint, hence %u */ + q->running_writebacks--; + asio_put(r); /* Write-back requests are recycled here rather than handed back through done_list */ + } + else + clist_add_tail(&q->done_list, &r->work.n); + return 1; +} + +static void +asio_handler(struct worker_thread *t UNUSED, struct work *w) +{ + struct asio_request *r = (struct asio_request *) w; + + DBG("ASIO: Servicing %p (%s on fd=%d, len=%u)", r, + (char*[]) { "?", "READ", "WRITE", "WRITEBACK" }[r->op], r->fd, r->len); + errno = 0; + switch (r->op) + { + case ASIO_READ: + r->status = read(r->fd, r->buffer, r->len); + break; + case ASIO_WRITE: + case ASIO_WRITE_BACK: + r->status = write(r->fd, r->buffer, r->len); + break; + default: + die("ASIO: Got unknown request type %d", r->op); + } + r->returned_errno = errno; + DBG("ASIO: Finished %p (status=%d, errno=%d)", r, r->status, r->returned_errno); +} + +void +asio_submit(struct asio_request *r) +{ + struct asio_queue *q = r->queue; + DBG("ASIO: Submitting %p on queue %p", r, q); + ASSERT(r->op != ASIO_FREE); + ASSERT(!r->submitted); + if (r->op == ASIO_WRITE_BACK) + { + while (q->running_writebacks >= q->max_writebacks) + { + DBG("ASIO: Waiting for free writebacks"); + if (!asio_raw_wait(q)) + ASSERT(0); + } + q->running_writebacks++; + } + 
q->running_requests++; + r->submitted = 1; + r->work.go = asio_handler; + r->work.priority = 0; + work_submit(&q->queue, &r->work); +} + +struct asio_request * +asio_wait(struct asio_queue *q) +{ + struct asio_request *r; + while (!(r = clist_head(&q->done_list))) + { + DBG("ASIO: Waiting on queue %p", q); + if (!asio_raw_wait(q)) + return NULL; + } + clist_remove(&r->work.n); + DBG("ASIO: Done %p", r); + return r; +} + +void +asio_put(struct asio_request *r) +{ + struct asio_queue *q = r->queue; + DBG("ASIO: Put %p", r); + ASSERT(!r->submitted); + ASSERT(q->allocated_requests); + clist_add_tail(&q->idle_list, &r->work.n); + q->allocated_requests--; +} + +void +asio_sync(struct asio_queue *q) +{ + DBG("ASIO: Syncing queue %p", q); + while (q->running_requests) + if (!asio_raw_wait(q)) + ASSERT(0); +} + +#ifdef TEST + +int main(void) +{ + struct asio_queue q; + struct asio_request *r; + + q.buffer_size = 4096; + q.max_writebacks = 2; + asio_init_queue(&q); + +#if 0 + + for (;;) + { + r = asio_get(&q); + r->op = ASIO_READ; + r->fd = 0; + r->len = q.buffer_size; + asio_submit(r); + r = asio_wait(&q); + ASSERT(r); + if (r->status <= 0) + { + asio_put(r); + break; + } + r->op = ASIO_WRITE_BACK; + r->fd = 1; + r->len = r->status; + asio_submit(r); + } + asio_sync(&q); + +#else + + r = asio_get(&q); + r->op = ASIO_READ; + r->fd = 0; + r->len = 1; + asio_submit(r); + r = asio_wait(&q); + ASSERT(r); + asio_put(r); + + for (uint i=0; i<10; i++) + { + r = asio_get(&q); + r->op = ASIO_WRITE_BACK; + r->fd = 1; + r->len = 1; + r->buffer[0] = 'A' + i; + asio_submit(r); + } + asio_sync(&q); + + r = asio_get(&q); + r->op = ASIO_WRITE; + r->fd = 1; + r->len = 1; + r->buffer[0] = '\n'; + asio_submit(r); + r = asio_wait(&q); + ASSERT(r); + asio_put(r); + +#endif + + asio_cleanup_queue(&q); + return 0; +} + +#endif diff --git a/libucw/ucw/asio.h b/libucw/ucw/asio.h new file mode 100644 index 0000000..37ef532 --- /dev/null +++ b/libucw/ucw/asio.h @@ -0,0 +1,80 @@ +/* + * UCW Library -- 
Asynchronous I/O + * + * (c) 2006 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_ASIO_H +#define _UCW_ASIO_H + +#include +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define asio_cleanup_queue ucw_asio_cleanup_queue +#define asio_get ucw_asio_get +#define asio_init_queue ucw_asio_init_queue +#define asio_put ucw_asio_put +#define asio_submit ucw_asio_submit +#define asio_sync ucw_asio_sync +#define asio_wait ucw_asio_wait +#endif + +/* + * This module takes care of scheduling and executing asynchronous I/O requests + * on files opened with O_DIRECT. It is primarily used by the fb-direct fastbuf + * back-end, but you can use it explicitly, too. + * + * You can define several I/O queues, each for use by a single thread. Requests + * on a single queue are always processed in order of their submits, requests + * from different queues may be interleaved (although the current implementation + * does not do so). Normal read and write requests are returned to their queue + * when they are completed. Write-back requests are automatically freed when + * done, but the number of such requests in fly is limited in order to avoid + * consuming all memory, so a submit of a write-back request can block. 
+ */ + +struct asio_queue { + uint buffer_size; // How large buffers do we use [user-settable] + uint max_writebacks; // Maximum number of writeback requests active [user-settable] + uint allocated_requests; + uint running_requests; // Total number of running requests + uint running_writebacks; // How many of them are writebacks + clist idle_list; // Recycled requests waiting for get + clist done_list; // Finished requests + struct work_queue queue; + uint use_count; // For use by the caller +}; + +enum asio_op { + ASIO_FREE, + ASIO_READ, + ASIO_WRITE, + ASIO_WRITE_BACK, // Background write with no success notification +}; + +struct asio_request { + struct work work; // asio_requests are internally just work nodes + struct asio_queue *queue; + byte *buffer; + int fd; + enum asio_op op; + uint len; + int status; + int returned_errno; + int submitted; + void *user_data; // For use by the caller +}; + +void asio_init_queue(struct asio_queue *q); // Initialize a new queue +void asio_cleanup_queue(struct asio_queue *q); +struct asio_request *asio_get(struct asio_queue *q); // Get an empty request +void asio_submit(struct asio_request *r); // Submit the request (can block if too many writebacks) +struct asio_request *asio_wait(struct asio_queue *q); // Wait for the first finished request, NULL if no more +void asio_put(struct asio_request *r); // Return a finished request for recycling +void asio_sync(struct asio_queue *q); // Wait until all requests are finished + +#endif /* !_UCW_ASIO_H */ diff --git a/libucw/ucw/asio.t b/libucw/ucw/asio.t new file mode 100644 index 0000000..a98974b --- /dev/null +++ b/libucw/ucw/asio.t @@ -0,0 +1,4 @@ +# Tests for asynchronous I/O + +Run: echo y | ../obj/ucw/asio-t +Out: ABCDEFGHIJ diff --git a/libucw/ucw/asort-test.c b/libucw/ucw/asort-test.c new file mode 100644 index 0000000..c35db74 --- /dev/null +++ b/libucw/ucw/asort-test.c @@ -0,0 +1,83 @@ +/* + * UCW Library -- Universal Array Sorter Test and Benchmark + * + * (c) 2003 Martin 
Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include + +#include +#include + +#define N 4000037 /* a prime */ + +struct elt { + u32 key; + u32 x, y; +}; + +static struct elt array[N]; + +#define ASORT_KEY_TYPE u32 +#define ASORT_ELT(i) array[i].key +#define ASORT_SWAP(i,j) do { struct elt e=array[j]; array[j]=array[i]; array[i]=e; } while(0) + +static void generate(void) +{ + uint i; + for (i=0; ikey < Y->key) + return -1; + else if (X->key > Y->key) + return 1; + else + return 0; +} + +#define ASORT_PREFIX(x) as_##x +#include + +int main(void) +{ + timestamp_t timer; + + generate(); + init_timer(&timer); + qsort(array, N, sizeof(array[0]), (int (*)(const void *, const void *)) qs_comp); + printf("qsort: %d ms\n", get_timer(&timer)); + check(); + generate(); + init_timer(&timer); + as_sort(N); + printf("asort: %d ms\n", get_timer(&timer)); + check(); + return errors; +} diff --git a/libucw/ucw/asort-test.t b/libucw/ucw/asort-test.t new file mode 100644 index 0000000..5325080 --- /dev/null +++ b/libucw/ucw/asort-test.t @@ -0,0 +1,3 @@ +# Test for the arraysort module + +Run: ../obj/ucw/asort-test diff --git a/libucw/ucw/base224.c b/libucw/ucw/base224.c new file mode 100644 index 0000000..6af7178 --- /dev/null +++ b/libucw/ucw/base224.c @@ -0,0 +1,170 @@ +/* + * UCW Library -- Base 224 Encoding & Decoding + * + * (c) 2002 Martin Mares + * + * The `base-224' encoding transforms general sequences of bytes + * to sequences of non-control 8-bit characters (0x20-0xff). Since + * 224 and 256 are incompatible bases (there is no k,l: 224^k=256^l) + * and we want to avoid lengthy calculations, we cheat a bit: + * + * Each base-224 digit can be represented as a (base-7 digit, base-32 digit) + * pair, so we pass the lower 5 bits directly and use a base-7 encoder + * for the upper part. 
We process blocks of 39 bits and encode them + * to 5 base-224 digits: we take 5x5 bits as the lower halves and convert + * the remaining 14 bits in base-7 (2^14 = 16384 < 16807 = 7^5) to get + * the 7 upper parts we need (with a little redundancy). Little endian + * ordering is used to make handling of partial blocks easy. + * + * We transform 39 source bits to 40 destination bits, stretching the data + * by 1/39 = approx. 2.56%. + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#undef LOCAL_DEBUG + +#include +#include + +static void +encode_block(byte *w, u32 hi, u32 lo) +{ + uint x, y; + + /* + * Splitting of the 39-bit block: [a-e][0-5] are the base-32 digits, *'s are used for base-7. + * +----------------+----------------+----------------+----------------+----------------+ + * +00******e4e3e2e1|e0******d4d3d2d1|d0******c4c3c2c1|c0******b4b3b2b1|b0****a4a3a2a1a0| + * +----------------+----------------+----------------+----------------+----------------+ + */ + + w[0] = lo & 0x1f; + w[1] = (lo >> 7) & 0x1f; + w[2] = (lo >> 15) & 0x1f; + w[3] = (lo >> 23) & 0x1f; + w[4] = (lo >> 31) | ((hi << 1) & 0x1e); + x = (lo >> 5) & 0x0003 + | (lo >> 10) & 0x001c + | (lo >> 15) & 0x00e0 + | (lo >> 20) & 0x0700 + | (hi << 7) & 0x3800; + DBG("<<< h=%08x l=%08x x=%d", hi, lo, x); + for (y=0; y<5; y++) + { + w[y] += 0x20 + ((x % 7) << 5); + x /= 7; + } +} + +uint +base224_encode(byte *dest, const byte *src, uint len) +{ + u32 lo=0, hi=0; /* 64-bit buffer accumulating input bits */ + uint i=0; /* How many source bits do we have buffered */ + u32 x; + byte *w=dest; + + while (len--) + { + x = *src++; + if (i < 32) + { + lo |= x << i; + if (i > 24) + hi |= x >> (32-i); + } + else + hi |= x << (i-32); + i += 8; + if (i >= 39) + { + encode_block(w, hi, lo); + w += 5; + lo = hi >> 7; + hi = 0; + i -= 39; + } + } + if (i) /* Partial block */ + { + encode_block(w, hi, lo); + w += (i+8)/8; /* Just check 
logarithms if you want to understand */ + } + return w - dest; +} + +uint +base224_decode(byte *dest, const byte *src, uint len) +{ + u32 hi=0, lo=0; /* 64-bit buffer accumulating output bits */ + uint i=0; /* How many bits do we have accumulated */ + u32 h, l; /* Decoding of the current block */ + uint x; /* base-7 part of the current block */ + uint len0; + byte *start = dest; + + do + { + if (!len) + break; + len0 = len; + + ASSERT(*src >= 0x20); /* byte 0 */ + h = 0; + l = *src & 0x1f; + x = (*src++ >> 5) - 1; + if (!--len) + goto blockend; + + ASSERT(*src >= 0x20); /* byte 1 */ + l |= (*src & 0x1f) << 7; + x += ((*src++ >> 5) - 1) * 7; + if (!--len) + goto blockend; + + ASSERT(*src >= 0x20); /* byte 2 */ + l |= (*src & 0x1f) << 15; + x += ((*src++ >> 5) - 1) * 7*7; + if (!--len) + goto blockend; + + ASSERT(*src >= 0x20); /* byte 3 */ + l |= (*src & 0x1f) << 23; + x += ((*src++ >> 5) - 1) * 7*7*7; + if (!--len) + goto blockend; + + ASSERT(*src >= 0x20); /* byte 4 */ + l |= *src << 31; + h = (*src & 0x1f) >> 1; + x += ((*src++ >> 5) - 1) * 7*7*7*7; + --len; + + blockend: + len0 -= len; + l |= ((x & 0x0003) << 5) /* Decode base-7 */ + | ((x & 0x001c) << 10) + | ((x & 0x00e0) << 15) + | ((x & 0x0700) << 20); + h |= (x & 0x3800) >> 7; + + DBG("<<< i=%d h=%08x l=%08x x=%d len0=%d", i, h, l, x, len0); + lo |= l << i; + hi |= h << i; + if (i) + hi |= l >> (32-i); + i += len0*8 - 1; + + while (i >= 8) + { + *dest++ = lo; + lo = (lo >> 8U) | (hi << 24); + hi >>= 8; + i -= 8; + } + } + while (len0 == 5); + return dest-start; +} diff --git a/libucw/ucw/base224.h b/libucw/ucw/base224.h new file mode 100644 index 0000000..0dd2cab --- /dev/null +++ b/libucw/ucw/base224.h @@ -0,0 +1,41 @@ +/* + * UCW Library -- Base 224 Encoding & Decoding + * + * (c) 2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifdef CONFIG_UCW_CLEAN_ABI +#define base224_decode ucw_base224_decode +#define base224_encode ucw_base224_encode +#endif + +/** + * Encodes @len bytes of data pointed to by @src by base224 encoding. + * Stores them in @dest and returns the number of bytes the output + * takes. + */ +uint base224_encode(byte *dest, const byte *src, uint len); +/** + * Decodes @len bytes of data pointed to by @src from base224 encoding. + * Every input byte must be >= 0x20: base224_decode() ASSERTs this and does not skip invalid characters. The result is stored into @dest + * and length of the result is returned. + */ +uint base224_decode(byte *dest, const byte *src, uint len); + +/** + * Use this macro to calculate @base224_encode() output buffer size. + * base224_encode() always stores whole 5-byte blocks, so a final partial block may write up to 4 bytes + * past the returned length; this macro rounds up to whole blocks to cover that. + */ +#define BASE224_ENC_LENGTH(x) (((x)*8+38)/39*5) + +/* + * When called for BASE224_IN_CHUNK-byte chunks, the result will be + * always BASE224_OUT_CHUNK bytes long. If a longer block is split + * to such chunks, the result will be identical. + */ +#define BASE224_IN_CHUNK 39 /** Chunk size on the un-encoded side. **/ +#define BASE224_OUT_CHUNK 40 /** Chunk size on the encoded side. **/ diff --git a/libucw/ucw/base64.c b/libucw/ucw/base64.c new file mode 100644 index 0000000..8d8f837 --- /dev/null +++ b/libucw/ucw/base64.c @@ -0,0 +1,127 @@ +/* + * UCW Library -- Base 64 Encoding & Decoding + * + * (c) 2002, Robert Spalek + * (c) 2018, Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License.
+ */ + +#undef LOCAL_DEBUG + +#include +#include + +const byte base64_enc_table[65] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +const byte base64_dec_table[256] = { + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x3e, 0x80, 0x80, 0x80, 0x3f, + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x80, 0x80, 0x80, 0x40, 0x80, 0x80, + 0x80, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +}; + +uint base64_encode(byte *dest, const byte *src, uint len) +{ + const byte *ptr = src; + const byte *end = src + len; + byte *out = dest; + + /* keep going until we have less than 24 bits */ + if (end - ptr >= 3) + for (const byte *x 
= end - 2; ptr < x; ) + { + out[0] = base64_enc_table[ptr[0] >> 2]; + out[1] = base64_enc_table[((ptr[0] & 0x03) << 4) + (ptr[1] >> 4)]; + out[2] = base64_enc_table[((ptr[1] & 0x0f) << 2) + (ptr[2] >> 6)]; + out[3] = base64_enc_table[ptr[2] & 0x3f]; + out += 4; + ptr += 3; + } + + /* now deal with the tail end of things */ + if (ptr != end) + { + out[0] = base64_enc_table[ptr[0] >> 2]; + out[3] = BASE64_PADDING; + if (end - ptr >= 2) + { + out[1] = base64_enc_table[((ptr[0] & 0x03) << 4) + (ptr[1] >> 4)]; + out[2] = base64_enc_table[(ptr[1] & 0x0f) << 2]; + } + else + { + out[1] = base64_enc_table[(ptr[0] & 0x03) << 4]; + out[2] = BASE64_PADDING; + } + out += 4; + } + + return out - dest; +} + +uint base64_decode(byte *dest, const byte *src, uint len) +{ + const byte *ptr = src; + const byte *end = src + len; + byte *out = dest; + while (1) + { + uint val, ch; + do + { + if (ptr == end || (ch = base64_dec_table[*ptr++]) == BASE64_DEC_PADDING) + goto end; + } + while (ch > BASE64_DEC_PADDING); + val = ch; + do + { + if (ptr == end || (ch = base64_dec_table[*ptr++]) == BASE64_DEC_PADDING) + goto end; // Broken base64 encoding, we only have 6 bits + } + while (ch > BASE64_DEC_PADDING); + val = (val << 6) | ch; + do + { + if (ptr == end || (ch = base64_dec_table[*ptr++]) == BASE64_DEC_PADDING) + { + out[0] = val >> 4; + out += 1; + goto end; + } + } + while (ch > BASE64_DEC_PADDING); + val = (val << 6) | ch; + do + { + if (ptr == end || (ch = base64_dec_table[*ptr++]) == BASE64_DEC_PADDING) + { + out[0] = val >> 10; + out[1] = val >> 2; + out += 2; + goto end; + } + } + while (ch > BASE64_DEC_PADDING); + val = (val << 6) | ch; + out[0] = val >> 16; + out[1] = val >> 8; + out[2] = val; + out += 3; + } +end: + return out - dest; +} diff --git a/libucw/ucw/base64.h b/libucw/ucw/base64.h new file mode 100644 index 0000000..a8edc36 --- /dev/null +++ b/libucw/ucw/base64.h @@ -0,0 +1,63 @@ +/* + * UCW Library -- Base 64 Encoding & Decoding + * + * (c) 2002, Robert Spalek + * 
(c) 2017, Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifdef CONFIG_UCW_CLEAN_ABI +#define base64_decode ucw_base64_decode +#define base64_encode ucw_base64_encode +#define base64_enc_table ucw_base64_enc_table +#define base64_dec_table ucw_base64_dec_table +#endif + +/** + * Encodes @len bytes of data pointed to by @src by base64 encoding. + * Stores them in @dest and returns the number of bytes the output + * takes. + */ +uint base64_encode(byte *dest, const byte *src, uint len); +/** + * Decodes @len bytes of data pointed to by @src from base64 encoding. + * All invalid characters are ignored. The result is stored into @dest + * and length of the result is returned. It is allowed to use the + * same buffer for both input and output. + */ +uint base64_decode(byte *dest, const byte *src, uint len); + +/** + * Use this macro to calculate @base64_encode() output buffer size. + */ +#define BASE64_ENC_LENGTH(x) (((x)+2)/3 *4) + +/* + * When called for BASE64_IN_CHUNK-byte chunks, the result will be + * always BASE64_OUT_CHUNK bytes long. If a longer block is split + * to such chunks, the result will be identical. + */ +#define BASE64_IN_CHUNK 3 /** Size of chunk on the un-encoded side. **/ +#define BASE64_OUT_CHUNK 4 /** Size of chunk on the encoded side. **/ + +/* + * Lookup table for fast encoding. + * For each 6bit value contains corresponding base64 character. + */ +extern const byte base64_enc_table[65]; +#define BASE64_PADDING '=' /* Padding character */ + +/* + * Lookup table for fast decoding: + * -- for valid base64 characters contains their 6bit values + * -- for BASE64_PADDING character contains special value BASE64_DEC_PADDING + * -- for all other characters contains BASE64_DEC_INVALID + * + * Note that BASE64_DEC_INVALID is greater than BASE64_DEC_PADDING + * (can be useful to know for some optimizations). 
+ */ +extern const byte base64_dec_table[256]; +#define BASE64_DEC_PADDING 0x40 +#define BASE64_DEC_INVALID 0x80 diff --git a/libucw/ucw/bbuf.c b/libucw/ucw/bbuf.c new file mode 100644 index 0000000..36ece2a --- /dev/null +++ b/libucw/ucw/bbuf.c @@ -0,0 +1,86 @@ +/* + * UCW Library -- A simple growing buffers for byte-sized items + * + * (c) 2006 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include + +#include + +char * +bb_vprintf_at(bb_t *bb, size_t ofs, const char *fmt, va_list args) +{ + bb_grow(bb, ofs + 1); + va_list args2; + va_copy(args2, args); + int cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2); + va_end(args2); + if (cnt < 0) + { + /* Our C library doesn't support C99 return value of vsnprintf, so we need to iterate */ + do + { + bb_do_grow(bb, bb->len + 1); + va_copy(args2, args); + cnt = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2); + va_end(args2); + } + while (cnt < 0); + } + else if ((uint)cnt >= bb->len - ofs) + { + bb_do_grow(bb, ofs + cnt + 1); + va_copy(args2, args); + int cnt2 = vsnprintf(bb->ptr + ofs, bb->len - ofs, fmt, args2); + va_end(args2); + ASSERT(cnt2 == cnt); + } + return bb->ptr + ofs; +} + +char * +bb_printf_at(bb_t *bb, size_t ofs, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + char *res = bb_vprintf_at(bb, ofs, fmt, args); + va_end(args); + return res; +} + +char * +bb_vprintf(bb_t *bb, const char *fmt, va_list args) +{ + return bb_vprintf_at(bb, 0, fmt, args); +} + +char * +bb_printf(bb_t *bb, const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + char *res = bb_vprintf_at(bb, 0, fmt, args); + va_end(args); + return res; +} + +#ifdef TEST + +int main(void) +{ + bb_t bb; + bb_init(&bb); + char *x = bb_printf(&bb, "", "World"); + fputs(x, stdout); + x = bb_printf_at(&bb, 5, "\n", "World"); + fputs(x, stdout); + bb_done(&bb); + return 0; +} + +#endif diff --git a/libucw/ucw/bbuf.h b/libucw/ucw/bbuf.h new file mode 100644 index 0000000..0cbccbc --- /dev/null +++ b/libucw/ucw/bbuf.h @@ -0,0 +1,56 @@ +/* + * UCW Library -- A simple growing buffer for byte-sized items. + * + * (c) 2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_BBUF_H +#define _UCW_BBUF_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define bb_printf ucw_bb_printf +#define bb_printf_at ucw_bb_printf_at +#define bb_vprintf ucw_bb_vprintf +#define bb_vprintf_at ucw_bb_vprintf_at +#endif + +#define GBUF_TYPE byte +#define GBUF_PREFIX(x) bb_##x +#include + +/** + * printf() into a growing buffer with `va_list` arguments. + * Generates a `'\0'`-terminated string at the beginning of the buffer + * and returns pointer to it. + * + * See @bb_printf(). + **/ +char *bb_vprintf(bb_t *bb, const char *fmt, va_list args); + +/** + * printf() into a growing buffer. + * Generates a `'\0'`-terminated string at the beginning of the buffer + * and returns pointer to it. + * + * See @bb_vprintf(). + **/ +char *bb_printf(bb_t *bb, const char *fmt, ...); + +/** + * Like @bb_vprintf(), but it does not start at the beginning of the + * buffer, but @ofs bytes further. + * + * Returns pointer to the new string (eg. @ofs bytes after the + * beginning of buffer). + **/ +char *bb_vprintf_at(bb_t *bb, size_t ofs, const char *fmt, va_list args); + +/** + * Like @bb_vprintf_at(), but it takes individual arguments. 
+ **/ +char *bb_printf_at(bb_t *bb, size_t ofs, const char *fmt, ...); + +#endif diff --git a/libucw/ucw/bbuf.t b/libucw/ucw/bbuf.t new file mode 100644 index 0000000..ca8f64f --- /dev/null +++ b/libucw/ucw/bbuf.t @@ -0,0 +1,4 @@ +# Tests for growing buffers + +Run: ../obj/ucw/bbuf-t +Out: diff --git a/libucw/ucw/bigalloc.c b/libucw/ucw/bigalloc.c new file mode 100644 index 0000000..0044846 --- /dev/null +++ b/libucw/ucw/bigalloc.c @@ -0,0 +1,113 @@ +/* + * UCW Library -- Allocation of Large Aligned Buffers + * + * (c) 2006--2007 Martin Mares + * (c) 2007 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include + +#include +#include +#include + +void * +page_alloc(u64 len) +{ + if (!len) + return NULL; + if (len > SIZE_MAX) + die("page_alloc: Size %llu is too large for the current architecture", (long long) len); + ASSERT(!(len & (CPU_PAGE_SIZE-1))); + byte *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (p == (byte*) MAP_FAILED) + die("Cannot mmap %llu bytes of memory: %m", (long long)len); + return p; +} + +void * +page_alloc_zero(u64 len) +{ + void *p = page_alloc(len); + bzero(p, len); + return p; +} + +void +page_free(void *start, u64 len) +{ + ASSERT(!(len & (CPU_PAGE_SIZE-1))); + ASSERT(!((uintptr_t) start & (CPU_PAGE_SIZE-1))); + munmap(start, len); +} + +void * +page_realloc(void *start, u64 old_len, u64 new_len) +{ + void *p = page_alloc(new_len); + memcpy(p, start, MIN(old_len, new_len)); + page_free(start, old_len); + return p; +} + +static u64 +big_round(u64 len) +{ + return ALIGN_TO(len, (u64)CPU_PAGE_SIZE); +} + +void * +big_alloc(u64 len) +{ + u64 l = big_round(len); + if (l > SIZE_MAX - 2*CPU_PAGE_SIZE) + die("big_alloc: Size %llu is too large for the current architecture", (long long) len); +#ifdef CONFIG_UCW_DEBUG + l += 2*CPU_PAGE_SIZE; +#endif + byte *p = page_alloc(l); +#ifdef CONFIG_UCW_DEBUG + *(u64*)p = len; 
+ mprotect(p, CPU_PAGE_SIZE, PROT_NONE); + mprotect(p+l-CPU_PAGE_SIZE, CPU_PAGE_SIZE, PROT_NONE); + p += CPU_PAGE_SIZE; +#endif + return p; +} + +void * +big_alloc_zero(u64 len) +{ + void *p = big_alloc(len); + bzero(p, big_round(len)); + return p; +} + +void +big_free(void *start, u64 len) +{ + byte *p = start; + u64 l = big_round(len); +#ifdef CONFIG_UCW_DEBUG + p -= CPU_PAGE_SIZE; + mprotect(p, CPU_PAGE_SIZE, PROT_READ); + ASSERT(*(u64*)p == len); + l += 2*CPU_PAGE_SIZE; +#endif + page_free(p, l); +} + +#ifdef TEST + +int main(void) +{ + byte *p = big_alloc(123456); + // p[-1] = 1; + big_free(p, 123456); + return 0; +} + +#endif diff --git a/libucw/ucw/binheap-node.h b/libucw/ucw/binheap-node.h new file mode 100644 index 0000000..c0cb16b --- /dev/null +++ b/libucw/ucw/binheap-node.h @@ -0,0 +1,36 @@ +/* + * UCW Library -- Binomial Heaps: Declarations + * + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_BINHEAP_NODE_H +#define _UCW_BINHEAP_NODE_H + +/*** + * [[common]] + * Common definitions + * ------------------ + ***/ + +/** + * Common header of binomial heap nodes. + **/ +struct bh_node { + struct bh_node *first_son; + struct bh_node *last_son; + struct bh_node *next_sibling; + byte order; +}; + +/** + * A binomial heap. + **/ +struct bh_heap { + struct bh_node root; +}; + +#endif diff --git a/libucw/ucw/binheap-test.c b/libucw/ucw/binheap-test.c new file mode 100644 index 0000000..8d95367 --- /dev/null +++ b/libucw/ucw/binheap-test.c @@ -0,0 +1,94 @@ +/* + * UCW Library -- Binomial Heaps: Testing + * + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include + +#include +#include + +#define BH_PREFIX(x) bht_##x +#define BH_WANT_INSERT +#define BH_WANT_FINDMIN +#define BH_WANT_DELETEMIN +#include + +struct item { + struct bh_node n; + uint key; +}; + +static inline uint bht_key(struct bh_node *n) +{ + return ((struct item *)n)->key; +} + +static inline uint bht_less(struct bh_node *a, struct bh_node *b) +{ + return bht_key(a) < bht_key(b); +} + +static void +bht_do_dump(struct bh_node *a, struct bh_node *expected_last, uint offset) +{ + if (!a) + return; + printf("%*s", offset, ""); + printf("[%d](%d)%s\n", a->order, bht_key(a), a == expected_last ? " L" : ""); + for (struct bh_node *b=a->first_son; b; b=b->next_sibling) + bht_do_dump(b, a->last_son, offset+1); +} + +static void +bht_dump(struct bh_heap *h) +{ + printf("root\n"); + for (struct bh_node *b=h->root.first_son; b; b=b->next_sibling) + bht_do_dump(b, b->last_son, 1); +} + +#include + +int main(void) +{ + uint i; + struct bh_heap h; +#define N 1048576 +#define K(i) ((259309*i+1009)%N) + + bht_init(&h); + + for (i=0; ikey = K(i); + // printf("Insert %d\n", a->key); + bht_insert(&h, &a->n); + // bht_dump(&h); + } + // bht_dump(&h); + ASSERT(bht_key(bht_findmin(&h)) == 0); + uint cnt = 0; + BH_FOR_ALL(bht_, &h, a) + { + cnt++; + } + BH_END_FOR; + printf("cnt=%d\n", cnt); + ASSERT(cnt == N); + for (i=0; ikey); + ASSERT(a->key == i); + // bht_dump(&h); + } + bht_dump(&h); + + return 0; +} diff --git a/libucw/ucw/binheap-test.t b/libucw/ucw/binheap-test.t new file mode 100644 index 0000000..109d6e9 --- /dev/null +++ b/libucw/ucw/binheap-test.t @@ -0,0 +1,5 @@ +# Test for the binheap module + +Run: ../obj/ucw/binheap-test +Out: cnt=1048576 + root diff --git a/libucw/ucw/binheap.h b/libucw/ucw/binheap.h new file mode 100644 index 0000000..f367899 --- /dev/null +++ b/libucw/ucw/binheap.h @@ -0,0 +1,208 @@ +/* + * UCW Library -- Binomial Heaps + * + * (c) 2003 Martin Mares + * + * This software may be freely distributed and used according to the terms + 
* of the GNU Lesser General Public License. + */ + +/* + * This is a generic implementation of Binomial Heaps. Each time you include + * this file with parameters set in the corresponding preprocessor macros + * as described below, it generates functions for manipulating the particular + * version of the binomial heap. + */ + +/*** + * [[generator]] + * Interface to the generator + * -------------------------- + * + * To use the binomial heaps, you need to specify: + * + * - `BH_PREFIX(x)` -- macro to add a name prefix (used on all global names + * defined by the generator). All further names mentioned + * here except for macro names will be implicitly prefixed. + * + * Then you continue by including `ucw/binheap-node.h` which defines <> + * and <> (both without prefix). The heap elements are always allocated by + * you and they must include `struct bh_node` which serves as a handle used for all + * the heap functions and it contains all information needed for heap-keeping. + * The heap itself is also allocated by you and it's represented by `struct bh_heap`. + * + * When you have the declaration of heap nodes, you continue with defining: + * + * - `less(p,q)` -- returns `1` if the key corresponding to `bh_node *p` + * is less than the one corresponding to `*q`. + * + * Then specify what operations you request: + * + * - `init(heap\*)` -- initialize the heap (always defined). + * - `insert(heap\*, node\*)` -- insert the node to the heap (`BH_WANT_INSERT`). + * - `node\* findmin(heap\*)` -- find node with minimum key (`BH_WANT_FINDMIN`). + * - `node\* deletemin(heap\*)` -- findmin and delete the node (`BH_WANT_DELETEMIN`). + * + * Then include `ucw/binheap.h` and voila, you have a binomial heap + * suiting all your needs (at least those which you've revealed :) ). 
+ * + * You also get a iterator macro at no extra charge: + * + * BH_FOR_ALL(bh_prefix, heap*, variable) + * { + * // node* variable gets declared automatically + * do_something_with_node(variable); + * // use BH_BREAK and BH_CONTINUE instead of break and continue + * // you must not alter contents of the binomial heap here + * } + * BH_END_FOR; + * + * After including this file, all parameter macros are automatically undef'd. + ***/ + +#define BH_NODE struct bh_node +#define BH_HEAP struct bh_heap + +static void +BH_PREFIX(merge)(BH_NODE *a, BH_NODE *b) +{ + BH_NODE **pp = &a->first_son; + BH_NODE *q = b->first_son; + BH_NODE *p, *r, *s; + + while ((p = *pp) && q) + { + /* p,q are the next nodes of a,b; pp points to where p is linked */ + if (p->order < q->order) /* p is smaller => skip it */ + pp = &p->next_sibling; + else if (p->order > q->order) /* q is smaller => insert it before p */ + { + r = q; + q = q->next_sibling; + r->next_sibling = p; + *pp = r; + pp = &r->next_sibling; + } + else /* p and q are of the same order => need to merge them */ + { + if (BH_PREFIX(less)(p, q)) /* we'll hang r below s */ + { + r = q; + s = p; + } + else + { + r = p; + s = q; + } + *pp = p->next_sibling; /* unlink p,q from their lists */ + q = q->next_sibling; + + if (s->last_son) /* merge r to s, increasing order */ + s->last_son->next_sibling = r; + else + s->first_son = r; + s->last_son = r; + s->order++; + r->next_sibling = NULL; + + if (!q || q->order > s->order) /* put the result into the b's list if possible */ + { + s->next_sibling = q; + q = s; + } + else /* otherwise put the result to the a's list */ + { + p = s->next_sibling = *pp; + *pp = s; + if (p && p->order == s->order) /* 3-collision */ + pp = &s->next_sibling; + } + } + } + if (!p) + *pp = q; +} + +#ifdef BH_WANT_INSERT +static void +BH_PREFIX(insert)(BH_HEAP *heap, BH_NODE *a) +{ + BH_NODE sh; + + sh.first_son = a; + a->first_son = a->last_son = a->next_sibling = NULL; + a->order = 0; + 
BH_PREFIX(merge)(&heap->root, &sh); +} +#endif + +#ifdef BH_WANT_FINDMIN +static BH_NODE * +BH_PREFIX(findmin)(BH_HEAP *heap) +{ + BH_NODE *p, *best; + + best = NULL; + for (p=heap->root.first_son; p; p=p->next_sibling) + if (!best || BH_PREFIX(less)(p, best)) + best = p; + return best; +} +#endif + +#ifdef BH_WANT_DELETEMIN +static BH_NODE * +BH_PREFIX(deletemin)(BH_HEAP *heap) +{ + BH_NODE *p, **pp, **bestp; + + bestp = NULL; + for (pp=&heap->root.first_son; p=*pp; pp=&p->next_sibling) + if (!bestp || BH_PREFIX(less)(p, *bestp)) + bestp = pp; + if (!bestp) + return NULL; + + p = *bestp; + *bestp = p->next_sibling; + BH_PREFIX(merge)(&heap->root, p); + return p; +} +#endif + +static inline void +BH_PREFIX(init)(BH_HEAP *heap) +{ + bzero(heap, sizeof(*heap)); +} + +#ifndef BH_FOR_ALL + +#define BH_FOR_ALL(bh_px, bh_heap, bh_var) \ +do { \ + struct bh_node *bh_stack[32]; \ + uint bh_sp = 0; \ + if (bh_stack[0] = (bh_heap)->root.first_son) \ + bh_sp++; \ + while (bh_sp) { \ + struct bh_node *bh_var = bh_stack[--bh_sp]; \ + if (bh_var->next_sibling) \ + bh_stack[bh_sp++] = bh_var->next_sibling; \ + if (bh_var->first_son) \ + bh_stack[bh_sp++] = bh_var->first_son; +#define BH_END_FOR \ + } \ +} while (0) + +#define BH_BREAK { bh_sp=0; break; } +#define BH_CONTINUE continue + +#endif + +#undef BH_PREFIX +#undef BH_NODE +#undef BH_HEAP +#undef BH_WANT_INSERT +#undef BH_WANT_FINDMIN +#undef BH_WANT_DELETEMIN diff --git a/libucw/ucw/binsearch.h b/libucw/ucw/binsearch.h new file mode 100644 index 0000000..99bf33a --- /dev/null +++ b/libucw/ucw/binsearch.h @@ -0,0 +1,48 @@ +/* + * UCW Library -- Generic Binary Search + * + * (c) 2005 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/*** + * [[defs]] + * Definitions + * ----------- + ***/ + +/** + * Find the first element not lower than @x in the sorted array @ary of @N elements (non-decreasing order). 
+ * Returns the index of the found element or @N if no such element exists. Uses `ary_lt_x(ary,i,x)` to compare the @i'th element with @x.
+ * The time complexity is `O(log(N))`. The macro declares locals `l`, `r` and `m`, so its arguments must not refer to variables of those names.
+ **/
+#define BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ary_lt_x) ({ \
+  uint l = 0, r = (N); \
+  while (l < r) \
+    { \
+      uint m = l + (r-l)/2; /* midpoint computed without overflowing l+r */ \
+      if (ary_lt_x(ary,m,x)) \
+        l = m+1; \
+      else \
+        r = m; \
+    } \
+  l; \
+})
+
+/**
+ * The default comparison macro for @BIN_SEARCH_FIRST_GE_CMP(). Fully parenthesized, so it is safe inside larger expressions.
+ **/
+#define ARY_LT_NUM(ary,i,x) ((ary)[i] < (x))
+
+/**
+ * Same as @BIN_SEARCH_FIRST_GE_CMP(), but uses the default `<` operator for comparisons.
+ **/
+#define BIN_SEARCH_FIRST_GE(ary,N,x) BIN_SEARCH_FIRST_GE_CMP(ary,N,x,ARY_LT_NUM)
+
+/**
+ * Search the sorted array @ary of @N elements (non-decreasing) for the first occurrence of @x.
+ * Returns the index or -1 if no such element exists. Uses the `<` operator for comparisons. Declares a local `i`, so the arguments must not refer to a variable of that name.
+ **/
+#define BIN_SEARCH_EQ(ary,N,x) ({ int i = BIN_SEARCH_FIRST_GE(ary,N,x); if (i >= (N) || (ary)[i] != (x)) i=-1; i; })
diff --git a/libucw/ucw/bit-array.c b/libucw/ucw/bit-array.c
new file mode 100644
index 0000000..ff8d473
--- /dev/null
+++ b/libucw/ucw/bit-array.c
@@ -0,0 +1,59 @@
+/*
+ * UCW Library -- Support routines for bitarray
+ *
+ * (c) 2012 Pavel Charvat
+ * (c) 2013 Martin Mares
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */ + +#include +#include +#include + +uint bit_array_count_bits(bitarray_t a, uint n) +{ + uint m = 0; + n = BIT_ARRAY_WORDS(n); /* from here on, n counts 32-bit words, not bits */ + while (n--) + m += bit_count(*a++); + return m; +} + +bitarray_t bit_array_xrealloc(bitarray_t a, uint old_n, uint new_n) +{ + uint old_bytes = BIT_ARRAY_BYTES(old_n); + uint new_bytes = BIT_ARRAY_BYTES(new_n); + if (old_bytes == new_bytes) + return a; + a = xrealloc(a, new_bytes); + if (old_bytes < new_bytes) + bzero((byte *) a + old_bytes, new_bytes - old_bytes); /* byte offset: a is u32 *, so a + old_bytes would land 4x too far */ + return a; +} + +#ifdef TEST + +#include +#include + +int main(void) +{ + char buf[1024]; + bitarray_t a = alloca(BIT_ARRAY_BYTES(sizeof(buf))); + while (1) + { + if (!fgets(buf, sizeof(buf), stdin)) + return 0; + uint n; + for (n = 0; buf[n] == '0' || buf[n] == '1'; n++); + bit_array_zero(a, n); + for (uint i = 0; i < n; i++) + if (buf[i] == '1') + bit_array_set(a, i); + printf("%u\n", bit_array_count_bits(a, n)); + } +} + +#endif diff --git a/libucw/ucw/bit-ffs.c b/libucw/ucw/bit-ffs.c new file mode 100644 index 0000000..88f21e0 --- /dev/null +++ b/libucw/ucw/bit-ffs.c @@ -0,0 +1,46 @@ +/* + * UCW Library -- Find Lowest Set Bit + * + * (c) 2005 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include + +/* Just a table, the rest is in bitops.h */ + +const byte ffs_table[] = { + 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + +#ifdef TEST + +#include + +int main(void) +{ + uint i; + while (scanf("%x", &i) == 1) + printf("%d\n", bit_ffs(i)); + return 0; +} + +#endif diff --git a/libucw/ucw/bit-fls.c b/libucw/ucw/bit-fls.c new file mode 100644 index 0000000..4100256 --- /dev/null +++ b/libucw/ucw/bit-fls.c @@ -0,0 +1,42 @@ +/* + * UCW Library -- Find Highest Set Bit + * + * (c) 1997-2005 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include + +int +bit_fls(u32 x) +{ + uint l; + + if (!x) + return -1; + + l = 0; + if (x & 0xffff0000) { l += 16; x &= 0xffff0000; } + if (x & 0xff00ff00) { l += 8; x &= 0xff00ff00; } + if (x & 0xf0f0f0f0) { l += 4; x &= 0xf0f0f0f0; } + if (x & 0xcccccccc) { l += 2; x &= 0xcccccccc; } + if (x & 0xaaaaaaaa) l++; + return l; +} + +#ifdef TEST + +#include + +int main(void) +{ + uint i; + while (scanf("%x", &i) == 1) + printf("%d\n", bit_fls(i)); + return 0; +} + +#endif diff --git a/libucw/ucw/bitarray.h b/libucw/ucw/bitarray.h new file mode 100644 index 0000000..78ee9d9 --- /dev/null +++ b/libucw/ucw/bitarray.h @@ -0,0 +1,110 @@ +/* + * UCW Library -- Bit Array Operations + * + * (c) 2003--2006 Martin Mares + * (c) 2012 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_BITARRAY_H +#define _UCW_BITARRAY_H + +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define bit_array_count_bits ucw_bit_array_count_bits +#define bit_array_xrealloc ucw_bit_array_xrealloc +#endif + +typedef u32 *bitarray_t; // Must be initialized by bit_array_xmalloc(), bit_array_zero() or bit_array_set_all() + +#define BIT_ARRAY_WORDS(n) (((n)+31)/32) +#define BIT_ARRAY_BYTES(n) (4*BIT_ARRAY_WORDS(n)) +#define BIT_ARRAY(name,size) u32 name[BIT_ARRAY_WORDS(size)] + +static inline bitarray_t bit_array_xmalloc(uint n) +{ + return xmalloc(BIT_ARRAY_BYTES(n)); +} + +bitarray_t bit_array_xrealloc(bitarray_t a, uint old_n, uint new_n); + +static inline bitarray_t bit_array_xmalloc_zero(uint n) +{ + return xmalloc_zero(BIT_ARRAY_BYTES(n)); +} + +static inline void bit_array_zero(bitarray_t a, uint n) +{ + bzero(a, BIT_ARRAY_BYTES(n)); +} + +static inline void bit_array_set_all(bitarray_t a, uint n) +{ + uint w = n / 32; + memset(a, 255, w * 4); + uint m = n & 31; + if (m) + a[w] = (1U << m) - 1; +} + +static inline void bit_array_set(bitarray_t a, uint i) +{ + a[i/32] |= (1 << (i%32)); 
+} + +static inline void bit_array_clear(bitarray_t a, uint i) +{ + a[i/32] &= ~(1 << (i%32)); +} + +static inline void bit_array_assign(bitarray_t a, uint i, uint x) +{ + if (x) + bit_array_set(a, i); + else + bit_array_clear(a, i); +} + +static inline uint bit_array_isset(bitarray_t a, uint i) +{ + return a[i/32] & (1 << (i%32)); +} + +static inline uint bit_array_get(bitarray_t a, uint i) +{ + return !! bit_array_isset(a, i); +} + +static inline uint bit_array_test_and_set(bitarray_t a, uint i) +{ + uint t = bit_array_isset(a, i); + bit_array_set(a, i); + return t; +} + +static inline uint bit_array_test_and_clear(bitarray_t a, uint i) +{ + uint t = bit_array_isset(a, i); + bit_array_clear(a, i); + return t; +} + +uint bit_array_count_bits(bitarray_t a, uint n); + +/* Iterate over all set bits */ +#define BIT_ARRAY_FISH_BITS_BEGIN(var,ary,size) \ + for (uint var##_hi=0; var##_hi < BIT_ARRAY_WORDS(size); var##_hi++) \ + { \ + u32 var##_cur = ary[var##_hi]; \ + for (uint var = 32 * var##_hi; var##_cur; var++, var##_cur >>= 1) \ + if (var##_cur & 1) \ + do + +#define BIT_ARRAY_FISH_BITS_END \ + while (0); \ + } + +#endif diff --git a/libucw/ucw/bitops.h b/libucw/ucw/bitops.h new file mode 100644 index 0000000..21f40c4 --- /dev/null +++ b/libucw/ucw/bitops.h @@ -0,0 +1,53 @@ +/* + * UCW Library -- Bit Operations + * + * (c) 2005 Martin Mares + * (c) 2012 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_BITOPS_H +#define _UCW_BITOPS_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define bit_fls ucw_bit_fls +#define ffs_table ucw_ffs_table +#endif + +/* Find highest bit set (i.e., the floor of the binary logarithm) (bit-fls.c) */ + +int bit_fls(u32 x); /* bit_fls(0)=-1 */ + +/* Find lowest bit set, undefined for zero argument (bit-ffs.c) */ + +extern const byte ffs_table[256]; + +#ifdef __pentium4 /* On other ia32 machines, the C version is faster */ + +static inline uint bit_ffs(uint w) +{ + asm("bsfl %1,%0" :"=r" (w) :"rm" (w)); + return w; +} + +#else + +static inline uint bit_ffs(uint w) +{ + uint b = (w & 0xffff) ? 0 : 16; + b += ((w >> b) & 0xff) ? 0 : 8; + return b + ffs_table[(w >> b) & 0xff]; +} + +#endif + +/* Count the number of bits set */ + +static inline uint bit_count(uint w) +{ + return __builtin_popcount(w); +} + +#endif diff --git a/libucw/ucw/bitops.t b/libucw/ucw/bitops.t new file mode 100644 index 0000000..4224066 --- /dev/null +++ b/libucw/ucw/bitops.t @@ -0,0 +1,53 @@ +# Tests for bitops modules + +Run: ../obj/ucw/bit-ffs-t +In: 1 + 2 + 3 + 4 + 5 + 6 + 12345678 + 23030300 + 23030000 + 23000000 + 40000000 + 80000000 +Out: 0 + 1 + 0 + 2 + 0 + 1 + 3 + 8 + 16 + 24 + 30 + 31 + +Run: ../obj/ucw/bit-fls-t +In: 1 + 2 + 3 + 4 + 5 + 6 + 12345678 + 23030303 + 03030303 + 00030303 + 00000303 + 0fedcba9 +Out: 0 + 1 + 1 + 2 + 2 + 2 + 28 + 29 + 25 + 17 + 9 + 27 diff --git a/libucw/ucw/bitsig.c b/libucw/ucw/bitsig.c new file mode 100644 index 0000000..21215a4 --- /dev/null +++ b/libucw/ucw/bitsig.c @@ -0,0 +1,158 @@ +/* + * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates + * + * (c) 2002 Martin Mares + * + * Greatly inspired by: Faloutsos, C. and Christodoulakis, S.: Signature files + * (An access method for documents and its analytical performance evaluation), + * ACM Trans. Office Inf. Syst., 2(4):267--288, Oct. 1984. 
+ * + * This data structure provides a very compact representation + * of a set of strings with insertion and membership search, + * but with a certain low probability it cheats by incidentally + * reporting a non-member as a member. Generally the larger you + * create the structure, the lower this probability is. + * + * How does it work: the structure is just an array of M bits + * and each possible element is hashed to a set of (at most) L + * bit positions. For each element of the represented set, we + * set its L bits to ones and we report as present all elements + * all of whose L bits are set. + * + * Analysis: Let's assume N items have already been stored and let A + * denote L/M (density of the hash function). The probability that + * a fixed bit of the array is set by any of the N items is + * 1 - (1-1/M)^(NL) = 1 - ((1-1/M)^M)^NA = approx. 1 - e^-NA. + * This is minimized by setting A=(ln 2)/N (try taking derivative). + * Given a non-present item, the probability that all of the bits + * corresponding to this item are set by the other items (that is, + * the structure gives a false answer) is (1-e^-NA)^L = 2^-L. + * Hence, if we want to give false answers with probability less + * than epsilon, we take L := -log_2 epsilon, M := 1.45*N*L. + * + * Example: For a set of 10^7 items with P[error] < 10^-6, we set + * L := 20 and M := 290*10^6 bits = approx. 34.5 MB (29 bits per item). + * + * We leave L and an upper bound for N as parameters set during + * creation of the structure. Currently, the structure is limited + * to 4 Gb = 512 MB. + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include +#include + +#include + +struct bitsig { + uint l, m, n, maxn, max_m_mult; + u32 hash[4]; + uint hindex; + byte array[0]; +}; + +struct bitsig * +bitsig_init(uint perrlog, uint maxn) +{ + struct bitsig *b; + u64 m; + uint mbytes; + + m = ((u64) maxn * perrlog * 145 + 99) / 100; + if (m >= (u64) 1 << 32) + die("bitsig_init: bitsig array too large (maximum is 4 Gb)"); + mbytes = (m + 7) >> 3U; + b = xmalloc(sizeof(struct bitsig) + mbytes); + b->l = perrlog; + b->m = m; + b->n = 0; + b->maxn = maxn; + b->max_m_mult = (0xffffffff / m) * m; + bzero(b->array, mbytes); + msg(L_DEBUG, "Initialized bitsig array with l=%d, m=%u (%u KB), expecting %d items", b->l, b->m, (mbytes+1023)/1024, maxn); + return b; +} + +void +bitsig_free(struct bitsig *b) +{ + xfree(b); +} + +static void +bitsig_hash_init(struct bitsig *b, byte *item) +{ + md5_hash_buffer((byte *) b->hash, item, strlen(item)); + b->hindex = 0; +} + +static inline uint +bitsig_hash_bit(struct bitsig *b) +{ + u32 h; + do + { + h = b->hash[b->hindex]; + b->hash[b->hindex] *= 3006477127U; + b->hindex = (b->hindex+1) % 4; + } + while (h >= b->max_m_mult); + return h % b->m; +} + +int +bitsig_member(struct bitsig *b, byte *item) +{ + uint i, bit; + + bitsig_hash_init(b, item); + for (i=0; il; i++) + { + bit = bitsig_hash_bit(b); + if (!(b->array[bit >> 3] & (1 << (bit & 7)))) + return 0; + } + return 1; +} + +int +bitsig_insert(struct bitsig *b, byte *item) +{ + uint i, bit, was; + + bitsig_hash_init(b, item); + was = 1; + for (i=0; il; i++) + { + bit = bitsig_hash_bit(b); + if (!(b->array[bit >> 3] & (1 << (bit & 7)))) + { + was = 0; + b->array[bit >> 3] |= (1 << (bit & 7)); + } + } + if (!was && b->n++ == b->maxn+1) + msg(L_ERROR, "bitsig: Too many items inserted, error rate will be higher than estimated!"); + return was; +} + +#ifdef TEST + +#include +#include + +int main(int argc, char **argv) +{ + struct bitsig *b = bitsig_init(atol(argv[1]), atol(argv[2])); + byte buf[1024]; + + while 
(fgets(buf, 1024, stdin)) + printf("%d\n", bitsig_insert(b, buf)); + + return 0; +} + +#endif diff --git a/libucw/ucw/bitsig.h b/libucw/ucw/bitsig.h new file mode 100644 index 0000000..556b3c9 --- /dev/null +++ b/libucw/ucw/bitsig.h @@ -0,0 +1,27 @@ +/* + * UCW Library -- Bit Array Signatures -- A Dubious Detector of Duplicates + * + * (c) 2002 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_BITSIG_H +#define _UCW_BITSIG_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define bitsig_free ucw_bitsig_free +#define bitsig_init ucw_bitsig_init +#define bitsig_insert ucw_bitsig_insert +#define bitsig_member ucw_bitsig_member +#endif + +struct bitsig; + +struct bitsig *bitsig_init(uint perrlog, uint maxn); +void bitsig_free(struct bitsig *b); +int bitsig_member(struct bitsig *b, byte *item); +int bitsig_insert(struct bitsig *b, byte *item); + +#endif diff --git a/libucw/ucw/char-cat.c b/libucw/ucw/char-cat.c new file mode 100644 index 0000000..9ca8e75 --- /dev/null +++ b/libucw/ucw/char-cat.c @@ -0,0 +1,17 @@ +/* + * UCW Library -- Character Classes + * + * (c) 1998--2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include + +const byte ucw_c_cat[256] = { +#define CHAR(code,upper,lower,cat) cat, +#include +#undef CHAR +}; diff --git a/libucw/ucw/char-lower.c b/libucw/ucw/char-lower.c new file mode 100644 index 0000000..db97583 --- /dev/null +++ b/libucw/ucw/char-lower.c @@ -0,0 +1,17 @@ +/* + * UCW Library -- Lowercase Map + * + * (c) 1997--2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include + +const byte ucw_c_lower[256] = { +#define CHAR(code,upper,lower,cat) lower, +#include +#undef CHAR +}; diff --git a/libucw/ucw/char-map.h b/libucw/ucw/char-map.h new file mode 100644 index 0000000..3e348fc --- /dev/null +++ b/libucw/ucw/char-map.h @@ -0,0 +1,268 @@ +/* + * UCW Library -- Character Code Map (UTF-8 Version) + * + * (c) 1998--2004 Martin Mares + * (c) 2004 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* Syntax: CHAR(code, uppercase, lowercase, category) */ + +CHAR(0x00,0x00,0x00,_C_CTRL) // +CHAR(0x01,0x01,0x01,_C_CTRL) // +CHAR(0x02,0x02,0x02,_C_CTRL) // +CHAR(0x03,0x03,0x03,_C_CTRL) // +CHAR(0x04,0x04,0x04,_C_CTRL) // +CHAR(0x05,0x05,0x05,_C_CTRL) // +CHAR(0x06,0x06,0x06,_C_CTRL) // +CHAR(0x07,0x07,0x07,_C_CTRL) // +CHAR(0x08,0x08,0x08,_C_CTRL | _C_BLANK) // +CHAR(0x09,0x09,0x09,_C_CTRL | _C_BLANK | _C_PRINT) // +CHAR(0x0A,0x0A,0x0A,_C_CTRL | _C_BLANK) // +CHAR(0x0B,0x0B,0x0B,_C_CTRL) // +CHAR(0x0C,0x0C,0x0C,_C_CTRL | _C_BLANK) // +CHAR(0x0D,0x0D,0x0D,_C_CTRL | _C_BLANK) // +CHAR(0x0E,0x0E,0x0E,_C_CTRL) // +CHAR(0x0F,0x0F,0x0F,_C_CTRL) // +CHAR(0x10,0x10,0x10,_C_CTRL) // +CHAR(0x11,0x11,0x11,_C_CTRL) // +CHAR(0x12,0x12,0x12,_C_CTRL) // +CHAR(0x13,0x13,0x13,_C_CTRL) // +CHAR(0x14,0x14,0x14,_C_CTRL) // +CHAR(0x15,0x15,0x15,_C_CTRL) // +CHAR(0x16,0x16,0x16,_C_CTRL) // +CHAR(0x17,0x17,0x17,_C_CTRL) // +CHAR(0x18,0x18,0x18,_C_CTRL) // +CHAR(0x19,0x19,0x19,_C_CTRL) // +CHAR(0x1A,0x1A,0x1A,_C_CTRL) // +CHAR(0x1B,0x1B,0x1B,_C_CTRL) // +CHAR(0x1C,0x1C,0x1C,_C_CTRL) // +CHAR(0x1D,0x1D,0x1D,_C_CTRL) // +CHAR(0x1E,0x1E,0x1E,_C_CTRL) // +CHAR(0x1F,0x1F,0x1F,_C_CTRL) // +CHAR(0x20,0x20,0x20,_C_BLANK | _C_PRINT) // SPACE +CHAR(0x21,0x21,0x21,_C_PRINT) // EXCLAMATION MARK +CHAR(0x22,0x22,0x22,_C_PRINT) // QUOTATION MARK +CHAR(0x23,0x23,0x23,_C_PRINT) // NUMBER SIGN +CHAR(0x24,0x24,0x24,_C_PRINT) // DOLLAR SIGN 
+CHAR(0x25,0x25,0x25,_C_PRINT) // PERCENT SIGN +CHAR(0x26,0x26,0x26,_C_PRINT) // AMPERSAND +CHAR(0x27,0x27,0x27,_C_PRINT) // APOSTROPHE +CHAR(0x28,0x28,0x28,_C_PRINT) // LEFT PARENTHESIS +CHAR(0x29,0x29,0x29,_C_PRINT) // RIGHT PARENTHESIS +CHAR(0x2A,0x2A,0x2A,_C_PRINT) // ASTERISK +CHAR(0x2B,0x2B,0x2B,_C_PRINT) // PLUS SIGN +CHAR(0x2C,0x2C,0x2C,_C_PRINT) // COMMA +CHAR(0x2D,0x2D,0x2D,_C_PRINT) // HYPHEN-MINUS +CHAR(0x2E,0x2E,0x2E,_C_PRINT) // FULL STOP +CHAR(0x2F,0x2F,0x2F,_C_PRINT) // SOLIDUS +CHAR(0x30,0x30,0x30,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ZERO +CHAR(0x31,0x31,0x31,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT ONE +CHAR(0x32,0x32,0x32,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT TWO +CHAR(0x33,0x33,0x33,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT THREE +CHAR(0x34,0x34,0x34,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FOUR +CHAR(0x35,0x35,0x35,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT FIVE +CHAR(0x36,0x36,0x36,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SIX +CHAR(0x37,0x37,0x37,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT SEVEN +CHAR(0x38,0x38,0x38,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT EIGHT +CHAR(0x39,0x39,0x39,_C_DIGIT | _C_XDIGIT | _C_PRINT) // DIGIT NINE +CHAR(0x3A,0x3A,0x3A,_C_PRINT) // COLON +CHAR(0x3B,0x3B,0x3B,_C_PRINT) // SEMICOLON +CHAR(0x3C,0x3C,0x3C,_C_PRINT) // LESS-THAN SIGN +CHAR(0x3D,0x3D,0x3D,_C_PRINT) // EQUALS SIGN +CHAR(0x3E,0x3E,0x3E,_C_PRINT) // GREATER-THAN SIGN +CHAR(0x3F,0x3F,0x3F,_C_PRINT) // QUESTION MARK +CHAR(0x40,0x40,0x40,_C_PRINT) // COMMERCIAL AT +CHAR(0x41,0x41,0x61,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER A +CHAR(0x42,0x42,0x62,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER B +CHAR(0x43,0x43,0x63,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER C +CHAR(0x44,0x44,0x64,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER D +CHAR(0x45,0x45,0x65,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER E +CHAR(0x46,0x46,0x66,_C_UPPER | _C_XDIGIT | _C_PRINT) // LATIN CAPITAL LETTER F 
+CHAR(0x47,0x47,0x67,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER G +CHAR(0x48,0x48,0x68,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER H +CHAR(0x49,0x49,0x69,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER I +CHAR(0x4A,0x4A,0x6A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER J +CHAR(0x4B,0x4B,0x6B,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER K +CHAR(0x4C,0x4C,0x6C,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER L +CHAR(0x4D,0x4D,0x6D,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER M +CHAR(0x4E,0x4E,0x6E,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER N +CHAR(0x4F,0x4F,0x6F,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER O +CHAR(0x50,0x50,0x70,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER P +CHAR(0x51,0x51,0x71,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Q +CHAR(0x52,0x52,0x72,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER R +CHAR(0x53,0x53,0x73,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER S +CHAR(0x54,0x54,0x74,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER T +CHAR(0x55,0x55,0x75,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER U +CHAR(0x56,0x56,0x76,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER V +CHAR(0x57,0x57,0x77,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER W +CHAR(0x58,0x58,0x78,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER X +CHAR(0x59,0x59,0x79,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Y +CHAR(0x5A,0x5A,0x7A,_C_UPPER | _C_PRINT) // LATIN CAPITAL LETTER Z +CHAR(0x5B,0x5B,0x5B,_C_PRINT) // LEFT SQUARE BRACKET +CHAR(0x5C,0x5C,0x5C,_C_PRINT) // REVERSE SOLIDUS +CHAR(0x5D,0x5D,0x5D,_C_PRINT) // RIGHT SQUARE BRACKET +CHAR(0x5E,0x5E,0x5E,_C_PRINT) // CIRCUMFLEX ACCENT +CHAR(0x5F,0x5F,0x5F,_C_INNER | _C_PRINT) // LOW LINE +CHAR(0x60,0x60,0x60,_C_PRINT) // GRAVE ACCENT +CHAR(0x61,0x41,0x61,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER A +CHAR(0x62,0x42,0x62,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER B +CHAR(0x63,0x43,0x63,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER C +CHAR(0x64,0x44,0x64,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER D 
+CHAR(0x65,0x45,0x65,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER E +CHAR(0x66,0x46,0x66,_C_LOWER | _C_XDIGIT | _C_PRINT) // LATIN SMALL LETTER F +CHAR(0x67,0x47,0x67,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER G +CHAR(0x68,0x48,0x68,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER H +CHAR(0x69,0x49,0x69,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER I +CHAR(0x6A,0x4A,0x6A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER J +CHAR(0x6B,0x4B,0x6B,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER K +CHAR(0x6C,0x4C,0x6C,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER L +CHAR(0x6D,0x4D,0x6D,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER M +CHAR(0x6E,0x4E,0x6E,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER N +CHAR(0x6F,0x4F,0x6F,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER O +CHAR(0x70,0x50,0x70,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER P +CHAR(0x71,0x51,0x71,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Q +CHAR(0x72,0x52,0x72,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER R +CHAR(0x73,0x53,0x73,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER S +CHAR(0x74,0x54,0x74,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER T +CHAR(0x75,0x55,0x75,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER U +CHAR(0x76,0x56,0x76,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER V +CHAR(0x77,0x57,0x77,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER W +CHAR(0x78,0x58,0x78,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER X +CHAR(0x79,0x59,0x79,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Y +CHAR(0x7A,0x5A,0x7A,_C_LOWER | _C_PRINT) // LATIN SMALL LETTER Z +CHAR(0x7B,0x7B,0x7B,_C_PRINT) // LEFT CURLY BRACKET +CHAR(0x7C,0x7C,0x7C,_C_PRINT) // VERTICAL LINE +CHAR(0x7D,0x7D,0x7D,_C_PRINT) // RIGHT CURLY BRACKET +CHAR(0x7E,0x7E,0x7E,_C_PRINT) // TILDE +CHAR(0x7F,0x7F,0x7F,_C_CTRL) // +CHAR(0x80,0x80,0x80,_C_PRINT) // UTF-8 +CHAR(0x81,0x81,0x81,_C_PRINT) // UTF-8 +CHAR(0x82,0x82,0x82,_C_PRINT) // UTF-8 +CHAR(0x83,0x83,0x83,_C_PRINT) // UTF-8 +CHAR(0x84,0x84,0x84,_C_PRINT) // UTF-8 +CHAR(0x85,0x85,0x85,_C_PRINT) // UTF-8 +CHAR(0x86,0x86,0x86,_C_PRINT) // UTF-8 
+CHAR(0x87,0x87,0x87,_C_PRINT) // UTF-8 +CHAR(0x88,0x88,0x88,_C_PRINT) // UTF-8 +CHAR(0x89,0x89,0x89,_C_PRINT) // UTF-8 +CHAR(0x8A,0x8A,0x8A,_C_PRINT) // UTF-8 +CHAR(0x8B,0x8B,0x8B,_C_PRINT) // UTF-8 +CHAR(0x8C,0x8C,0x8C,_C_PRINT) // UTF-8 +CHAR(0x8D,0x8D,0x8D,_C_PRINT) // UTF-8 +CHAR(0x8E,0x8E,0x8E,_C_PRINT) // UTF-8 +CHAR(0x8F,0x8F,0x8F,_C_PRINT) // UTF-8 +CHAR(0x90,0x90,0x90,_C_PRINT) // UTF-8 +CHAR(0x91,0x91,0x91,_C_PRINT) // UTF-8 +CHAR(0x92,0x92,0x92,_C_PRINT) // UTF-8 +CHAR(0x93,0x93,0x93,_C_PRINT) // UTF-8 +CHAR(0x94,0x94,0x94,_C_PRINT) // UTF-8 +CHAR(0x95,0x95,0x95,_C_PRINT) // UTF-8 +CHAR(0x96,0x96,0x96,_C_PRINT) // UTF-8 +CHAR(0x97,0x97,0x97,_C_PRINT) // UTF-8 +CHAR(0x98,0x98,0x98,_C_PRINT) // UTF-8 +CHAR(0x99,0x99,0x99,_C_PRINT) // UTF-8 +CHAR(0x9A,0x9A,0x9A,_C_PRINT) // UTF-8 +CHAR(0x9B,0x9B,0x9B,_C_PRINT) // UTF-8 +CHAR(0x9C,0x9C,0x9C,_C_PRINT) // UTF-8 +CHAR(0x9D,0x9D,0x9D,_C_PRINT) // UTF-8 +CHAR(0x9E,0x9E,0x9E,_C_PRINT) // UTF-8 +CHAR(0x9F,0x9F,0x9F,_C_PRINT) // UTF-8 +CHAR(0xA0,0xA0,0xA0,_C_PRINT) // UTF-8 +CHAR(0xA1,0xA1,0xA1,_C_PRINT) // UTF-8 +CHAR(0xA2,0xA2,0xA2,_C_PRINT) // UTF-8 +CHAR(0xA3,0xA3,0xA3,_C_PRINT) // UTF-8 +CHAR(0xA4,0xA4,0xA4,_C_PRINT) // UTF-8 +CHAR(0xA5,0xA5,0xA5,_C_PRINT) // UTF-8 +CHAR(0xA6,0xA6,0xA6,_C_PRINT) // UTF-8 +CHAR(0xA7,0xA7,0xA7,_C_PRINT) // UTF-8 +CHAR(0xA8,0xA8,0xA8,_C_PRINT) // UTF-8 +CHAR(0xA9,0xA9,0xA9,_C_PRINT) // UTF-8 +CHAR(0xAA,0xAA,0xAA,_C_PRINT) // UTF-8 +CHAR(0xAB,0xAB,0xAB,_C_PRINT) // UTF-8 +CHAR(0xAC,0xAC,0xAC,_C_PRINT) // UTF-8 +CHAR(0xAD,0xAD,0xAD,_C_PRINT) // UTF-8 +CHAR(0xAE,0xAE,0xAE,_C_PRINT) // UTF-8 +CHAR(0xAF,0xAF,0xAF,_C_PRINT) // UTF-8 +CHAR(0xB0,0xB0,0xB0,_C_PRINT) // UTF-8 +CHAR(0xB1,0xB1,0xB1,_C_PRINT) // UTF-8 +CHAR(0xB2,0xB2,0xB2,_C_PRINT) // UTF-8 +CHAR(0xB3,0xB3,0xB3,_C_PRINT) // UTF-8 +CHAR(0xB4,0xB4,0xB4,_C_PRINT) // UTF-8 +CHAR(0xB5,0xB5,0xB5,_C_PRINT) // UTF-8 +CHAR(0xB6,0xB6,0xB6,_C_PRINT) // UTF-8 +CHAR(0xB7,0xB7,0xB7,_C_PRINT) // UTF-8 +CHAR(0xB8,0xB8,0xB8,_C_PRINT) // UTF-8 
+CHAR(0xB9,0xB9,0xB9,_C_PRINT) // UTF-8 +CHAR(0xBA,0xBA,0xBA,_C_PRINT) // UTF-8 +CHAR(0xBB,0xBB,0xBB,_C_PRINT) // UTF-8 +CHAR(0xBC,0xBC,0xBC,_C_PRINT) // UTF-8 +CHAR(0xBD,0xBD,0xBD,_C_PRINT) // UTF-8 +CHAR(0xBE,0xBE,0xBE,_C_PRINT) // UTF-8 +CHAR(0xBF,0xBF,0xBF,_C_PRINT) // UTF-8 +CHAR(0xC0,0xC0,0xC0,_C_PRINT) // UTF-8 +CHAR(0xC1,0xC1,0xC1,_C_PRINT) // UTF-8 +CHAR(0xC2,0xC2,0xC2,_C_PRINT) // UTF-8 +CHAR(0xC3,0xC3,0xC3,_C_PRINT) // UTF-8 +CHAR(0xC4,0xC4,0xC4,_C_PRINT) // UTF-8 +CHAR(0xC5,0xC5,0xC5,_C_PRINT) // UTF-8 +CHAR(0xC6,0xC6,0xC6,_C_PRINT) // UTF-8 +CHAR(0xC7,0xC7,0xC7,_C_PRINT) // UTF-8 +CHAR(0xC8,0xC8,0xC8,_C_PRINT) // UTF-8 +CHAR(0xC9,0xC9,0xC9,_C_PRINT) // UTF-8 +CHAR(0xCA,0xCA,0xCA,_C_PRINT) // UTF-8 +CHAR(0xCB,0xCB,0xCB,_C_PRINT) // UTF-8 +CHAR(0xCC,0xCC,0xCC,_C_PRINT) // UTF-8 +CHAR(0xCD,0xCD,0xCD,_C_PRINT) // UTF-8 +CHAR(0xCE,0xCE,0xCE,_C_PRINT) // UTF-8 +CHAR(0xCF,0xCF,0xCF,_C_PRINT) // UTF-8 +CHAR(0xD0,0xD0,0xD0,_C_PRINT) // UTF-8 +CHAR(0xD1,0xD1,0xD1,_C_PRINT) // UTF-8 +CHAR(0xD2,0xD2,0xD2,_C_PRINT) // UTF-8 +CHAR(0xD3,0xD3,0xD3,_C_PRINT) // UTF-8 +CHAR(0xD4,0xD4,0xD4,_C_PRINT) // UTF-8 +CHAR(0xD5,0xD5,0xD5,_C_PRINT) // UTF-8 +CHAR(0xD6,0xD6,0xD6,_C_PRINT) // UTF-8 +CHAR(0xD7,0xD7,0xD7,_C_PRINT) // UTF-8 +CHAR(0xD8,0xD8,0xD8,_C_PRINT) // UTF-8 +CHAR(0xD9,0xD9,0xD9,_C_PRINT) // UTF-8 +CHAR(0xDA,0xDA,0xDA,_C_PRINT) // UTF-8 +CHAR(0xDB,0xDB,0xDB,_C_PRINT) // UTF-8 +CHAR(0xDC,0xDC,0xDC,_C_PRINT) // UTF-8 +CHAR(0xDD,0xDD,0xDD,_C_PRINT) // UTF-8 +CHAR(0xDE,0xDE,0xDE,_C_PRINT) // UTF-8 +CHAR(0xDF,0xDF,0xDF,_C_PRINT) // UTF-8 +CHAR(0xE0,0xE0,0xE0,_C_PRINT) // UTF-8 +CHAR(0xE1,0xE1,0xE1,_C_PRINT) // UTF-8 +CHAR(0xE2,0xE2,0xE2,_C_PRINT) // UTF-8 +CHAR(0xE3,0xE3,0xE3,_C_PRINT) // UTF-8 +CHAR(0xE4,0xE4,0xE4,_C_PRINT) // UTF-8 +CHAR(0xE5,0xE5,0xE5,_C_PRINT) // UTF-8 +CHAR(0xE6,0xE6,0xE6,_C_PRINT) // UTF-8 +CHAR(0xE7,0xE7,0xE7,_C_PRINT) // UTF-8 +CHAR(0xE8,0xE8,0xE8,_C_PRINT) // UTF-8 +CHAR(0xE9,0xE9,0xE9,_C_PRINT) // UTF-8 +CHAR(0xEA,0xEA,0xEA,_C_PRINT) // UTF-8 
+CHAR(0xEB,0xEB,0xEB,_C_PRINT) // UTF-8 +CHAR(0xEC,0xEC,0xEC,_C_PRINT) // UTF-8 +CHAR(0xED,0xED,0xED,_C_PRINT) // UTF-8 +CHAR(0xEE,0xEE,0xEE,_C_PRINT) // UTF-8 +CHAR(0xEF,0xEF,0xEF,_C_PRINT) // UTF-8 +CHAR(0xF0,0xF0,0xF0,_C_PRINT) // UTF-8 +CHAR(0xF1,0xF1,0xF1,_C_PRINT) // UTF-8 +CHAR(0xF2,0xF2,0xF2,_C_PRINT) // UTF-8 +CHAR(0xF3,0xF3,0xF3,_C_PRINT) // UTF-8 +CHAR(0xF4,0xF4,0xF4,_C_PRINT) // UTF-8 +CHAR(0xF5,0xF5,0xF5,_C_PRINT) // UTF-8 +CHAR(0xF6,0xF6,0xF6,_C_PRINT) // UTF-8 +CHAR(0xF7,0xF7,0xF7,_C_PRINT) // UTF-8 +CHAR(0xF8,0xF8,0xF8,_C_PRINT) // UTF-8 +CHAR(0xF9,0xF9,0xF9,_C_PRINT) // UTF-8 +CHAR(0xFA,0xFA,0xFA,_C_PRINT) // UTF-8 +CHAR(0xFB,0xFB,0xFB,_C_PRINT) // UTF-8 +CHAR(0xFC,0xFC,0xFC,_C_PRINT) // UTF-8 +CHAR(0xFD,0xFD,0xFD,_C_PRINT) // UTF-8 +CHAR(0xFE,0xFE,0xFE,_C_PRINT) // UTF-8 +CHAR(0xFF,0xFF,0xFF,_C_PRINT) // UTF-8 diff --git a/libucw/ucw/char-upper.c b/libucw/ucw/char-upper.c new file mode 100644 index 0000000..c56528c --- /dev/null +++ b/libucw/ucw/char-upper.c @@ -0,0 +1,17 @@ +/* + * UCW Library -- Uppercase Map + * + * (c) 1997--2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include + +const byte ucw_c_upper[256] = { +#define CHAR(code,upper,lower,cat) upper, +#include +#undef CHAR +}; diff --git a/libucw/ucw/chartype.h b/libucw/ucw/chartype.h new file mode 100644 index 0000000..65972ce --- /dev/null +++ b/libucw/ucw/chartype.h @@ -0,0 +1,64 @@ +/* + * UCW Library -- Character Types + * + * (c) 1997--2004 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_CHARTYPE_H +#define _UCW_CHARTYPE_H + +/*** + * We define our own routines to classify 8-bit characters (based on US-ASCII charset). + * This way we bypass most possible problems with different compilation environments. 
+ * + * All functions and macros accept any numbers and if it is necessary, they simply ignore higher bits. + * It does not matter whether a parameter is signed or unsigned. Parameters are evaluated exactly once, + * so they can have side-effects. + ***/ + +#define _C_UPPER 1 /* Upper-case letters */ +#define _C_LOWER 2 /* Lower-case letters */ +#define _C_PRINT 4 /* Printable */ +#define _C_DIGIT 8 /* Digits */ +#define _C_CTRL 16 /* Control characters */ +#define _C_XDIGIT 32 /* Hexadecimal digits */ +#define _C_BLANK 64 /* White spaces (spaces, tabs, newlines) */ +#define _C_INNER 128 /* `inner punctuation' -- underscore etc. */ + +#define _C_ALPHA (_C_UPPER | _C_LOWER) +#define _C_ALNUM (_C_ALPHA | _C_DIGIT) +#define _C_WORD (_C_ALNUM | _C_INNER) +#define _C_WSTART (_C_ALPHA | _C_INNER) + +extern const byte ucw_c_cat[256], ucw_c_upper[256], ucw_c_lower[256]; + +#define Category(x) (ucw_c_cat[(byte)(x)]) +#define Ccat(x,y) (Category(x) & (y)) + +#define Cupper(x) Ccat(x, _C_UPPER) /** Checks for an upper-case character (`A-Z`). **/ +#define Clower(x) Ccat(x, _C_LOWER) /** Checks for a lower-case character (`a-z`). **/ +#define Calpha(x) Ccat(x, _C_ALPHA) /** Checks for an alphabetic character (`a-z`, `A-Z`). **/ +#define Calnum(x) Ccat(x, _C_ALNUM) /** Checks for an alpha-numeric character (`a-z`, `A-Z`, `0-9`). **/ +#define Cprint(x) Ccat(x, _C_PRINT) /** Checks for printable characters, including 8-bit values (`\t`, `0x20-0x7E`, `0x80-0xFF`). **/ +#define Cdigit(x) Ccat(x, _C_DIGIT) /** Checks for a digit (`0-9`). **/ +#define Cxdigit(x) Ccat(x, _C_XDIGIT) /** Checks for a hexadecimal digit (`0-9`, `a-f`, `A-F`). **/ +#define Cword(x) Ccat(x, _C_WORD) /** Checks for an alpha-numeric character or an inner punctuation (`a-z`, `A-Z`, `0-9`, `_`). **/ +#define Cblank(x) Ccat(x, _C_BLANK) /** Checks for a white space (`0x20`, `\t`, `\n`, `\r`, `0x8`, `0xC`). **/ +#define Cctrl(x) Ccat(x, _C_CTRL) /** Checks for control characters (`0x0-0x1F`, `0x7F`). 
**/ +#define Cspace(x) Cblank(x) + +#define Cupcase(x) (ucw_c_upper[(byte)(x)]) /** Convert a letter to upper case, leave non-letter characters unchanged. **/ +#define Clocase(x) (ucw_c_lower[(byte)(x)]) /** Convert a letter to lower case, leave non-letter characters unchanged. **/ + +/** + * Compute the value of a valid hexadecimal character (ie. passed the @Cxdigit() check). + **/ +static inline uint Cxvalue(byte x) +{ + return (x < (uint)'A') ? x - '0' : (x & 0xdf) - 'A' + 10; +} + +#endif diff --git a/libucw/ucw/clists.h b/libucw/ucw/clists.h new file mode 100644 index 0000000..809c480 --- /dev/null +++ b/libucw/ucw/clists.h @@ -0,0 +1,279 @@ +/* + * UCW Library -- Circular Linked Lists + * + * (c) 2003--2010 Martin Mares + * (c) 2017 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_CLISTS_H +#define _UCW_CLISTS_H + +/** + * Common header for list nodes. + **/ +typedef struct cnode { + struct cnode *next, *prev; +} cnode; + +/** + * Circular doubly linked list. + **/ +typedef struct clist { + struct cnode head; +} clist; + +/** + * Initialize a new circular linked list. Must be called before any other function. + **/ +static inline void clist_init(clist *l) +{ + cnode *head = &l->head; + head->next = head->prev = head; +} + +/** + * Return the first node on @l or NULL if @l is empty. + **/ +static inline void *clist_head(clist *l) +{ + return (l->head.next != &l->head) ? l->head.next : NULL; +} + +/** + * Return the last node on @l or NULL if @l is empty. + **/ +static inline void *clist_tail(clist *l) +{ + return (l->head.prev != &l->head) ? l->head.prev : NULL; +} + +/** + * Find the next node to @n or NULL if @n is the last one. + **/ +static inline void *clist_next(clist *l, cnode *n) +{ + return (n->next != &l->head) ? (void *) n->next : NULL; +} + +/** + * Find the previous node to @n or NULL if @n is the first one. 
+ **/ +static inline void *clist_prev(clist *l, cnode *n) +{ + return (n->prev != &l->head) ? (void *) n->prev : NULL; +} + +/** + * Return a non-zero value iff @l is empty. + **/ +static inline int clist_empty(clist *l) +{ + return (l->head.next == &l->head); +} + +/** + * Loop over all nodes in the @list and perform the next C statement on them. The current node is stored in @n which must be defined before as pointer to any type. + * The list should not be changed during this loop command. + **/ +#define CLIST_WALK(n,list) for(n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next) + +/** + * Same as @CLIST_WALK(), but allows removal of the current node. This macro requires one more variable to store some temporary pointers. + **/ +#define CLIST_WALK_DELSAFE(n,list,tmp) for(n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp) + +/** + * Same as @CLIST_WALK(), but it defines the variable for the current node in place. @type should be a pointer type. + **/ +#define CLIST_FOR_EACH(type,n,list) for(type n=(void*)(list).head.next; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->next) + +/** + * Same as @CLIST_WALK_DELSAFE(), but it defines the variable for the current node in place. @type should be a pointer type. The temporary variable must be still known before. + **/ +#define CLIST_FOR_EACH_DELSAFE(type,n,list,tmp) for(type n=(void*)(list).head.next; tmp=(void*)((cnode*)(n))->next, (cnode*)(n) != &(list).head; n=(void*)tmp) + +/** + * Reversed version of @CLIST_FOR_EACH(). + **/ +#define CLIST_FOR_EACH_BACKWARDS(type,n,list) for(type n=(void*)(list).head.prev; (cnode*)(n) != &(list).head; n=(void*)((cnode*)(n))->prev) + +/** + * Insert a new node just after the node @after. To insert at the head of the list, use @clist_add_head() instead. 
+ **/ +static inline void clist_insert_after(cnode *what, cnode *after) +{ + cnode *before = after->next; + what->next = before; + what->prev = after; + before->prev = what; + after->next = what; +} + +/** + * Insert a new node just before the node @before. To insert at the tail of the list, use @clist_add_tail() instead. + **/ +static inline void clist_insert_before(cnode *what, cnode *before) +{ + cnode *after = before->prev; + what->next = before; + what->prev = after; + before->prev = what; + after->next = what; +} + +/** + * Insert a new node in front of all other nodes. + **/ +static inline void clist_add_head(clist *l, cnode *n) +{ + clist_insert_after(n, &l->head); +} + +/** + * Insert a new node after all other nodes. + **/ +static inline void clist_add_tail(clist *l, cnode *n) +{ + clist_insert_before(n, &l->head); +} + +/** + * Remove node @n. Only the neighbors are relinked, the @next and @prev pointers of @n itself are left unchanged. + **/ +static inline void clist_remove(cnode *n) +{ + cnode *before = n->prev; + cnode *after = n->next; + before->next = after; + after->prev = before; +} + +/** + * Remove the first node in @l, if it exists. Return the pointer to that node or NULL. + **/ +static inline void *clist_remove_head(clist *l) +{ + cnode *n = clist_head(l); + if (n) + clist_remove(n); + return n; +} + +/** + * Remove the last node in @l, if it exists. Return the pointer to that node or NULL. + **/ +static inline void *clist_remove_tail(clist *l) +{ + cnode *n = clist_tail(l); + if (n) + clist_remove(n); + return n; +} + +/** + * Merge two lists by inserting the list @what just after the node @after in a different list. + * The first list is then cleared. If @what is empty, nothing happens. + **/ +static inline void clist_insert_list_after(clist *what, cnode *after) +{ + if (!clist_empty(what)) + { + cnode *w = &what->head; + w->prev->next = after->next; /* last node of @what <-> old successor of @after */ + after->next->prev = w->prev; + w->next->prev = after; /* @after <-> first node of @what */ + after->next = w->next; + clist_init(what); /* the head of @what is left out of the cycle and becomes an empty list */ + } +} + +/** + * Merge two lists by inserting the list @what in front of all other nodes in a different list @l. 
+ * The list @what is then cleared. + **/ +static inline void clist_add_list_head(clist *l, clist *what) +{ + clist_insert_list_after(what, &l->head); +} + +/** + * Merge two lists by inserting the list @what after all other nodes in a different list @l. + * The list @what is then cleared. + **/ +static inline void clist_add_list_tail(clist *l, clist *what) +{ + clist_insert_list_after(what, l->head.prev); +} + +/** + * Move all items from a source list to a destination list. The source list + * becomes empty, the original contents of the destination list are destroyed (the head of @to is simply re-initialized, its former nodes are not touched). + **/ +static inline void clist_move(clist *to, clist *from) +{ + clist_init(to); + clist_insert_list_after(from, &to->head); + clist_init(from); +} + +/** + * Compute the number of nodes in @l. Beware of linear time complexity. + **/ +static inline uint clist_size(clist *l) +{ + uint i = 0; + CLIST_FOR_EACH(cnode *, n, *l) + i++; + return i; +} + +/** + * Remove a node @n and mark it as unlinked by setting the previous and next pointers to NULL. + **/ +static inline void clist_unlink(cnode *n) +{ + clist_remove(n); + n->prev = n->next = NULL; +} + +/** + * Remove the first node on @l and mark it as unlinked. + * Return the pointer to that node or NULL. + **/ +static inline void *clist_unlink_head(clist *l) +{ + cnode *n = clist_head(l); + if (n) + clist_unlink(n); + return n; +} + +/** + * Remove the last node on @l and mark it as unlinked. + * Return the pointer to that node or NULL. + **/ +static inline void *clist_unlink_tail(clist *l) +{ + cnode *n = clist_tail(l); + if (n) + clist_unlink(n); + return n; +} + +/** + * Check if a node is linked in a list. Unlinked nodes are recognized by having their + * previous and next pointers equal to NULL (only the next pointer is actually tested). Returns 0 or 1. + * + * Nodes initialized to all zeroes are unlinked, inserting a node anywhere in a list + * makes it linked. Normal removal functions like @clist_remove() do not mark nodes + * as unlinked, you need to call @clist_unlink() instead. 
+ **/ +static inline int clist_is_linked(cnode *n) +{ + return !!n->next; /* the double negation normalizes the pointer to 0 or 1 */ +} + +#endif diff --git a/libucw/ucw/conf-alloc.c b/libucw/ucw/conf-alloc.c new file mode 100644 index 0000000..bf4569e --- /dev/null +++ b/libucw/ucw/conf-alloc.c @@ -0,0 +1,48 @@ +/* + * UCW Library -- Configuration files: memory allocation + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2012 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include /* NOTE(review): the header names after the #include directives are missing in this copy */ +#include +#include +#include + +inline struct mempool * +cf_get_pool(void) /* Return the memory pool of the current configuration context. */ +{ + return cf_get_context()->pool; +} + +void * +cf_malloc(uint size) /* Allocate @size bytes from the configuration pool by mp_alloc(). */ +{ + return mp_alloc(cf_get_pool(), size); +} + +void * +cf_malloc_zero(uint size) /* The same by mp_alloc_zero(). */ +{ + return mp_alloc_zero(cf_get_pool(), size); +} + +char * +cf_strdup(const char *s) /* Duplicate the string @s into the configuration pool by mp_strdup(). */ +{ + return mp_strdup(cf_get_pool(), s); +} + +char * +cf_printf(const char *fmt, ...) /* Format a string into the configuration pool by mp_vprintf() and return the result. */ +{ + va_list args; + va_start(args, fmt); + char *res = mp_vprintf(cf_get_pool(), fmt, args); + va_end(args); + return res; +} diff --git a/libucw/ucw/conf-context.c b/libucw/ucw/conf-context.c new file mode 100644 index 0000000..050e43b --- /dev/null +++ b/libucw/ucw/conf-context.c @@ -0,0 +1,72 @@ +/* + * UCW Library -- Configuration files: Contexts + * + * (c) 2012 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include /* NOTE(review): the header names after the #include directives are missing in this copy */ +#include +#include +#include + +static struct cf_context cf_default_context; + +static void +cf_init_context(struct cf_context *cc) /* Common initialization: journalling is enabled and the list of remembered entries is empty. */ +{ + cc->enable_journal = 1; + clist_init(&cc->conf_entries); +} + +struct cf_context * +cf_new_context(void) /* Allocate a zeroed context and initialize it. The new context is not made current. */ +{ + struct cf_context *cc = xmalloc_zero(sizeof(*cc)); + cf_init_context(cc); + return cc; +} + +void +cf_delete_context(struct cf_context *cc) /* Free @cc, which must be neither active nor the default context. */ +{ + ASSERT(!cc->is_active); + ASSERT(cc != &cf_default_context); + struct cf_context *prev = cf_switch_context(cc); /* switch to @cc temporarily, presumably because cf_revert() works on the current context */ + cf_revert(); + cf_switch_context(prev); + xfree(cc->parser); + xfree(cc); +} + +struct cf_context * +cf_switch_context(struct cf_context *cc) /* Make @cc (which may be NULL) the current context of the calling thread and return the previous one. */ +{ + struct ucwlib_context *uc = ucwlib_thread_context(); + struct cf_context *prev = uc->cf_context; + if (prev) + prev->is_active = 0; + if (cc) + { + ASSERT(!cc->is_active); + cc->is_active = 1; + } + uc->cf_context = cc; + return prev; +} + +static void CONSTRUCTOR_WITH_PRIORITY(10100) +cf_init_default_context(void) /* Install the static default context as the current one and mark it active. */ +{ + cf_init_context(&cf_default_context); + ucwlib_thread_context()->cf_context = &cf_default_context; + cf_default_context.is_active = 1; +} + +struct cf_context * +cf_obtain_context(void) /* Non-inline wrapper of cf_get_context(). */ +{ + return cf_get_context(); +} diff --git a/libucw/ucw/conf-dump.c b/libucw/ucw/conf-dump.c new file mode 100644 index 0000000..9932f1d --- /dev/null +++ b/libucw/ucw/conf-dump.c @@ -0,0 +1,131 @@ +/* + * UCW Library -- Configuration files: dumping + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2012 Martin Mares + * (c) 2014 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include +#include +#include +#include +#include + +static void +spaces(struct fastbuf *fb, uint nr) +{ + /* NOTE(review): the text between the following loop header and the lookup case of dump_basic() is missing in this copy */ + for (uint i=0; i= 0 ? u->lookup[ *(int*)ptr ] : "???"); break; + case CT_USER: + if (u->utype->dumper) + u->utype->dumper(fb, ptr); + else + bprintf(fb, "??? 
"); + break; + case CT_XTYPE: + bprintf(fb, "'%s' ", u->xtype->format(ptr, XTYPE_FMT_DEFAULT, cf_get_pool())); + break; + } +} + +static void dump_section(struct fastbuf *fb, struct cf_section *sec, int level, void *ptr); + +static char *class_names[] = { "end", "static", "dynamic", "parser", "section", "list", "bitmap" }; /* indexed by item->cls */ + +static void +dump_item(struct fastbuf *fb, struct cf_item *item, int level, void *ptr) /* Dump the description and the current value of a single item; @ptr is the base pointer of the enclosing section. */ +{ + ptr += (uintptr_t) item->ptr; /* item->ptr is used as an offset from the base pointer */ + enum cf_type type = item->type; + uint size = cf_type_size(item->type, &item->u); + int i; + spaces(fb, level); + bprintf(fb, "%s: C%s #", item->name, class_names[item->cls]); + if (item->number == CF_ANY_NUM) + bputs(fb, "any "); + else + bprintf(fb, "%d ", item->number); + if (item->cls == CC_STATIC || item->cls == CC_DYNAMIC || item->cls == CC_BITMAP) { + bprintf(fb, "T%s ", cf_type_names[type]); + if (item->type == CT_USER) + bprintf(fb, "U%s S%d ", item->u.utype->name, size); + else if (item->type == CT_XTYPE) + bprintf(fb, "X%s S%d ", item->u.xtype->name, size); + } + if (item->cls == CC_STATIC) { + /* NOTE(review): parts of the loop headers in this function are missing in this copy */ + for (i=0; inumber; i++) + dump_basic(fb, ptr + i * size, type, &item->u); + } else if (item->cls == CC_DYNAMIC) { + ptr = * (void**) ptr; /* a dynamic item holds a pointer to its array, NULL if there is none */ + if (ptr) { + int real_nr = GARY_SIZE(ptr); + bprintf(fb, "N%d ", real_nr); + for (i=0; iu); + } else + bprintf(fb, "NULL "); + } else if (item->cls == CC_BITMAP) { + u32 mask = * (u32*) ptr; + for (i=0; i<32; i++) { + if (item->type == CT_LOOKUP && !item->u.lookup[i]) /* end of the NULL-terminated lookup table */ + break; + if (mask & (1<type == CT_INT) + bprintf(fb, "%d ", i); + else if (item->type == CT_LOOKUP) + bprintf(fb, "%s ", item->u.lookup[i]); + } + } + } + bputc(fb, '\n'); + if (item->cls == CC_SECTION) + dump_section(fb, item->u.sec, level+1, ptr); + else if (item->cls == CC_LIST) { + uint idx = 0; + CLIST_FOR_EACH(cnode *, n, * (clist*) ptr) { /* every list node serves as the base pointer of one instance of the section */ + spaces(fb, level+1); + bprintf(fb, "item %d\n", ++idx); + dump_section(fb, item->u.sec, level+2, n); + } + } +} + +static void +dump_section(struct fastbuf *fb, struct 
cf_section *sec, int level, void *ptr) /* Dump the header of @sec and then all its items; the array sec->cfg is terminated by an item with zero cls. */ +{ + spaces(fb, level); + bprintf(fb, "S%d F%x:\n", sec->size, sec->flags); + for (struct cf_item *item=sec->cfg; item->cls; item++) + dump_item(fb, item, level, ptr); +} + +void +cf_dump_sections(struct fastbuf *fb) /* Dump the root section of the current context to @fb. */ +{ + struct cf_context *cc = cf_get_context(); + dump_section(fb, &cc->sections, 0, NULL); +} + diff --git a/libucw/ucw/conf-getopt.c b/libucw/ucw/conf-getopt.c new file mode 100644 index 0000000..bbb81e1 --- /dev/null +++ b/libucw/ucw/conf-getopt.c @@ -0,0 +1,108 @@ +/* + * UCW Library -- Configuration files: getopt wrapper + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2012 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include /* NOTE(review): the header names after the #include directives are missing in this copy */ +#include +#include +#include +#include + +#include + +#ifndef CONFIG_UCW_DEFAULT_CONFIG +#define CONFIG_UCW_DEFAULT_CONFIG NULL +#endif +char *cf_def_file = CONFIG_UCW_DEFAULT_CONFIG; /* default configuration file, NULL if there is none */ + +#ifndef CONFIG_UCW_ENV_VAR_CONFIG +#define CONFIG_UCW_ENV_VAR_CONFIG NULL +#endif +char *cf_env_file = CONFIG_UCW_ENV_VAR_CONFIG; /* name of an environment variable whose value is loaded instead of cf_def_file if it is set */ + +void +cf_load_default(struct cf_context *cc) /* Load the default configuration unless cc->config_loaded is already non-zero. Dies on failure. */ +{ + if (cc->config_loaded++) + return; + if (cf_def_file) + { + char *env; + if (cf_env_file && (env = getenv(cf_env_file))) + { + if (cf_load(env)) + die("Cannot load config file %s", env); + } + else if (cf_load(cf_def_file)) + die("Cannot load default config %s", cf_def_file); + } + else + { + // We need to create an empty pool and initialize all configuration items + struct cf_journal_item *oldj = cf_journal_new_transaction(1); + cf_init_stack(cc); + cf_done_stack(cc); + cf_journal_commit_transaction(1, oldj); + } +} + +static void +end_of_options(struct cf_context *cc) +{ + cf_load_default(cc); + if (cc->postpone_commit && cf_close_group()) + die("Loading of configuration failed"); +} + +int +cf_getopt(int argc, char *const argv[], const char *short_opts, const struct option *long_opts, int *long_index) +{ + struct 
cf_context *cc = cf_get_context(); + if (!cc->postpone_commit) /* open a group unless one is already open, so that commits wait for end_of_options() */ + cf_open_group(); + + while (1) + { + int res = getopt_long(argc, argv, short_opts, long_opts, long_index); + if (res == 'S' || res == 'C' || res == 0x64436667) /* the constant is the code of --dumpconfig; it is consumed silently when CONFIG_UCW_DEBUG is not defined */ + { + if (cc->other_options) + die("The -S and -C options must precede all other arguments"); + if (res == 'S') + { + cf_load_default(cc); + if (cf_set(optarg)) + die("Cannot set %s", optarg); + } + else if (res == 'C') + { + if (cf_load(optarg)) + die("Cannot load config file %s", optarg); + } +#ifdef CONFIG_UCW_DEBUG + else + { /* --dumpconfig */ + end_of_options(cc); + struct fastbuf *b = bfdopen(1, 4096); + cf_dump_sections(b); + bclose(b); + exit(0); + } +#endif + } + else + { + /* unhandled option or end of options */ + if (res != ':' && res != '?') + end_of_options(cc); + cc->other_options++; + return res; + } + } +} diff --git a/libucw/ucw/conf-input.c b/libucw/ucw/conf-input.c new file mode 100644 index 0000000..f8d2eb8 --- /dev/null +++ b/libucw/ucw/conf-input.c @@ -0,0 +1,460 @@ +/* + * UCW Library -- Configuration files: parsing input streams + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2012 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include /* NOTE(review): the header names after the #include directives are missing in this copy */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Text file parser */ + +#define MAX_LINE 4096 + +#include + +#define GBUF_TYPE uint +#define GBUF_PREFIX(x) split_##x +#include + +struct cf_parser_state { + const char *name_parse_fb; /* name of the input for messages, NULL when a string is parsed */ + struct fastbuf *parse_fb; /* the input being read */ + uint line_num; /* number of lines read so far */ + char *line; /* current position inside line_buf */ + split_t word_buf; /* offsets of the words of the current command inside copy_buf */ + uint words; /* number of words of the current command */ + uint ends_by_brace; // the line is ended by "{" + bb_t copy_buf; /* zero-terminated copies of the words */ + uint copied; /* number of bytes used in copy_buf */ + char line_buf[]; /* MAX_LINE bytes allocated together with the structure */ +}; + +static int +get_line(struct cf_parser_state *p, char **msg) /* Read the next line and skip its leading blanks. Returns 1 on success. Otherwise returns 0 and sets *msg to an error message, or to NULL when bgets_nodie() has returned 0 (presumably at the end of input). */ +{ + int err = bgets_nodie(p->parse_fb, p->line_buf, MAX_LINE); + p->line_num++; + if (err <= 0) { + *msg = err < 0 ? 
"Line too long" : NULL; + return 0; + } + p->line = p->line_buf; + while (Cblank(*p->line)) + p->line++; + return 1; +} + +static void +append(struct cf_parser_state *p, char *start, char *end) /* Append the characters between @start and @end (exclusive) to copy_buf and terminate them by a zero byte. */ +{ + uint len = end - start; + bb_grow(&p->copy_buf, p->copied + len + 1); + memcpy(p->copy_buf.ptr + p->copied, start, len); + p->copied += len + 1; + p->copy_buf.ptr[p->copied-1] = 0; +} + +static char * +get_word(struct cf_parser_state *p, uint is_command_name) /* Copy one word starting at p->line to copy_buf and skip the blanks following it. Returns NULL on success, an error message otherwise. */ +{ + char *msg; + char *line = p->line; + + if (*line == '\'') { + line++; + while (1) { + char *start = line; + while (*line && *line != '\'') + line++; + append(p, start, line); + if (*line) + break; + p->copy_buf.ptr[p->copied-1] = '\n'; /* the word continues on the next line: turn the terminator into a newline */ + if (!get_line(p, &msg)) + return msg ? : "Unterminated apostrophe word at the end"; + line = p->line; + } + line++; + + } else if (*line == '"') { + line++; + uint start_copy = p->copied; + while (1) { + char *start = line; + uint escape = 0; + while (*line) { + if (*line == '"' && !escape) + break; + else if (*line == '\\') + escape ^= 1; + else + escape = 0; + line++; + } + append(p, start, line); + if (*line) + break; + if (!escape) + p->copy_buf.ptr[p->copied-1] = '\n'; + else // merge two lines + p->copied -= 2; /* drop the terminator together with the backslash at the end of the line */ + if (!get_line(p, &msg)) + return msg ? 
: "Unterminated quoted word at the end"; + line = p->line; + } + line++; + + char *tmp = stk_str_unesc(p->copy_buf.ptr + start_copy); /* replace the raw text of the quoted word by the result of stk_str_unesc() */ + uint l = strlen(tmp); + bb_grow(&p->copy_buf, start_copy + l + 1); + strcpy(p->copy_buf.ptr + start_copy, tmp); + p->copied = start_copy + l + 1; + + } else { + // promised that *line is non-null and non-blank + char *start = line; + while (*line && !Cblank(*line) + && *line != '{' && *line != '}' && *line != ';' + && (*line != '=' || !is_command_name)) + line++; + if (*line == '=') { // nice for setting from a command-line + if (line == start) + return "Assignment without a variable"; + *line = ' '; + } + if (line == start) // already the first char is control + line++; + append(p, start, line); + } + while (Cblank(*line)) + line++; + p->line = line; + return NULL; +} + +static char * +get_token(struct cf_parser_state *p, uint is_command_name, char **err) /* Return the next word of the command. NULL is returned on error (*err is set) and also when there is no further word (*err is NULL). */ +{ + *err = NULL; + while (1) { + if (!*p->line || *p->line == '#') { + if (!is_command_name || !get_line(p, err)) + return NULL; + } else if (*p->line == ';') { + *err = get_word(p, 0); + if (!is_command_name || *err) + return NULL; + } else if (*p->line == '\\' && !p->line[1]) { + if (!get_line(p, err)) { + if (!*err) + *err = "Last line ends by a backslash"; + return NULL; + } + if (!*p->line || *p->line == '#') + msg(L_WARN, "The line %s:%d following a backslash is empty", p->name_parse_fb ? : "", p->line_num); + } else { + split_grow(&p->word_buf, p->words+1); + uint start = p->copied; + p->word_buf.ptr[p->words++] = p->copied; + *err = get_word(p, is_command_name); + return *err ? 
NULL : p->copy_buf.ptr + start; + } + } +} + +static char * +split_command(struct cf_parser_state *p) +{ + p->words = p->copied = p->ends_by_brace = 0; + char *msg, *start_word; + if (!(start_word = get_token(p, 1, &msg))) + return msg; + if (*start_word == '{') // only one opening brace + return "Unexpected opening brace"; + while (*p->line != '}') // stays for the next time + { + if (!(start_word = get_token(p, 0, &msg))) + return msg; + if (*start_word == '{') { + p->words--; // discard the brace + p->ends_by_brace = 1; + break; + } + } + return NULL; +} + +/* Parsing multiple files */ + +static int +maybe_commit(struct cf_context *cc) +{ + if (cf_commit_all(cc->postpone_commit ? CF_NO_COMMIT : cc->everything_committed ? CF_COMMIT : CF_COMMIT_ALL)) + return 1; + if (!cc->postpone_commit) + cc->everything_committed = 1; + return 0; +} + +static char * +parse_fastbuf(struct cf_context *cc, const char *name_fb, struct fastbuf *fb, uint depth) +{ + struct cf_parser_state *p = cc->parser; + if (!p) + p = cc->parser = xmalloc_zero(sizeof(*p) + MAX_LINE); + p->name_parse_fb = name_fb; + p->parse_fb = fb; + p->line_num = 0; + p->line = p->line_buf; + *p->line = 0; + + if (!depth) + cf_init_stack(cc); + + char *err = NULL; + while (1) + { + err = split_command(p); + if (err) + goto error; + if (!p->words) + break; + char *name = p->copy_buf.ptr + p->word_buf.ptr[0]; + char *pars[p->words-1]; + for (uint i=1; iwords; i++) + pars[i-1] = p->copy_buf.ptr + p->word_buf.ptr[i]; + int optional_include = !strcasecmp(name, "optionalinclude"); + if (optional_include || !strcasecmp(name, "include")) + { + if (p->words != 2) + err = "Expecting one filename"; + else if (depth > 8) + err = "Too many nested files"; + else if (*p->line && *p->line != '#') // because the contents of line_buf is not re-entrant and will be cleared + err = "The include command must be the last one on a line"; + if (err) + goto error; + struct fastbuf *new_fb = bopen_try(pars[0], O_RDONLY, 1<<14); + if 
(!new_fb) { + if (optional_include && errno == ENOENT) + continue; + err = cf_printf("Cannot open file %s: %m", pars[0]); + goto error; + } + uint ll = p->line_num; + err = parse_fastbuf(cc, stk_strdup(pars[0]), new_fb, depth+1); + p->line_num = ll; + bclose(new_fb); + if (err) + goto error; + p->parse_fb = fb; + continue; + } + enum cf_operation op; + char *c = strchr(name, ':'); + if (!c) + op = strcmp(name, "}") ? OP_SET : OP_CLOSE; + else { + *c++ = 0; + switch (Clocase(*c)) { + case 's': op = OP_SET; break; + case 'c': op = Clocase(c[1]) == 'l' ? OP_CLEAR: OP_COPY; break; + case 'a': switch (Clocase(c[1])) { + case 'p': op = OP_APPEND; break; + case 'f': op = OP_AFTER; break; + default: op = OP_ALL; + }; break; + case 'p': op = OP_PREPEND; break; + case 'r': op = (c[1] && Clocase(c[2]) == 'm') ? OP_REMOVE : OP_RESET; break; + case 'e': op = OP_EDIT; break; + case 'b': op = OP_BEFORE; break; + default: op = OP_SET; break; + } + if (strcasecmp(c, cf_op_names[op])) { + err = cf_printf("Unknown operation %s", c); + goto error; + } + } + if (p->ends_by_brace) + op |= OP_OPEN; + err = cf_interpret_line(cc, name, op, p->words-1, pars); + if (err) + goto error; + } + + if (!depth) + { + if (cf_done_stack(cc)) + err = "Unterminated block"; + else if (maybe_commit(cc)) + err = "Commit failed"; + } + if (!err) + return NULL; + +error: + if (name_fb) + msg(L_ERROR, "File %s, line %d: %s", name_fb, p->line_num, err); + else if (p->line_num == 1) + msg(L_ERROR, "Manual setting of configuration: %s", err); + else + msg(L_ERROR, "Manual setting of configuration, line %d: %s", p->line_num, err); + return "included from here"; +} + +static int +load_file(struct cf_context *cc, const char *file) +{ + struct fastbuf *fb = bopen_try(file, O_RDONLY, 1<<14); + if (!fb) { + msg(L_ERROR, "Cannot open configuration file %s: %m", file); + return 1; + } + char *err_msg = parse_fastbuf(cc, file, fb, 0); + bclose(fb); + return !!err_msg; +} + +static int +load_string(struct cf_context *cc, 
const char *string) /* Parse configuration commands from a zero-terminated string. Returns 1 on error, 0 on success. */ +{ + struct fastbuf fb; + fbbuf_init_read(&fb, (byte *)string, strlen(string), 0); + char *msg = parse_fastbuf(cc, NULL, &fb, 0); + return !!msg; +} + +/* Safe loading and reloading */ + +struct conf_entry { /* We remember a list of actions to apply upon reload */ + cnode n; + enum { + CE_FILE = 1, + CE_STRING = 2, + } type; + char *arg; /* file name or configuration string, copied to the configuration pool */ +}; + +static void +cf_remember_entry(struct cf_context *cc, uint type, const char *arg) /* Add an entry to the end of cc->conf_entries. Nothing is remembered when journalling is disabled. */ +{ + if (!cc->enable_journal) + return; + struct conf_entry *ce = cf_malloc(sizeof(*ce)); + ce->type = type; + ce->arg = cf_strdup(arg); + clist_add_tail(&cc->conf_entries, &ce->n); +} + +int +cf_reload(const char *file) /* Reload the configuration either from @file, or (if @file is NULL) by repeating all remembered entries. On failure, the previous state is restored. Returns non-zero on error. */ +{ + struct cf_context *cc = cf_get_context(); + ASSERT(cc->enable_journal); + cf_journal_swap(); + struct cf_journal_item *oldj = cf_journal_new_transaction(1); + uint ec = cc->everything_committed; + cc->everything_committed = 0; + + clist old_entries; + clist_move(&old_entries, &cc->conf_entries); /* entries are re-recorded as they are replayed */ + cf_open_group(); + + int err = 0; + if (file) + err = load_file(cc, file); /* NOTE(review): unlike cf_load(), this path does not remember @file, so the list of entries stays empty -- confirm that it is intended */ + else + CLIST_FOR_EACH(struct conf_entry *, ce, old_entries) { + if (ce->type == CE_FILE) + err |= load_file(cc, ce->arg); + else + err |= load_string(cc, ce->arg); + if (err) + break; + cf_remember_entry(cc, ce->type, ce->arg); + } + + err |= cf_close_group(); + + if (!err) { + cf_journal_delete(); + cf_journal_commit_transaction(1, NULL); + } else { + cc->everything_committed = ec; + cf_journal_rollback_transaction(1, oldj); + cf_journal_swap(); + clist_move(&cc->conf_entries, &old_entries); /* put back the original list of entries */ + } + return err; +} + +int +cf_load(const char *file) /* Load @file inside a journal transaction and remember it for reloading. Returns non-zero on error, in which case the transaction is rolled back. */ +{ + struct cf_context *cc = cf_get_context(); + struct cf_journal_item *oldj = cf_journal_new_transaction(1); + int err = load_file(cc, file); + if (!err) { + cf_journal_commit_transaction(1, oldj); + cf_remember_entry(cc, CE_FILE, file); + cc->config_loaded = 1; + } else + cf_journal_rollback_transaction(1, oldj); + return err; +} + +int +cf_set(const char *string) +{ + struct cf_context 
*cc = cf_get_context(); /* cf_set() parses @string inside a journal transaction and remembers it for reloading; it returns non-zero on error */ + struct cf_journal_item *oldj = cf_journal_new_transaction(0); + int err = load_string(cc, string); + if (!err) { + cf_journal_commit_transaction(0, oldj); + cf_remember_entry(cc, CE_STRING, string); + } else + cf_journal_rollback_transaction(0, oldj); + return err; +} + +void +cf_revert(void) +{ + cf_journal_swap(); + cf_journal_delete(); +} + +void +cf_open_group(void) /* Groups can be nested: commits are postponed while at least one group is open. */ +{ + struct cf_context *cc = cf_get_context(); + cc->postpone_commit++; +} + +int +cf_close_group(void) /* Close a group opened by cf_open_group(). Closing the outermost group triggers maybe_commit(), whose result is returned; otherwise 0 is returned. */ +{ + struct cf_context *cc = cf_get_context(); + ASSERT(cc->postpone_commit); + if (!--cc->postpone_commit) + return maybe_commit(cc); + else + return 0; +} diff --git a/libucw/ucw/conf-internal.h b/libucw/ucw/conf-internal.h new file mode 100644 index 0000000..408fc19 --- /dev/null +++ b/libucw/ucw/conf-internal.h @@ -0,0 +1,126 @@ +/* + * UCW Library -- Configuration files: only for internal use of conf-*.c + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2012 Martin Mares + * (c) 2014 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_CONF_INTERNAL_H +#define _UCW_CONF_INTERNAL_H + +#include /* NOTE(review): the header name is missing in this copy */ + +#ifdef CONFIG_UCW_CLEAN_ABI /* rename the internal symbols, so that all of them carry the ucw_ prefix */ +#define cf_add_dirty ucw_cf_add_dirty +#define cf_commit_all ucw_cf_commit_all +#define cf_done_stack ucw_cf_done_stack +#define cf_find_subitem ucw_cf_find_subitem +#define cf_init_stack ucw_cf_init_stack +#define cf_interpret_line ucw_cf_interpret_line +#define cf_journal_delete ucw_cf_journal_delete +#define cf_journal_swap ucw_cf_journal_swap +#define cf_load_default ucw_cf_load_default +#define cf_obtain_context ucw_cf_obtain_context +#define cf_op_names ucw_cf_op_names +#define cf_sections ucw_cf_sections +#define cf_type_names ucw_cf_type_names +#define cf_type_size ucw_cf_type_size +#endif + +/* Item stack used by conf-intr.c */ + +#define MAX_STACK_SIZE 16 /* number of entries of the interpreter stack in struct cf_context */ + +struct item_stack { // used by conf-intr.c + struct cf_section *sec; // nested section + void *base_ptr; // because original pointers are often relative + int op; // it is performed when a closing brace is encountered + void *list; // list the operations should be done on + u32 mask; // bit array of selectors searching in a list + struct cf_item *item; // cf_item of the list +}; + +/* List of dirty sections used by conf-section.c */ + +struct dirty_section { + struct cf_section *sec; + void *ptr; +}; + +#define GBUF_TYPE struct dirty_section +#define GBUF_PREFIX(x) dirtsec_##x +#include + +/* Configuration context */ + +struct cf_context { + struct mempool *pool; + int is_active; + int config_loaded; // at least one config file was loaded + struct cf_parser_state *parser; + uint everything_committed; // did we already commit each section? 
+ uint postpone_commit; // number of groups opened by cf_open_group() and not closed yet + uint other_options; // used internally by cf_getopt() + clist conf_entries; // files/strings to reload + struct cf_journal_item *journal; // journalling + int enable_journal; + struct old_pools *pools; + struct item_stack stack[MAX_STACK_SIZE]; // interpreter stack + uint stack_level; + struct cf_section sections; // root section + uint sections_initialized; + dirtsec_t dirty; // dirty sections + uint dirties; +}; + +/* conf-context.c */ +static inline struct cf_context *cf_get_context(void) /* Return the current context of the calling thread, which must be marked as active. */ +{ + struct cf_context *cc = ucwlib_thread_context()->cf_context; + ASSERT(cc->is_active); + return cc; +} + +// In fact, this is equivalent to cf_get_context(), but it is not inlined, +// because we want to force the linker to include conf-context.c, which contains +// a constructor of the whole context mechanism. +struct cf_context *cf_obtain_context(void); + +/* conf-intr.c */ +#define OP_MASK 0xff // only get the operation +#define OP_OPEN 0x100 // here we only get an opening brace instead of parameters +#define OP_1ST 0x200 // in the 1st phase selectors are recorded into the mask +#define OP_2ND 0x400 // in the 2nd phase real data are entered +enum cf_operation; +extern char *cf_op_names[]; +extern char *cf_type_names[]; + +uint cf_type_size(enum cf_type type, const union cf_union *u); +char *cf_interpret_line(struct cf_context *cc, char *name, enum cf_operation op, int number, char **pars); +void cf_init_stack(struct cf_context *cc); +int cf_done_stack(struct cf_context *cc); + +/* conf-journal.c */ +void cf_journal_swap(void); +void cf_journal_delete(void); + +/* conf-section.c */ +#define SEC_FLAG_DYNAMIC 0x80000000 // contains a dynamic attribute +#define SEC_FLAG_UNKNOWN 0x40000000 // ignore unknown entries +#define SEC_FLAG_CANT_COPY 0x20000000 // contains lists or parsers +#define SEC_FLAG_NUMBER 0x0fffffff // number of entries +enum cf_commit_mode { CF_NO_COMMIT, CF_COMMIT, CF_COMMIT_ALL }; +extern 
struct cf_section cf_sections; + +struct cf_item *cf_find_subitem(struct cf_section *sec, const char *name); +int cf_commit_all(enum cf_commit_mode cm); +void cf_add_dirty(struct cf_section *sec, void *ptr); + +/* conf-getopt.c */ +void cf_load_default(struct cf_context *cc); + +#endif diff --git a/libucw/ucw/conf-intr.c b/libucw/ucw/conf-intr.c new file mode 100644 index 0000000..b512b10 --- /dev/null +++ b/libucw/ucw/conf-intr.c @@ -0,0 +1,665 @@ +/* + * UCW Library -- Configuration files: interpreter + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2014 Martin Mares + * (c) 2014 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define TRY(f) do { char *_msg = f; if (_msg) return _msg; } while (0) + +/* Register size of and parser for each basic type */ + +static char * +cf_parse_string(char *str, char **ptr) +{ + *ptr = cf_strdup(str); + return NULL; +} + +typedef char *cf_basic_parser(char *str, void *ptr); +static struct { + uint size; + void *parser; +} parsers[] = { + { sizeof(int), cf_parse_int }, + { sizeof(u64), cf_parse_u64 }, + { sizeof(double), cf_parse_double }, + { sizeof(u32), cf_parse_ip }, + { sizeof(char*), cf_parse_string }, + { sizeof(int), NULL }, // lookups are parsed extra + { 0, NULL }, // user-defined types are parsed extra +}; + +inline uint +cf_type_size(enum cf_type type, const union cf_union *u) +{ + switch (type) + { + case CT_USER: + return u->utype->size; + case CT_XTYPE: + return u->xtype->size; + default: + ASSERT(type < ARRAY_SIZE(parsers) - 1); + return parsers[type].size; + } +} + +static char * +cf_parse_lookup(char *str, int *ptr, const char * const *t) +{ + const char * const *n = t; + uint total_len = 0; + while (*n && strcasecmp(*n, str)) { + total_len += strlen(*n) + 2; + n++; + } + if (*n) { + *ptr = n - t; + return 
NULL; + } + char *err = cf_malloc(total_len + strlen(str) + 60), *c = err; + c += sprintf(err, "Invalid value %s, possible values are: ", str); + for (n=t; *n; n++) + c+= sprintf(c, "%s, ", *n); + if (*t) + c[-2] = 0; + *ptr = -1; + return err; +} + +static char * +cf_parse_ary(uint number, char **pars, void *ptr, enum cf_type type, union cf_union *u) +{ + for (uint i=0; ilookup); + else if (type == CT_USER) + msg = u->utype->parser(pars[i], ptr + i * size); + else if (type == CT_XTYPE) + msg = (char *)u->xtype->parse(pars[i], ptr + i * size, cf_get_pool()); + else + ASSERT(0); + if (msg) + return number > 1 ? cf_printf("Item %d: %s", i+1, msg) : msg; + } + return NULL; +} + +/* Interpreter */ + +#define T(x) #x, +char *cf_op_names[] = { CF_OPERATIONS }; +#undef T +char *cf_type_names[] = { "int", "u64", "double", "ip", "string", "lookup", "user", "xtype" }; + +static char * +interpret_set_dynamic(struct cf_item *item, int number, char **pars, void **ptr) +{ + enum cf_type type = item->type; + uint size = cf_type_size(type, &item->u); + cf_journal_block(ptr, sizeof(void*)); + // boundary checks done by the caller + *ptr = gary_init(size, number, mp_get_allocator(cf_get_pool())); + return cf_parse_ary(number, pars, *ptr, type, &item->u); +} + +static char * +interpret_add_dynamic(struct cf_item *item, int number, char **pars, int *processed, void **ptr, enum cf_operation op) +{ + enum cf_type type = item->type; + void *old_p = *ptr; + uint size = cf_type_size(item->type, &item->u); + ASSERT(size >= sizeof(uint)); + int old_nr = old_p ? 
GARY_SIZE(old_p) : 0; + int taken = MIN(number, ABS(item->number)-old_nr); + *processed = taken; + // stretch the dynamic array + void *new_p = gary_init(size, old_nr + taken, mp_get_allocator(cf_get_pool())); + cf_journal_block(ptr, sizeof(void*)); + *ptr = new_p; + if (op == OP_APPEND) { + memcpy(new_p, old_p, old_nr * size); + return cf_parse_ary(taken, pars, new_p + old_nr * size, type, &item->u); + } else if (op == OP_PREPEND) { + memcpy(new_p + taken * size, old_p, old_nr * size); + return cf_parse_ary(taken, pars, new_p, type, &item->u); + } else + return cf_printf("Dynamic arrays do not support operation %s", cf_op_names[op]); +} + +static char *interpret_set_item(struct cf_item *item, int number, char **pars, int *processed, void *ptr, uint allow_dynamic); + +static char * +interpret_section(struct cf_section *sec, int number, char **pars, int *processed, void *ptr, uint allow_dynamic) +{ + cf_add_dirty(sec, ptr); + *processed = 0; + for (struct cf_item *ci=sec->cfg; ci->cls; ci++) + { + int taken = 0; // assignment only to silence false positive warnings in some compilers about possibly uninitialized variable + char *msg = interpret_set_item(ci, number, pars, &taken, ptr + (uintptr_t) ci->ptr, allow_dynamic && !ci[1].cls); + if (msg) + return cf_printf("Item %s: %s", ci->name, msg); + *processed += taken; + number -= taken; + pars += taken; + if (!number) // stop parsing, because many parsers would otherwise complain that number==0 + break; + } + return NULL; +} + +static void +add_to_list(cnode *where, cnode *new_node, enum cf_operation op) +{ + switch (op) + { + case OP_EDIT: // editation has been done in-place + break; + case OP_REMOVE: + CF_JOURNAL_VAR(where->prev->next); + CF_JOURNAL_VAR(where->next->prev); + clist_remove(where); + break; + case OP_AFTER: // implementation dependent (prepend_head = after(list)), and where==list, see clists.h:74 + case OP_PREPEND: + case OP_COPY: + CF_JOURNAL_VAR(where->next->prev); + CF_JOURNAL_VAR(where->next); + 
clist_insert_after(new_node, where); + break; + case OP_BEFORE: // implementation dependent (append_tail = before(list)) + case OP_APPEND: + case OP_SET: + CF_JOURNAL_VAR(where->prev->next); + CF_JOURNAL_VAR(where->prev); + clist_insert_before(new_node, where); + break; + default: + ASSERT(0); + } +} + +static char * +interpret_add_list(struct cf_item *item, int number, char **pars, int *processed, void *ptr, enum cf_operation op) +{ + if (op >= OP_REMOVE) + return cf_printf("You have to open a block for operation %s", cf_op_names[op]); + if (!number) + return "Nothing to add to the list"; + struct cf_section *sec = item->u.sec; + *processed = 0; + uint index = 0; + while (number > 0) + { + void *node = cf_malloc(sec->size); + cf_init_section(item->name, sec, node, 1); + add_to_list(ptr, node, op); + int taken; + /* If the node contains any dynamic attribute at the end, we suppress + * auto-repetition here and pass the flag inside instead. */ + index++; + char *msg = interpret_section(sec, number, pars, &taken, node, sec->flags & SEC_FLAG_DYNAMIC); + if (msg) + return sec->flags & SEC_FLAG_DYNAMIC ? 
msg : cf_printf("Node %d of list %s: %s", index, item->name, msg); + *processed += taken; + number -= taken; + pars += taken; + if (sec->flags & SEC_FLAG_DYNAMIC) + break; + } + return NULL; +} + +static char * +interpret_add_bitmap(struct cf_item *item, int number, char **pars, int *processed, u32 *ptr, enum cf_operation op) +{ + if (op == OP_PREPEND || op == OP_APPEND) + op = OP_SET; + if (op != OP_SET && op != OP_REMOVE) + return cf_printf("Cannot apply operation %s on a bitmap", cf_op_names[op]); + else if (item->type != CT_INT && item->type != CT_LOOKUP) + return cf_printf("Type %s cannot be used with bitmaps", cf_type_names[item->type]); + cf_journal_block(ptr, sizeof(u32)); + for (int i=0; itype == CT_INT) + TRY( cf_parse_int(pars[i], &idx) ); + else + TRY( cf_parse_lookup(pars[i], &idx, item->u.lookup) ); + if (idx >= 32) + return "Bitmaps only have 32 bits"; + if (op == OP_SET) + *ptr |= 1<cls) + { + case CC_STATIC: + if (!number) + return "Missing value"; + taken = MIN(number, item->number); + *processed = taken; + uint size = cf_type_size(item->type, &item->u); + cf_journal_block(ptr, taken * size); + return cf_parse_ary(taken, pars, ptr, item->type, &item->u); + case CC_DYNAMIC: + if (!allow_dynamic) + return "Dynamic array cannot be used here"; + taken = MIN(number, ABS(item->number)); + *processed = taken; + return interpret_set_dynamic(item, taken, pars, ptr); + case CC_PARSER: + if (item->number < 0 && !allow_dynamic) + return "Parsers with variable number of parameters cannot be used here"; + if (item->number > 0 && number < item->number) + return "Not enough parameters available for the parser"; + taken = MIN(number, ABS(item->number)); + *processed = taken; + for (int i=0; iu.par(taken, pars, ptr); + case CC_SECTION: + return interpret_section(item->u.sec, number, pars, processed, ptr, allow_dynamic); + case CC_LIST: + if (!allow_dynamic) + return "Lists cannot be used here"; + return interpret_add_list(item, number, pars, processed, ptr, 
OP_SET); + case CC_BITMAP: + if (!allow_dynamic) + return "Bitmaps cannot be used here"; + return interpret_add_bitmap(item, number, pars, processed, ptr, OP_SET); + default: + ASSERT(0); + } +} + +static char * +interpret_set_all(struct cf_item *item, void *ptr, enum cf_operation op) +{ + if (item->cls == CC_BITMAP) { + cf_journal_block(ptr, sizeof(u32)); + if (op == OP_CLEAR) + * (u32*) ptr = 0; + else + if (item->type == CT_INT) + * (u32*) ptr = ~0u; + else { + uint nr = -1; + while (item->u.lookup[++nr]); + * (u32*) ptr = ~0u >> (32-nr); + } + return NULL; + } else if (op != OP_CLEAR) + return "The item is not a bitmap"; + + if (item->cls == CC_LIST) { + cf_journal_block(ptr, sizeof(clist)); + clist_init(ptr); + } else if (item->cls == CC_DYNAMIC) { + cf_journal_block(ptr, sizeof(void *)); + * (void**) ptr = GARY_FOREVER_EMPTY; + } else if (item->cls == CC_STATIC && item->type == CT_STRING) { + cf_journal_block(ptr, item->number * sizeof(char*)); + bzero(ptr, item->number * sizeof(char*)); + } else + return "The item is not a list, dynamic array, bitmap, or string"; + return NULL; +} + +static int +cmp_items(void *i1, void *i2, struct cf_item *item) +{ + ASSERT(item->cls == CC_STATIC); + i1 += (uintptr_t) item->ptr; + i2 += (uintptr_t) item->ptr; + if (item->type == CT_STRING) + return strcmp(* (char**) i1, * (char**) i2); + else // all numeric types + return memcmp(i1, i2, cf_type_size(item->type, &item->u)); +} + +static void * +find_list_node(clist *list, void *query, struct cf_section *sec, u32 mask) +{ + CLIST_FOR_EACH(cnode *, n, *list) + { + uint found = 1; + for (uint i=0; i<32; i++) + if (mask & (1<cfg+i)) + { + found = 0; + break; + } + if (found) + return n; + } + return NULL; +} + +static char * +record_selector(struct cf_item *item, struct cf_section *sec, u32 *mask) +{ + uint nr = sec->flags & SEC_FLAG_NUMBER; + if (item >= sec->cfg && item < sec->cfg + nr) // setting an attribute relative to this section + { + uint i = item - sec->cfg; + if (i >= 
32) + return "Cannot select list nodes by this attribute"; + if (sec->cfg[i].cls != CC_STATIC) + return "Selection can only be done based on basic attributes"; + *mask |= 1 << i; + } + return NULL; +} + +static char * +opening_brace(struct cf_context *cc, struct cf_item *item, void *ptr, enum cf_operation op) +{ + if (cc->stack_level >= MAX_STACK_SIZE-1) + return "Too many nested sections"; + enum cf_operation pure_op = op & OP_MASK; + cc->stack[++cc->stack_level] = (struct item_stack) { + .sec = NULL, + .base_ptr = NULL, + .op = pure_op, + .list = NULL, + .mask = 0, + .item = NULL, + }; + if (!item) // unknown is ignored; we just need to trace recursion + return NULL; + cc->stack[cc->stack_level].sec = item->u.sec; + if (item->cls == CC_SECTION) + { + if (pure_op != OP_SET) + return "Only SET operation can be used with a section"; + cc->stack[cc->stack_level].base_ptr = ptr; + cc->stack[cc->stack_level].op = OP_EDIT | OP_2ND; // this list operation does nothing + } + else if (item->cls == CC_LIST) + { + cc->stack[cc->stack_level].base_ptr = cf_malloc(item->u.sec->size); + cf_init_section(item->name, item->u.sec, cc->stack[cc->stack_level].base_ptr, 1); + cc->stack[cc->stack_level].list = ptr; + cc->stack[cc->stack_level].item = item; + if (pure_op == OP_ALL) + return "Operation ALL cannot be applied on lists"; + else if (pure_op < OP_REMOVE) { + add_to_list(ptr, cc->stack[cc->stack_level].base_ptr, pure_op); + cc->stack[cc->stack_level].op |= OP_2ND; + } else + cc->stack[cc->stack_level].op |= OP_1ST; + } + else + return "Opening brace can only be used on sections and lists"; + return NULL; +} + +static char * +closing_brace(struct cf_context *cc, struct item_stack *st, enum cf_operation op, int number, char **pars) +{ + if (st->op == OP_CLOSE) // top-level + return "Unmatched } parenthesis"; + if (!st->sec) { // dummy run on unknown section + if (!(op & OP_OPEN)) + cc->stack_level--; + return NULL; + } + enum cf_operation pure_op = st->op & OP_MASK; + if (st->op 
/*
 * Resolve a (possibly dotted) item name to its cf_item.
 *
 * The search starts in @curr_sec; a leading '^' makes the name absolute,
 * i.e. restarts the search in the root section of the current context and
 * resets *@ptr to NULL.  While descending, the item's offset/address
 * (ci->ptr) is added to *@ptr, so that on success *@ptr points to the data
 * of the returned item.  Every section visited except the root is marked
 * dirty.
 *
 * Returns the item, or NULL when nothing was found.  *@msg is set to an
 * error message, or left NULL when the miss should be silently ignored
 * (@curr_sec is NULL, or the section carries SEC_FLAG_UNKNOWN).
 *
 * NOTE: the name is split in place -- each '.' found by strchr() is
 * overwritten by a NUL byte even though the parameter is declared const,
 * so the caller's buffer has to be writable.
 */
static struct cf_item *
find_item(struct cf_section *curr_sec, const char *name, char **msg, void **ptr)
{
  struct cf_context *cc = cf_get_context();
  *msg = NULL;
  if (name[0] == '^')				// absolute name instead of relative
    name++, curr_sec = &cc->sections, *ptr = NULL;
  if (!curr_sec)				// don't even search in an unknown section
    return NULL;
  while (1)
  {
    if (curr_sec != &cc->sections)
      cf_add_dirty(curr_sec, *ptr);
    // Cut off the first component of the name; c points to the rest (if any)
    char *c = strchr(name, '.');
    if (c)
      *c++ = 0;
    struct cf_item *ci = cf_find_subitem(curr_sec, name);
    if (!ci->cls)				// hit the terminating entry: no such item
    {
      if (!(curr_sec->flags & SEC_FLAG_UNKNOWN))	// ignore silently unknown top-level sections and unknown attributes in flagged sections
	*msg = cf_printf("Unknown item %s", name);
      return NULL;
    }
    *ptr += (uintptr_t) ci->ptr;
    if (!c)					// last component reached
      return ci;
    if (ci->cls != CC_SECTION)
    {
      *msg = cf_printf("Item %s is not a section", name);
      return NULL;
    }
    curr_sec = ci->u.sec;
    name = c;
  }
}
break; + default: + msg = interpret_add(name, item, number, pars, &taken, ptr, op); + } + if (msg) + return msg; + if (taken < number) + return cf_printf("Too many parameters: %d>%d", number, taken); + + return NULL; +} + +char * +cf_find_item(const char *name, struct cf_item *item) +{ + struct cf_context *cc = cf_get_context(); + char *msg; + void *ptr = NULL; + struct cf_item *ci = find_item(&cc->sections, name, &msg, &ptr); + if (msg) + return msg; + if (ci) { + *item = *ci; + item->ptr = ptr; + } else + bzero(item, sizeof(struct cf_item)); + return NULL; +} + +char * +cf_modify_item(struct cf_item *item, enum cf_operation op, int number, char **pars) +{ + char *msg; + int taken = 0; + switch (op) { + case OP_SET: + msg = interpret_set_item(item, number, pars, &taken, item->ptr, 1); + break; + case OP_CLEAR: + case OP_ALL: + msg = interpret_set_all(item, item->ptr, op); + break; + case OP_APPEND: + case OP_PREPEND: + switch (item->cls) { + case CC_DYNAMIC: + msg = interpret_add_dynamic(item, number, pars, &taken, item->ptr, op); + break; + case CC_LIST: + msg = interpret_add_list(item, number, pars, &taken, item->ptr, op); + break; + case CC_BITMAP: + msg = interpret_add_bitmap(item, number, pars, &taken, item->ptr, op); + break; + default: + return "The attribute does not support append/prepend"; + } + break; + case OP_REMOVE: + if (item->cls == CC_BITMAP) + msg = interpret_add_bitmap(item, number, pars, &taken, item->ptr, op); + else + return "Only applicable on bitmaps"; + break; + default: + return "Unsupported operation"; + } + if (msg) + return msg; + if (taken < number) + return "Too many parameters"; + return NULL; +} + +void +cf_init_stack(struct cf_context *cc) +{ + if (!cc->sections_initialized++) { + cc->sections.flags |= SEC_FLAG_UNKNOWN; + cc->sections.size = 0; // size of allocated array used to be stored here + cf_init_section(NULL, &cc->sections, NULL, 0); + } + cc->stack_level = 0; + cc->stack[0] = (struct item_stack) { + .sec = &cc->sections, 
+ .base_ptr = NULL, + .op = OP_CLOSE, + .list = NULL, + .mask = 0, + .item = NULL + }; +} + +int +cf_done_stack(struct cf_context *cc) +{ + return (cc->stack_level > 0); +} diff --git a/libucw/ucw/conf-journal.c b/libucw/ucw/conf-journal.c new file mode 100644 index 0000000..7d3ccdc --- /dev/null +++ b/libucw/ucw/conf-journal.c @@ -0,0 +1,128 @@ +/* + * UCW Library -- Configuration files: journaling + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2012 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include +#include +#include + +#include + +struct old_pools { + struct old_pools *prev; + struct mempool *pool; +}; // link-list of older cf_pool's + +struct cf_journal_item { + struct cf_journal_item *prev; + byte *ptr; + uint len; + byte copy[0]; +}; + +void +cf_set_journalling(int enable) +{ + struct cf_context *cc = cf_get_context(); + ASSERT(!cc->journal); + cc->enable_journal = enable; +} + +void +cf_journal_block(void *ptr, uint len) +{ + struct cf_context *cc = cf_get_context(); + if (!cc->enable_journal) + return; + struct cf_journal_item *ji = cf_malloc(sizeof(struct cf_journal_item) + len); + ji->prev = cc->journal; + ji->ptr = ptr; + ji->len = len; + memcpy(ji->copy, ptr, len); + cc->journal = ji; +} + +void +cf_journal_swap(void) + // swaps the contents of the memory and the journal, and reverses the list +{ + struct cf_context *cc = cf_get_context(); + struct cf_journal_item *curr, *prev, *next; + for (next=NULL, curr=cc->journal; curr; next=curr, curr=prev) + { + prev = curr->prev; + curr->prev = next; + for (uint i=0; ilen; i++) + { + byte x = curr->copy[i]; + curr->copy[i] = curr->ptr[i]; + curr->ptr[i] = x; + } + } + cc->journal = next; +} + +struct cf_journal_item * +cf_journal_new_transaction(uint new_pool) +{ + struct cf_context *cc = cf_get_context(); + if (new_pool) + cc->pool = mp_new(1<<10); + struct cf_journal_item *oldj = 
cc->journal; + cc->journal = NULL; + return oldj; +} + +void +cf_journal_commit_transaction(uint new_pool, struct cf_journal_item *oldj) +{ + struct cf_context *cc = cf_get_context(); + if (new_pool) + { + struct old_pools *p = cf_malloc(sizeof(struct old_pools)); + p->prev = cc->pools; + p->pool = cc->pool; + cc->pools = p; + } + if (oldj) + { + struct cf_journal_item **j = &cc->journal; + while (*j) + j = &(*j)->prev; + *j = oldj; + } +} + +void +cf_journal_rollback_transaction(uint new_pool, struct cf_journal_item *oldj) +{ + struct cf_context *cc = cf_get_context(); + if (!cc->enable_journal) + return; + cf_journal_swap(); + cc->journal = oldj; + if (new_pool) + { + mp_delete(cc->pool); + cc->pool = cc->pools ? cc->pools->pool : NULL; + } +} + +void +cf_journal_delete(void) +{ + struct cf_context *cc = cf_get_context(); + for (struct old_pools *p=cc->pools; p; p=cc->pools) + { + cc->pools = p->prev; + mp_delete(p->pool); + } +} diff --git a/libucw/ucw/conf-parse.c b/libucw/ucw/conf-parse.c new file mode 100644 index 0000000..fc18504 --- /dev/null +++ b/libucw/ucw/conf-parse.c @@ -0,0 +1,171 @@ +/* + * UCW Library -- Configuration files: parsers for basic types + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2006 Martin Mares + * (c) 2019 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include +#include + +#include +#include +#include + +struct unit { + uint name; // one-letter name of the unit + uint den; // fraction + u64 num; +}; + +static const struct unit units[] = { + { 'd', 1, 86400 }, + { 'h', 1, 3600 }, + { 'k', 1, 1000 }, + { 'm', 1, 1000000 }, + { 'g', 1, 1000000000 }, + { 't', 1, 1000000000000LLU }, + { 'K', 1, 1<<10 }, + { 'M', 1, 1<<20 }, + { 'G', 1, 1<<30 }, + { 'T', 1, 1LLU<<40 }, + { '%', 100, 1 }, + { 0, 0, 0 } +}; + +static const struct unit * +lookup_unit(const char *value, const char *end, char **msg) +{ + if (end && *end) { + if (end == value || end[1] || *end >= '0' && *end <= '9') + *msg = "Invalid number"; + else { + for (const struct unit *u=units; u->name; u++) + if ((char)u->name == *end) + return u; + *msg = "Invalid unit"; + } + } + return NULL; +} + +static char cf_rngerr[] = "Number out of range"; + +char * +cf_parse_int(const char *str, int *ptr) +{ + char *msg = NULL; + if (!*str) + msg = "Missing number"; + else { + const struct unit *u; + char *end; + errno = 0; + uint x = strtoul(str, &end, 0); + if (errno == ERANGE) + msg = cf_rngerr; + else if (u = lookup_unit(str, end, &msg)) { + u64 y = (u64)x * u->num; + if (y % u->den) + msg = "Number is not an integer"; + else { + y /= u->den; + if (y > 0xffffffff) + msg = cf_rngerr; + *ptr = y; + } + } else + *ptr = x; + } + return msg; +} + +char * +cf_parse_u64(const char *str, u64 *ptr) +{ + char *msg = NULL; + if (!*str) + msg = "Missing number"; + else { + const struct unit *u; + char *end; + errno = 0; + u64 x = strtoull(str, &end, 0); + if (errno == ERANGE) + msg = cf_rngerr; + else if (u = lookup_unit(str, end, &msg)) { + if (x > ~(u64)0 / u->num) + msg = "Number out of range"; + else { + x *= u->num; + if (x % u->den) + msg = "Number is not an integer"; + else + *ptr = x / u->den; + } + } else + *ptr = x; + } + return msg; +} + +char * +cf_parse_double(const char *str, double *ptr) +{ + char *msg = NULL; + if (!*str) + msg = "Missing 
number"; + else { + const struct unit *u; + double x; + uint read_chars; + if (sscanf(str, "%lf%n", &x, &read_chars) != 1) + msg = "Invalid number"; + else if (u = lookup_unit(str, str + read_chars, &msg)) + *ptr = x * u->num / u->den; + else + *ptr = x; + } + return msg; +} + +char * +cf_parse_ip(const char *p, u32 *varp) +{ + if (!*p) + return "Missing IP address"; + uint x = 0; + char *p2; + if (*p == '0' && (p[1] | 32) == 'x' && Cxdigit(p[2])) { + errno = 0; + x = strtoul(p, &p2, 16); + if (errno == ERANGE || x > 0xffffffff) + goto error; + p = p2; + } + else + for (uint i = 0; i < 4; i++) { + if (i) { + if (*p++ != '.') + goto error; + } + if (!Cdigit(*p)) + goto error; + errno = 0; + uint y = strtoul(p, &p2, 10); + if (errno == ERANGE || p2 == (char*) p || y > 255) + goto error; + p = p2; + x = (x << 8) + y; + } + *varp = x; + return *p ? "Trailing characters" : NULL; +error: + return "Invalid IP address"; +} + diff --git a/libucw/ucw/conf-section.c b/libucw/ucw/conf-section.c new file mode 100644 index 0000000..30f5898 --- /dev/null +++ b/libucw/ucw/conf-section.c @@ -0,0 +1,200 @@ +/* + * UCW Library -- Configuration files: sections + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2014 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include + +/* Dirty sections */ + +void +cf_add_dirty(struct cf_section *sec, void *ptr) +{ + struct cf_context *cc = cf_get_context(); + dirtsec_grow(&cc->dirty, cc->dirties+1); + struct dirty_section *dest = cc->dirty.ptr + cc->dirties; + if (cc->dirties && dest[-1].sec == sec && dest[-1].ptr == ptr) + return; + dest->sec = sec; + dest->ptr = ptr; + cc->dirties++; +} + +#define ASORT_PREFIX(x) dirtsec_##x +#define ASORT_KEY_TYPE struct dirty_section +#define ASORT_LT(x,y) x.sec < y.sec || x.sec == y.sec && x.ptr < y.ptr +#include + +static void +sort_dirty(struct cf_context *cc) +{ + if (cc->dirties <= 1) + return; + dirtsec_sort(cc->dirty.ptr, cc->dirties); + // and compress the list + struct dirty_section *read = cc->dirty.ptr + 1, *write = cc->dirty.ptr + 1, *limit = cc->dirty.ptr + cc->dirties; + while (read < limit) { + if (read->sec != read[-1].sec || read->ptr != read[-1].ptr) { + if (read != write) + *write = *read; + write++; + } + read++; + } + cc->dirties = write - cc->dirty.ptr; +} + +/* Initialization */ + +struct cf_item * +cf_find_subitem(struct cf_section *sec, const char *name) +{ + struct cf_item *ci = sec->cfg; + for (; ci->cls; ci++) + if (!strcasecmp(ci->name, name)) + return ci; + return ci; +} + +static void +inspect_section(struct cf_section *sec) +{ + sec->flags = 0; + struct cf_item *ci; + for (ci=sec->cfg; ci->cls; ci++) + if (ci->cls == CC_SECTION) { + inspect_section(ci->u.sec); + sec->flags |= ci->u.sec->flags & (SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY); + } else if (ci->cls == CC_LIST) { + inspect_section(ci->u.sec); + sec->flags |= SEC_FLAG_DYNAMIC | SEC_FLAG_CANT_COPY; + } else if (ci->cls == CC_DYNAMIC || ci->cls == CC_BITMAP) + sec->flags |= SEC_FLAG_DYNAMIC; + else if (ci->cls == CC_PARSER) { + sec->flags |= SEC_FLAG_CANT_COPY; + if (ci->number < 0) + sec->flags |= SEC_FLAG_DYNAMIC; + } + if (sec->copy) + sec->flags &= ~SEC_FLAG_CANT_COPY; + sec->flags |= ci - 
sec->cfg; // record the number of entries +} + +void +cf_declare_rel_section(const char *name, struct cf_section *sec, void *ptr, uint allow_unknown) +{ + struct cf_context *cc = cf_obtain_context(); + if (!cc->sections.cfg) + { + cc->sections.size = 50; + cc->sections.cfg = xmalloc_zero(cc->sections.size * sizeof(struct cf_item)); + } + struct cf_item *ci = cf_find_subitem(&cc->sections, name); + if (ci->cls) + die("Cannot register section %s twice", name); + ci->cls = CC_SECTION; + ci->name = name; + ci->number = 1; + ci->ptr = ptr; + ci->u.sec = sec; + inspect_section(sec); + if (allow_unknown) + sec->flags |= SEC_FLAG_UNKNOWN; + ci++; + if (ci - cc->sections.cfg >= (int) cc->sections.size) + { + cc->sections.cfg = xrealloc(cc->sections.cfg, 2*cc->sections.size * sizeof(struct cf_item)); + bzero(cc->sections.cfg + cc->sections.size, cc->sections.size * sizeof(struct cf_item)); + cc->sections.size *= 2; + } +} + +void +cf_declare_section(const char *name, struct cf_section *sec, uint allow_unknown) +{ + cf_declare_rel_section(name, sec, NULL, allow_unknown); +} + +void +cf_init_section(const char *name, struct cf_section *sec, void *ptr, uint do_bzero) +{ + if (do_bzero) { + ASSERT(sec->size); + bzero(ptr, sec->size); + } + for (struct cf_item *ci=sec->cfg; ci->cls; ci++) + if (ci->cls == CC_SECTION) + cf_init_section(ci->name, ci->u.sec, ptr + (uintptr_t) ci->ptr, 0); + else if (ci->cls == CC_LIST) + clist_init(ptr + (uintptr_t) ci->ptr); + else if (ci->cls == CC_DYNAMIC) { + void **dyn = ptr + (uintptr_t) ci->ptr; + if (!*dyn) // replace NULL by an empty array + *dyn = GARY_FOREVER_EMPTY; + } + if (sec->init) { + char *msg = sec->init(ptr); + if (msg) + die("Cannot initialize section %s: %s", name, msg); + } +} + +static char * +commit_section(struct cf_section *sec, void *ptr, uint commit_all) +{ + struct cf_context *cc = cf_get_context(); + char *err; + + for (struct cf_item *ci=sec->cfg; ci->cls; ci++) + if (ci->cls == CC_SECTION) { + if ((err = 
commit_section(ci->u.sec, ptr + (uintptr_t) ci->ptr, commit_all))) { + msg(L_ERROR, "Cannot commit section %s: %s", ci->name, err); + return "commit of a subsection failed"; + } + } else if (ci->cls == CC_LIST) { + uint idx = 0; + CLIST_FOR_EACH(cnode *, n, * (clist*) (ptr + (uintptr_t) ci->ptr)) + if (idx++, err = commit_section(ci->u.sec, n, commit_all)) { + msg(L_ERROR, "Cannot commit node #%d of list %s: %s", idx, ci->name, err); + return "commit of a list failed"; + } + } + if (sec->commit) { + /* We have to process the whole tree of sections even if just a few changes + * have been made, because there are dependencies between commit-hooks and + * hence we need to call them in a fixed order. */ +#define ARY_LT_X(ary,i,x) ary[i].sec < x.sec || ary[i].sec == x.sec && ary[i].ptr < x.ptr + struct dirty_section comp = { sec, ptr }; + uint pos = BIN_SEARCH_FIRST_GE_CMP(cc->dirty.ptr, cc->dirties, comp, ARY_LT_X); + + if (commit_all + || (pos < cc->dirties && cc->dirty.ptr[pos].sec == sec && cc->dirty.ptr[pos].ptr == ptr)) + return sec->commit(ptr); + } + return 0; +} + +int +cf_commit_all(enum cf_commit_mode cm) +{ + struct cf_context *cc = cf_get_context(); + sort_dirty(cc); + if (cm == CF_NO_COMMIT) + return 0; + if (commit_section(&cc->sections, NULL, cm == CF_COMMIT_ALL)) + return 1; + cc->dirties = 0; + return 0; +} diff --git a/libucw/ucw/conf-test.c b/libucw/ucw/conf-test.c new file mode 100644 index 0000000..8ec5dd9 --- /dev/null +++ b/libucw/ucw/conf-test.c @@ -0,0 +1,260 @@ +/* + * Insane tester of reading configuration files + * + * (c) 2006 Robert Spalek + * (c) 2012--2014 Martin Mares + * (c) 2014 Pavel Charvat + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int verbose; +static int reload; + +struct sub_sect_1 { + cnode n; + char *name; + time_t t; + char *level; + int confidence[2]; + double *list; +}; + +static struct sub_sect_1 sec1 = { {}, "Charlie", 0, "WBAFC", { 0, -1}, NULL 
}; + +static char * +init_sec_1(struct sub_sect_1 *s) +{ + if (s == &sec1) // this is a static variable; skip clearing + return NULL; + s->name = "unknown"; + s->level = "default"; + s->confidence[0] = 5; + s->confidence[1] = 6; + // leave s->list==NULL + return NULL; +} + +static char * +commit_sec_1(struct sub_sect_1 *s) +{ + if (s->confidence[0] < 0 || s->confidence[0] > 10) + return "Well, this can't be"; + return NULL; +} + +static char * +time_parser(uint number, char **pars, time_t *ptr) +{ + *ptr = number ? atoi(pars[0]) : time(NULL); + return NULL; +} + +static struct cf_section cf_sec_1 = { + CF_TYPE(struct sub_sect_1), + CF_INIT(init_sec_1), + CF_COMMIT(commit_sec_1), +#define F(x) PTR_TO(struct sub_sect_1, x) + CF_ITEMS { + CF_STRING("name", F(name)), + //CF_PARSER("t", F(t), time_parser, 0), + CF_STRING("level", F(level)), + CF_INT_ARY("confidence", F(confidence[0]), 2), // XXX: the [0] is needed for the sake of type checking + CF_DOUBLE_DYN("list", F(list), 100), + CF_END + } +#undef F +}; + +static uint nr1 = 15; +static int *nrs1; +static int nrs2[5]; +static char *str1 = "no worries"; +static char **str2; +static u64 u1 = 0xCafeBeefDeadC00ll; +static double d1 = -1.1; +static clist secs; +static time_t t1, t2; +static u32 ip; +static int *look; +static u16 numbers[10] = { 2, 100, 1, 5 }; +static u32 bitmap1 = 0xff; +static u32 bitmap2 = 3; +static intmax_t intmax; + +static char * +parse_u16(char *string, u16 *ptr) +{ + uint a; + char *msg = cf_parse_int(string, &a); + if (msg) + return msg; + if (a >= (1<<16)) + return "Come on, man, this doesn't fit to 16 bits"; + *ptr = a; + return NULL; +} + +static void +dump_u16(struct fastbuf *fb, u16 *ptr) +{ + bprintf(fb, "%d ", *ptr); +} + +static struct cf_user_type u16_type = { + .size = sizeof(u16), + .name = "u16", + .parser = (cf_parser1*) parse_u16, + .dumper = (cf_dumper1*) dump_u16 +}; + +static char * +init_top(void *ptr UNUSED) +{ + for (uint i=0; i<5; i++) + { + struct sub_sect_1 *s = 
xmalloc(sizeof(struct sub_sect_1)); // XXX: cannot by cf_malloc(), because it's deleted when cf_reload()'ed + cf_init_section("slaves", &cf_sec_1, s, 1); + s->confidence[1] = i; + clist_add_tail(&secs, &s->n); + } + return NULL; +} + +static char * +commit_top(void *ptr UNUSED) +{ + if (nr1 != 15) + return "Don't touch my variable!"; + return NULL; +} + +static const char * const alphabet[] = { "alpha", "beta", "gamma", "delta", NULL }; +static struct cf_section cf_top = { + CF_INIT(init_top), + CF_COMMIT(commit_top), + CF_ITEMS { + CF_UINT("nr1", &nr1), + CF_INT_DYN("nrs1", &nrs1, 1000), + CF_INT_ARY("nrs2", nrs2, 5), + CF_STRING("str1", &str1), + CF_STRING_DYN("str2", &str2, 20), + CF_U64("u1", &u1), + CF_DOUBLE("d1", &d1), + CF_PARSER("FirstTime", &t1, time_parser, -1), + CF_PARSER("SecondTime", &t2, time_parser, 1), + CF_SECTION("master", &sec1, &cf_sec_1), + CF_LIST("slaves", &secs, &cf_sec_1), + CF_IP("ip", &ip), + CF_LOOKUP_DYN("look", &look, alphabet, 1000), + CF_USER_ARY("numbers", numbers, &u16_type, 10), + CF_XTYPE("intmax", &intmax, &xt_intmax), + CF_BITMAP_INT("bitmap1", &bitmap1), + CF_BITMAP_LOOKUP("bitmap2", &bitmap2, ((const char* const[]) { + "one", "two", "three", "four", "five", "six", "seven", "eight", + "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "seventeen", + "eighteen", "nineteen", "twenty", NULL // hidden joke here + })), + CF_END + } +}; + +static char short_opts[] = CF_SHORT_OPTS "rv"; +static struct option long_opts[] = { + CF_LONG_OPTS + {"reload", 0, 0, 'r'}, + {"verbose", 0, 0, 'v'}, + {NULL, 0, 0, 0} +}; + +static char *help = "\ +Usage: conf-test [ctxt] [nojournal] \n\ +\n\ +Options:\n" CF_USAGE "\ +-r, --reload\t\tReload configuration\n\ +-v, --verbose\t\tBe verbose\n\ +"; + +static void NONRET +usage(char *msg, ...) 
+{ + va_list va; + va_start(va, msg); + if (msg) + vfprintf(stderr, msg, va); + fputs(help, stderr); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + log_init(argv[0]); + struct cf_context *cc = NULL, *prev = NULL; + + // Special arguments which have to be parsed before cf_getopt() + while (argc > 1) { + if (!strcmp(argv[1], "ctxt")) { + cc = cf_new_context(); + prev = cf_switch_context(cc); + argc--, argv++; + } else if (!strcmp(argv[1], "nojournal")) { + cf_set_journalling(0); + argc--, argv++; + } else + break; + } + + cf_declare_section("top", &cf_top, 0); + cf_def_file = "ucw/conf-test.cf"; + + // Create and initialize dynamic arrays + GARY_INIT(nrs1, 6); + memcpy(nrs1, (int []) { 5, 5, 4, 3, 2, 1 }, 6 * sizeof(int)); + GARY_INIT(str2, 2); + str2[0] = "Alice"; + str2[1] = "Bob"; + GARY_INIT(look, 2); + look[0] = 2; + look[1] = 1; + GARY_INIT(sec1.list, 3); + memcpy(sec1.list, (double []) { 1e4, -1e-4, 8 }, 3 * sizeof(double)); + + int opt; + while ((opt = cf_getopt(argc, argv, short_opts, long_opts, NULL)) >= 0) + switch (opt) { + case 'r': reload++; break; + case 'v': verbose++; break; + default: usage("unknown option %c\n", opt); + } + if (optind < argc) + usage("too many parameters (%d more)\n", argc-optind); + + if (reload) { + cf_reload(NULL); + cf_reload(NULL); + } + + if (verbose) { + struct fastbuf *out = bfdopen(1, 1<<14); + cf_dump_sections(out); + bclose(out); + } + + if (cc) { + cf_switch_context(prev); + cf_delete_context(cc); + } + + return 0; +} diff --git a/libucw/ucw/conf-test.cf b/libucw/ucw/conf-test.cf new file mode 100644 index 0000000..33a9643 --- /dev/null +++ b/libucw/ucw/conf-test.cf @@ -0,0 +1,52 @@ +# test config file +#include ucw/conf-test.t ; top.xa=1 +#include 'non-existent file'; #top.xa=1 +Top { + nr1=16 #!!! + nrs1 2 3 5 \ + 7 11 13 \ + \ + 17M + nrs2 3 3k 3 3 3 ; \ + str1 "hello,\t\x2bworld%%\n" + str2 'Hagenuk, + the best' "\ + " qu'est-ce que c'est? 
+ u1 0xbadcafebadbeefc0 + str2:prepend prepended + str2:append appended + d1 7% + d1 -1.14e-25 + firsttime ; secondtime 56 + ^top.master:set alice HB8+ + slaves:clear + ip 0xa + ip 195.113.31.123 + look Alpha + look:prepend Beta GAMMA + numbers 11000 65535 + bitmap1 31 + bitmap1:remove 3 3 + bitmap2:all + bitmap2:remove eleven twelve one + intmax 1000000000 +};;;;;; + +unknown.ignored :-) + +top.slaves:reset cairns gpua 7 7 -10% +10% +top.slaves daintree rafc 4 5 -171% +top.slaves coogee pum 9 8 +top.slaves:prepend {name=bondi; level=\ + "PUG"; confidence 10 10} +top.slaves:remove {name daintree} +top.slaveS:edit {level PUG} Bondi PUG! +top.slaveS:before {level pum}{ + confidence 2 + list 123 456 789 +} +top.slaves:copy {name coogee} Coogee2 PUM + +topp.a=15 +top.nr1= ' 15' +a { ;-D } diff --git a/libucw/ucw/conf.h b/libucw/ucw/conf.h new file mode 100644 index 0000000..acd7460 --- /dev/null +++ b/libucw/ucw/conf.h @@ -0,0 +1,633 @@ +/* + * UCW Library -- Configuration files + * + * (c) 2001--2006 Robert Spalek + * (c) 2003--2014 Martin Mares + * (c) 2014 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _UCW_CONF_H +#define _UCW_CONF_H + +#include +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define cf_close_group ucw_cf_close_group +#define cf_declare_rel_section ucw_cf_declare_rel_section +#define cf_declare_section ucw_cf_declare_section +#define cf_delete_context ucw_cf_delete_context +#define cf_dump_sections ucw_cf_dump_sections +#define cf_find_item ucw_cf_find_item +#define cf_get_pool ucw_cf_get_pool +#define cf_init_section ucw_cf_init_section +#define cf_journal_block ucw_cf_journal_block +#define cf_journal_commit_transaction ucw_cf_journal_commit_transaction +#define cf_journal_new_transaction ucw_cf_journal_new_transaction +#define cf_journal_rollback_transaction ucw_cf_journal_rollback_transaction +#define cf_load ucw_cf_load +#define cf_malloc ucw_cf_malloc +#define cf_malloc_zero ucw_cf_malloc_zero +#define cf_modify_item ucw_cf_modify_item +#define cf_new_context ucw_cf_new_context +#define cf_open_group ucw_cf_open_group +#define cf_parse_double ucw_cf_parse_double +#define cf_parse_int ucw_cf_parse_int +#define cf_parse_ip ucw_cf_parse_ip +#define cf_parse_u64 ucw_cf_parse_u64 +#define cf_printf ucw_cf_printf +#define cf_reload ucw_cf_reload +#define cf_revert ucw_cf_revert +#define cf_set ucw_cf_set +#define cf_set_journalling ucw_cf_set_journalling +#define cf_strdup ucw_cf_strdup +#define cf_switch_context ucw_cf_switch_context +#endif + +struct mempool; + +/*** + * [[conf_ctxt]] + * Configuration contexts + * ~~~~~~~~~~~~~~~~~~~~~~ + * + * The state of the configuration parser is stored within a configuration context. + * One such context is automatically created during initialization of the library + * and you need not care about more, as long as you use a single configuration file. + * + * In full generality, you can define as many contexts as you wish and switch + * between them. Each thread has its own pointer to the current context, which + * must not be shared with other threads. 
+ ***/ + +/** Create a new configuration context. **/ +struct cf_context *cf_new_context(void); + +/** + * Free a configuration context. The context must not be set as current + * for any thread, nor can it be the default context. + * + * All configuration settings made within the context are rolled back + * (except when journalling is turned off). All memory allocated on behalf + * of the context is freed, which includes memory obtained by calls to + * @cf_malloc(). + **/ +void cf_delete_context(struct cf_context *cc); + +/** + * Make the given configuration context current and return the previously + * active context. Both the new and the old context may be NULL. + **/ +struct cf_context *cf_switch_context(struct cf_context *cc); + +/*** + * [[conf_load]] + * Safe configuration loading + * ~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * These functions can be used to safely load or reload configuration. + */ + +/** + * Load configuration from @file. + * Returns a non-zero value upon error. In that case, all changes to the + * configuration specified in the file are undone. + **/ +int cf_load(const char *file); + +/** + * Reload configuration from @file, replace the old one. + * If @file is NULL, reload all loaded configuration files and re-apply + * bits of configuration passed to @cf_set(). + * Returns a non-zero value upon error. In that case, all configuration + * settings are rolled back to the state before calling this function. + **/ +int cf_reload(const char *file); + +/** + * Parse some part of configuration passed in @string. + * The syntax is the same as in the configuration file. + * Returns a non-zero value upon error. In that case, all changes to the + * configuration specified by the already executed parts of the string + * are undone.
+ **/ +int cf_set(const char *string); + +/** + * Sometimes, the configuration is split to multiple files and when only + * some of them are loaded, the settings are not consistent -- for example, + * they might have been rejected by a commit hook, because a mandatory setting + * is missing. + * + * This function opens a configuration group, in which multiple files can be + * loaded and all commit hooks are deferred until the group is closed. + **/ +void cf_open_group(void); + +/** + * Close a group opened by @cf_open_group(). Returns a non-zero value upon error, + * which usually means that a commit hook has failed. + **/ +int cf_close_group(void); + +/** + * Return all configuration items to their initial state before loading the + * configuration file. If journalling is disabled, it does nothing. + **/ +void cf_revert(void); + +/*** === Data types [[conf_types]] ***/ + +enum cf_class { /** Class of the configuration item. **/ + CC_END, // end of list + CC_STATIC, // single variable or static array + CC_DYNAMIC, // dynamically allocated array + CC_PARSER, // arbitrary parser function + CC_SECTION, // section appears exactly once + CC_LIST, // list with 0..many nodes + CC_BITMAP // of up to 32 items +}; + +enum cf_type { /** Type of a single value. **/ + CT_INT, CT_U64, CT_DOUBLE, // number types + CT_IP, // IP address + CT_STRING, // string type + CT_LOOKUP, // in a string table + CT_USER, // user-defined type (obsolete) + CT_XTYPE // extended type +}; + +struct fastbuf; + +/** + * A parser function gets an array of (strdup'ed) strings and a pointer with + * the customized information (most likely the target address). It can store + * the parsed value anywhere in any way it likes, however it must first call + * @cf_journal_block() on the overwritten memory block. It returns an error + * message or NULL if everything is all right.
+ **/ +typedef char *cf_parser(uint number, char **pars, void *ptr); +/** + * A parser function for user-defined types gets a string and a pointer to + * the destination variable. It must store the value within [ptr,ptr+size), + * where size is fixed for each type. It should not call @cf_journal_block(). + **/ +typedef char *cf_parser1(char *string, void *ptr); +/** + * An init- or commit-hook gets a pointer to the section or NULL if this + * is the global section. It returns an error message or NULL if everything + * is all right. The init-hook should fill in default values (needed for + * dynamically allocated nodes of link lists or for filling global variables + * that are run-time dependent). The commit-hook should perform sanity + * checks and postprocess the parsed values. Commit-hooks must call + * @cf_journal_block() too. Caveat! init-hooks for static sections must not + * use @cf_malloc() but normal <>. + **/ +typedef char *cf_hook(void *ptr); +/** + * Dumps the contents of a variable of a user-defined type. + **/ +typedef void cf_dumper1(struct fastbuf *fb, void *ptr); +/** + * Similar to init-hook, but it copies attributes from another list node + * instead of setting the attributes to default values. You have to provide + * it if your node contains parsed values and/or sub-lists. + **/ +typedef char *cf_copier(void *dest, void *src); + +struct cf_user_type { /** Structure to store information about user-defined variable type. **/ + uint size; // of the parsed attribute + char *name; // name of the type (for dumping) + cf_parser1 *parser; // how to parse it + cf_dumper1 *dumper; // how to dump the type +}; + +struct cf_section; +struct cf_item { /** Single configuration item. 
**/ + const char *name; // case insensitive + int number; // length of an array or #parameters of a parser (negative means at most) + void *ptr; // pointer to a global variable or an offset in a section + union cf_union { + struct cf_section *sec; // declaration of a section or a list + cf_parser *par; // parser function + const char * const *lookup; // NULL-terminated sequence of allowed strings for lookups + struct cf_user_type *utype; // specification of the user-defined type (obsolete) + const struct xtype *xtype; // specification of the extended type + } u; + enum cf_class cls:16; // attribute class + enum cf_type type:16; // type of a static or dynamic attribute +}; + +struct cf_section { /** A section. **/ + uint size; // 0 for a global block, sizeof(struct) for a section + cf_hook *init; // fills in default values (no need to bzero) + cf_hook *commit; // verifies parsed data (optional) + cf_copier *copy; // copies values from another instance (optional, no need to copy basic attributes) + struct cf_item *cfg; // CC_END-terminated array of items + uint flags; // for internal use only +}; + +/*** + * [[conf_macros]] + * Convenience macros + * ~~~~~~~~~~~~~~~~~~ + * + * You could create the structures manually, but you can use these macros to + * save some typing. + */ + +/*** + * Declaration of <> + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * These macros can be used to configure the <> + * structure. + ***/ + +/** + * Data type of a section. + * If you store the section into a structure, use this macro. + * + * Storing a section into a structure is useful mostly when you may have multiple instances of the + * section (eg. <>). 
+ * + * Example: + * + * struct list_node { + * cnode n; // This one is for the list itself + * char *name; + * uint value; + * }; + * + * static struct clist nodes; + * + * static struct cf_section node = { + * CF_TYPE(struct list_node), + * CF_ITEMS { + * CF_STRING("name", PTR_TO(struct list_node, name)), + * CF_UINT("value", PTR_TO(struct list_node, value)), + * CF_END + * } + * }; + * + * static struct cf_section section = { + * CF_LIST("node", &nodes, &node), + * CF_END + * }; + * + * You could use <> or <> + * macros to create arrays. + */ +#define CF_TYPE(s) .size = sizeof(s) +/** + * An init <>. + * You can use this to initialize dynamically allocated items (for a dynamic array or list). + * The hook returns an error message or NULL if everything was OK. + */ +#define CF_INIT(f) .init = (cf_hook*) f +/** + * A commit <>. + * You can use this one to check sanity of loaded data and postprocess them. + * You must call @cf_journal_block() if you change anything. + * + * Return error message or NULL if everything went OK. + **/ +#define CF_COMMIT(f) .commit = (cf_hook*) f +/** + * A <>. + * You need to provide one for too complicated sections where a memcpy is not + * enough to copy it properly. It happens, for example, when you have a dynamically + * allocated section containing a list of other sections. + * + * You return an error message or NULL if you succeed. + **/ +#define CF_COPY(f) .copy = (cf_copier*) f /** **/ +#define CF_ITEMS .flags = 0, .cfg = ( struct cf_item[] ) /** List of sub-items. **/ +#define CF_END { .cls = CC_END } /** End of the structure. **/ +/*** + * Declaration of a configuration item + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * Each of these describe single <>. They are mostly + * for internal use, do not use them directly unless you really know what you are doing. + ***/ + +/** + * Static array of items. + * Expects you to allocate the memory and provide pointer to it. 
+ **/ +#define CF_STATIC(n,p,T,t,c) { .cls = CC_STATIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t*) } +/** + * Dynamic array of items. + * Expects you to provide pointer to your pointer to data and it will allocate new memory for it + * and set your pointer to it. + **/ +#define CF_DYNAMIC(n,p,T,t,c) { .cls = CC_DYNAMIC, .type = CT_##T, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,t**) } +#define CF_PARSER(n,p,f,c) { .cls = CC_PARSER, .name = n, .number = c, .ptr = p, .u.par = (cf_parser*) f } /** A low-level parser. **/ +#define CF_SECTION(n,p,s) { .cls = CC_SECTION, .name = n, .number = 1, .ptr = p, .u.sec = s } /** A sub-section. **/ +#define CF_LIST(n,p,s) { .cls = CC_LIST, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,clist*), .u.sec = s } /** A list with sub-items. **/ +#define CF_BITMAP_INT(n,p) { .cls = CC_BITMAP, .type = CT_INT, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*) } /** A bitmap. **/ +#define CF_BITMAP_LOOKUP(n,p,t) { .cls = CC_BITMAP, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,u32*), .u.lookup = t } /** A bitmap with named bits. **/ +/*** + * Basic configuration items + * ^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * They describe basic data types used in the configuration. This should be enough for + * most real-life purposes. + * + * The parameters are as follows: + * + * * @n -- name of the item. + * * @p -- pointer to the variable where it shall be stored. + * * @c -- count. + **/ +#define CF_INT(n,p) CF_STATIC(n,p,INT,int,1) /** Single `int` value. **/ +#define CF_INT_ARY(n,p,c) CF_STATIC(n,p,INT,int,c) /** Static array of integers. **/ +#define CF_INT_DYN(n,p,c) CF_DYNAMIC(n,p,INT,int,c) /** Dynamic array of integers. **/ +#define CF_UINT(n,p) CF_STATIC(n,p,INT,uint,1) /** Single `uint` (`unsigned`) value. **/ +#define CF_UINT_ARY(n,p,c) CF_STATIC(n,p,INT,uint,c) /** Static array of unsigned integers. 
**/ +#define CF_UINT_DYN(n,p,c) CF_DYNAMIC(n,p,INT,uint,c) /** Dynamic array of unsigned integers. **/ +#define CF_U64(n,p) CF_STATIC(n,p,U64,u64,1) /** Single unsigned 64bit integer (`u64`). **/ +#define CF_U64_ARY(n,p,c) CF_STATIC(n,p,U64,u64,c) /** Static array of u64s. **/ +#define CF_U64_DYN(n,p,c) CF_DYNAMIC(n,p,U64,u64,c) /** Dynamic array of u64s. **/ +#define CF_DOUBLE(n,p) CF_STATIC(n,p,DOUBLE,double,1) /** Single instance of `double`. **/ +#define CF_DOUBLE_ARY(n,p,c) CF_STATIC(n,p,DOUBLE,double,c) /** Static array of doubles. **/ +#define CF_DOUBLE_DYN(n,p,c) CF_DYNAMIC(n,p,DOUBLE,double,c) /** Dynamic array of doubles. **/ +#define CF_IP(n,p) CF_STATIC(n,p,IP,u32,1) /** Single IPv4 address. **/ +#define CF_IP_ARY(n,p,c) CF_STATIC(n,p,IP,u32,c) /** Static array of IP addresses. **/ +#define CF_IP_DYN(n,p,c) CF_DYNAMIC(n,p,IP,u32,c) /** Dynamic array of IP addresses. **/ + +/* FIXME: Backwards compatibility only, should not be used as it will be removed soon. */ +#define CF_UNS CF_UINT +#define CF_UNS_ARY CF_UINT_ARY +#define CF_UNS_DYN CF_UINT_DYN + +/** + * A string. + * You provide a pointer to a `char *` variable and it will fill it with + * dynamically allocated string. For example: + * + * static char *string = "Default string"; + * + * static struct cf_section section = { + * CF_ITEMS { + * CF_STRING("string", &string), + * CF_END + * } + * }; + **/ +#define CF_STRING(n,p) CF_STATIC(n,p,STRING,char*,1) +#define CF_STRING_ARY(n,p,c) CF_STATIC(n,p,STRING,char*,c) /** Static array of strings. **/ +#define CF_STRING_DYN(n,p,c) CF_DYNAMIC(n,p,STRING,char*,c) /** Dynamic array of strings. **/ +/** + * One string out of a predefined set. + * You provide the set as an array of strings terminated by NULL (similar to @argv argument + * of main()) as the @t parameter. + * + * The configured variable (pointer to `int`) is set to index of the string.
+ * So, it works this way: + * + * static *strings[] = { "First", "Second", "Third", NULL }; + * + * static int variable; + * + * static struct cf_section section = { + * CF_ITEMS { + * CF_LOOKUP("choice", &variable, strings), + * CF_END + * } + * }; + * + * Now, if the configuration contains `choice "Second"`, `variable` will be set to 1. + **/ +#define CF_LOOKUP(n,p,t) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = 1, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t } +/** + * Static array of strings out of predefined set. + **/ +#define CF_LOOKUP_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int*), .u.lookup = t } +/** + * Dynamic array of strings out of predefined set. + **/ +#define CF_LOOKUP_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_LOOKUP, .name = n, .number = c, .ptr = CHECK_PTR_TYPE(p,int**), .u.lookup = t } +/** + * A user-defined type. + * See <> section if you want to know more. + **/ +#define CF_USER(n,p,t) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = 1, .ptr = p, .u.utype = t } +/** + * Static array of user-defined types (all of the same type). + * See <> section. + **/ +#define CF_USER_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t } +/** + * Dynamic array of user-defined types. + * See <> section. + **/ +#define CF_USER_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_USER, .name = n, .number = c, .ptr = p, .u.utype = t } +/** + * An extended type. + * See <> if you want to know more. + **/ +#define CF_XTYPE(n,p,t) { .cls = CC_STATIC, .type = CT_XTYPE, .name = n, .number = 1, .ptr = p, .u.xtype = t } +/** + * Static array of extended types (all of the same type). + * See <>. + **/ +#define CF_XTYPE_ARY(n,p,t,c) { .cls = CC_STATIC, .type = CT_XTYPE, .name = n, .number = c, .ptr = p, .u.xtype = t } +/** + * Dynamic array of extended types. + * See <>. 
+ **/ +#define CF_XTYPE_DYN(n,p,t,c) { .cls = CC_DYNAMIC, .type = CT_XTYPE, .name = n, .number = c, .ptr = p, .u.xtype = t } + +/** + * Any number of dynamic array elements + **/ +#define CF_ANY_NUM -0x7fffffff + +#define DARY_LEN(a) GARY_SIZE(a) /** Length of an dynamic array. An alias for `GARY_SIZE`. **/ + +/*** + * [[alloc]] + * Memory allocation + * ~~~~~~~~~~~~~~~~~ + * + * Each configuration context has one or more <>, where all + * data related to the configuration are stored. + * + * The following set of functions allocate from these pools. The allocated memory + * is valid as long as the current configuration (when the configuration file is + * reloaded or rolled back, or the context is deleted, it gets lost). + * + * Memory allocated from within custom parsers should be allocated from the pools. + * + * Please note that the pool is not guaranteed to exist before you call cf_load(), + * cf_set(), or cf_getopt() on the particular context. + ***/ +struct mempool *cf_get_pool(void); /** Return a pointer to the current configuration pool. **/ +void *cf_malloc(uint size); /** Returns @size bytes of memory allocated from the current configuration pool. **/ +void *cf_malloc_zero(uint size); /** Like @cf_malloc(), but zeroes the memory. **/ +char *cf_strdup(const char *s); /** Copy a string into @cf_malloc()ed memory. **/ +char *cf_printf(const char *fmt, ...) FORMAT_CHECK(printf,1,2); /** printf() into @cf_malloc()ed memory. **/ + +/*** + * [[journal]] + * Undo journal + * ~~~~~~~~~~~~ + * + * The configuration system uses a simple journaling mechanism, which makes + * it possible to undo changes to configuration. A typical example is loading + * of configuration by cf_load(): internally, it creates a transaction, applies + * all changes specified by the configuration and if one of them fails, the whole + * journal is replayed to restore the whole original state. Similarly, cf_reload() + * uses the journal to switch between configurations. 
+ * + * In most cases, you need not care about the journal, except when you need + * to change some data from a <>, or if you want to call cf_modify_item() and then + * undo the changes. + ***/ +/** + * This function can be used to disable the whole journalling mechanism. + * It saves some memory, but it makes undoing of configuration changes impossible, + * which breaks for example cf_reload(). + **/ +void cf_set_journalling(int enable); +/** + * When a block of memory is about to be changed, put the old value + * into journal with this function. You need to call it from a <> + * if you change anything. It is used internally by low-level parsers. + * <> do not need to call it, it is called + * before them. + **/ +void cf_journal_block(void *ptr, uint len); +#define CF_JOURNAL_VAR(var) cf_journal_block(&(var), sizeof(var)) // Store a single value into the journal + +struct cf_journal_item; /** Opaque identifier of the journal state. **/ +/** + * Starts a new transaction. It returns the current state so you can + * get back to it. The @new_pool parameter tells if a new memory pool + * should be created and used from now. + **/ +struct cf_journal_item *cf_journal_new_transaction(uint new_pool); +/** + * Marks current state as a complete transaction. The @new_pool + * parameter tells if the transaction was created with new memory pool + * (the parameter must be the same as the one with + * @cf_journal_new_transaction() was called with). The @oldj parameter + * is the journal state returned from last + * @cf_journal_new_transaction() call. + **/ +void cf_journal_commit_transaction(uint new_pool, struct cf_journal_item *oldj); +/** + * Returns to an old journal state, reverting anything the current + * transaction did. The @new_pool parameter must be the same as the + * one you used when you created the transaction. The @oldj parameter + * is the journal state you got from @cf_journal_new_transaction() -- + * it is the state to return to. 
+ **/ +void cf_journal_rollback_transaction(uint new_pool, struct cf_journal_item *oldj); + +/*** + * [[declare]] + * Section declaration + * ~~~~~~~~~~~~~~~~~~~ + **/ + +/** + * Plug another top-level section into the configuration system. + * @name is the name in the configuration file, + * @sec is pointer to the section description. + * If @allow_unknown is set to 0 and a variable not described in @sec + * is found in the configuration file, it produces an error. + * If you set it to 1, all such variables are ignored. + * + * Please note that a single section definition cannot be used in multiple + * configuration contexts simultaneously. + **/ +void cf_declare_section(const char *name, struct cf_section *sec, uint allow_unknown); +/** + * Like @cf_declare_section(), but instead of item pointers, the section + * contains offsets relative to @ptr. In other words, it does the same + * as `CF_SECTION`, but for top-level sections. + **/ +void cf_declare_rel_section(const char *name, struct cf_section *sec, void *ptr, uint allow_unknown); +/** + * If you have a section in a structure and you want to initialize it + * (eg. if you want a copy of default values outside the configuration), + * you can use this. It initializes it recursively. + * + * This is used mostly internally. You probably do not need it. + **/ +void cf_init_section(const char *name, struct cf_section *sec, void *ptr, uint do_bzero); + +/*** + * [[bparser]] + * Parsers for basic types + * ~~~~~~~~~~~~~~~~~~~~~~~ + * + * Each of them gets a string to parse and pointer to store the value. + * It returns either NULL or error message. + * + * The parsers support units. See <>. + ***/ +char *cf_parse_int(const char *str, int *ptr); /** Parser for integers. **/ +char *cf_parse_u64(const char *str, u64 *ptr); /** Parser for 64-bit unsigned integers. **/ +char *cf_parse_double(const char *str, double *ptr); /** Parser for doubles. **/ +char *cf_parse_ip(const char *p, u32 *varp); /** Parser for IP addresses.
**/ + +/*** + * [[conf_direct]] + * Direct access + * ~~~~~~~~~~~~~ + * + * Direct access to configuration items. + * You probably should not need this, but if you do, you have to handle + * the undo journal yourself. + ***/ + +/** + * List of operations used on items. + * This macro is used to generate internal source code, + * but you may be interested in the list of operations it creates. + * + * Each operation corresponds to the same-named operation + * described in the configuration file syntax. + **/ +#define CF_OPERATIONS T(CLOSE) T(SET) T(CLEAR) T(ALL) \ + T(APPEND) T(PREPEND) T(REMOVE) T(EDIT) T(AFTER) T(BEFORE) T(COPY) T(RESET) + /* Closing brace finishes previous block. + * Basic attributes (static, dynamic, parsed) can be used with SET. + * Dynamic arrays can be used with SET, APPEND, PREPEND. + * Sections can be used with SET. + * Lists can be used with everything. */ +#define T(x) OP_##x, +enum cf_operation { CF_OPERATIONS }; /** Allowed operations on items. See `CF_OPERATIONS` for list (they have an `OP_` prefix -- it means you use `OP_SET` instead of just `SET`). **/ +#undef T + +/** + * Searches for a configuration item called @name. + * If it is found, it is copied into @item and NULL is returned. + * Otherwise, an error is returned and @item is zeroed. + **/ +char *cf_find_item(const char *name, struct cf_item *item); +/** + * Performs a single operation on a given item. + **/ +char *cf_modify_item(struct cf_item *item, enum cf_operation op, int number, char **pars); + +/*** + * [[conf_dump]] + * Debug dumping + * ~~~~~~~~~~~~~ + ***/ + +struct fastbuf; +/** + * Write the current state of all configuration items into @fb.
+ **/ +void cf_dump_sections(struct fastbuf *fb); + +#endif diff --git a/libucw/ucw/config.h b/libucw/ucw/config.h new file mode 100644 index 0000000..a4768b7 --- /dev/null +++ b/libucw/ucw/config.h @@ -0,0 +1,51 @@ +/* + * UCW Library -- Configuration-Dependent Definitions + * + * (c) 1997--2012 Martin Mares + * (c) 2006 Robert Spalek + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_CONFIG_H +#define _UCW_CONFIG_H + +/* Configuration switches */ + +#include "autoconf.h" + +/* Tell libc we're going to use all extensions available */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +/* Types (based on standard C99 integers) */ + +#include +#include + +typedef uint8_t byte; /** Exactly 8 bits, unsigned **/ +typedef uint8_t u8; /** Exactly 8 bits, unsigned **/ +typedef int8_t s8; /** Exactly 8 bits, signed **/ +typedef uint16_t u16; /** Exactly 16 bits, unsigned **/ +typedef int16_t s16; /** Exactly 16 bits, signed **/ +typedef uint32_t u32; /** Exactly 32 bits, unsigned **/ +typedef int32_t s32; /** Exactly 32 bits, signed **/ +typedef uint64_t u64; /** Exactly 64 bits, unsigned **/ +typedef int64_t s64; /** Exactly 64 bits, signed **/ + +typedef unsigned int uint; /** A better pronounceable alias for `unsigned int` **/ +typedef s64 timestamp_t; /** Milliseconds since an unknown epoch **/ + +// FIXME: This should be removed soon +typedef uint uns; /** Backwards compatible alias for `uint' ***/ + +#ifdef CONFIG_UCW_LARGE_FILES +typedef s64 ucw_off_t; /** File position (either 32- or 64-bit, depending on `CONFIG_UCW_LARGE_FILES`). **/ +#else +typedef s32 ucw_off_t; +#endif + +#endif diff --git a/libucw/ucw/crc-tables.c b/libucw/ucw/crc-tables.c new file mode 100644 index 0000000..bb05d32 --- /dev/null +++ b/libucw/ucw/crc-tables.c @@ -0,0 +1,467 @@ +/* + * CRC32 (Castagnoli 1993) -- Tables + * + * Based on Michael E. Kounavis and Frank L. 
Berry: A Systematic Approach + * to Building High Performance Software-based CRC Generators + * (Proceedings of the 10th IEEE Symposium on Computers and Communications 2005) + * + * Includes code from http://sourceforge.net/projects/slicing-by-8/, + * which carried the following copyright notice: + * + * Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved + * + * This software program is licensed subject to the BSD License, + * available at http://www.opensource.org/licenses/bsd-license.html + * + * Adapted for LibUCW by Martin Mares in 2012. + */ + +#include +#include + +/* + * The following CRC lookup table was generated automagically + * using the following model parameters: + * + * Generator Polynomial = ................. 0x1EDC6F41 + * Generator Polynomial Length = .......... 32 bits + * Reflected Bits = ....................... TRUE + * Table Generation Offset = .............. 32 bits + * Number of Slices = ..................... 8 slices + * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 + * Directory Name = ....................... .\ + * File Name = ............................ 
8x256_tables.c + */ + +const u32 crc_tableil8_o32[256] = +{ + 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, + 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, + 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, + 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, + 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, + 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, + 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, + 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, + 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, + 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, + 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, + 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, + 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, + 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, + 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, + 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, + 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, + 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, + 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, + 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 
0x3ED04330, 0xCCBBC033, + 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, + 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, + 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, + 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, + 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, + 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, + 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, + 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, + 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, + 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, + 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, + 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 +}; + +/* + * end of the CRC lookup table crc_tableil8_o32 + */ + + + +/* + * The following CRC lookup table was generated automagically + * using the following model parameters: + * + * Generator Polynomial = ................. 0x1EDC6F41 + * Generator Polynomial Length = .......... 32 bits + * Reflected Bits = ....................... TRUE + * Table Generation Offset = .............. 32 bits + * Number of Slices = ..................... 8 slices + * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 + * Directory Name = ....................... .\ + * File Name = ............................ 
8x256_tables.c + */ + +const u32 crc_tableil8_o40[256] = +{ + 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, + 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, + 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, + 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C, + 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, + 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF, + 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6, + 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E, + 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41, + 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9, + 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, + 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78, + 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43, + 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB, + 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2, + 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A, + 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC, + 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004, + 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D, + 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 
0x30DD99F2, 0x237F0185, + 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE, + 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306, + 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, + 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287, + 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8, + 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600, + 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439, + 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781, + 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, + 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502, + 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B, + 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483 +}; + +/* + * end of the CRC lookup table crc_tableil8_o40 + */ + + + +/* + * The following CRC lookup table was generated automagically + * using the following model parameters: + * + * Generator Polynomial = ................. 0x1EDC6F41 + * Generator Polynomial Length = .......... 32 bits + * Reflected Bits = ....................... TRUE + * Table Generation Offset = .............. 32 bits + * Number of Slices = ..................... 8 slices + * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 + * Directory Name = ....................... .\ + * File Name = ............................ 
8x256_tables.c + */ + +const u32 crc_tableil8_o48[256] = +{ + 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, + 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, + 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3, + 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726, + 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D, + 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8, + 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7, + 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32, + 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0, + 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75, + 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, + 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF, + 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4, + 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, + 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E, + 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB, + 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A, + 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF, + 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0, + 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 
0x1265C21B, 0xB7245065, + 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E, + 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB, + 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, + 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71, + 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3, + 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36, + 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79, + 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC, + 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, + 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622, + 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D, + 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8 +}; + +/* + * end of the CRC lookup table crc_tableil8_o48 + */ + + + +/* + * The following CRC lookup table was generated automagically + * using the following model parameters: + * + * Generator Polynomial = ................. 0x1EDC6F41 + * Generator Polynomial Length = .......... 32 bits + * Reflected Bits = ....................... TRUE + * Table Generation Offset = .............. 32 bits + * Number of Slices = ..................... 8 slices + * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 + * Directory Name = ....................... .\ + * File Name = ............................ 
8x256_tables.c + */ + +const u32 crc_tableil8_o56[256] = +{ + 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, + 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, + 0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7, + 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11, + 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, + 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7, + 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C, + 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A, + 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D, + 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB, + 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, + 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6, + 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6, + 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, + 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B, + 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D, + 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5, + 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213, + 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8, + 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 
0x89C50996, 0x5480A32E, + 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E, + 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698, + 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443, + 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5, + 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12, + 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4, + 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F, + 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9, + 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, + 0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F, + 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4, + 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842 +}; + +/* + * end of the CRC lookup table crc_tableil8_o56 + */ + + + +/* + * The following CRC lookup table was generated automagically + * using the following model parameters: + * + * Generator Polynomial = ................. 0x1EDC6F41 + * Generator Polynomial Length = .......... 32 bits + * Reflected Bits = ....................... TRUE + * Table Generation Offset = .............. 32 bits + * Number of Slices = ..................... 8 slices + * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 + * Directory Name = ....................... .\ + * File Name = ............................ 
8x256_tables.c + */ + +const u32 crc_tableil8_o64[256] = +{ + 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, + 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, + 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97, + 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406, + 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, + 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082, + 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0, + 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151, + 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA, + 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B, + 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, + 0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8, + 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD, + 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, + 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E, + 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF, + 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18, + 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089, + 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB, + 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 
0xB480DEF6, 0x8C91B15A, + 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F, + 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE, + 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, + 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D, + 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6, + 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27, + 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065, + 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4, + 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, + 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70, + 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532, + 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3 +}; + +/* + * end of the CRC lookup table crc_tableil8_o64 + */ + + + +/* + * The following CRC lookup table was generated automagically + * using the following model parameters: + * + * Generator Polynomial = ................. 0x1EDC6F41 + * Generator Polynomial Length = .......... 32 bits + * Reflected Bits = ....................... TRUE + * Table Generation Offset = .............. 32 bits + * Number of Slices = ..................... 8 slices + * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 + * Directory Name = ....................... .\ + * File Name = ............................ 
8x256_tables.c + */ + +const u32 crc_tableil8_o72[256] = +{ + 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, + 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, + 0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93, + 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C, + 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, + 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F, + 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E, + 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201, + 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746, + 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59, + 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, + 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67, + 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB, + 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, + 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5, + 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA, + 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B, + 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364, + 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45, + 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 
0xD57B0843, 0x3A4B635A, + 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6, + 0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9, + 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, + 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7, + 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090, + 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F, + 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE, + 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1, + 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, + 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02, + 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623, + 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C +}; + +/* + * end of the CRC lookup table crc_tableil8_o72 + */ + + + +/* + * The following CRC lookup table was generated automagically + * using the following model parameters: + * + * Generator Polynomial = ................. 0x1EDC6F41 + * Generator Polynomial Length = .......... 32 bits + * Reflected Bits = ....................... TRUE + * Table Generation Offset = .............. 32 bits + * Number of Slices = ..................... 8 slices + * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 + * Directory Name = ....................... .\ + * File Name = ............................ 
8x256_tables.c + */ + +const u32 crc_tableil8_o80[256] = +{ + 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, + 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, + 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F, + 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C, + 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, + 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67, + 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992, + 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1, + 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3, + 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0, + 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, + 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006, + 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E, + 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, + 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8, + 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB, + 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D, + 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E, + 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB, + 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 
0x52FE9540, 0x3AFDB988, + 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0, + 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093, + 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, + 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35, + 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907, + 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454, + 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1, + 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2, + 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, + 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9, + 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C, + 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F +}; + +/* + * end of the CRC lookup table crc_tableil8_o80 + */ + + + +/* + * The following CRC lookup table was generated automagically + * using the following model parameters: + * + * Generator Polynomial = ................. 0x1EDC6F41 + * Generator Polynomial Length = .......... 32 bits + * Reflected Bits = ....................... TRUE + * Table Generation Offset = .............. 32 bits + * Number of Slices = ..................... 8 slices + * Slice Lengths = ........................ 8 8 8 8 8 8 8 8 + * Directory Name = ....................... .\ + * File Name = ............................ 
8x256_tables.c + */ + +const u32 crc_tableil8_o88[256] = +{ + 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, + 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, + 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0, + 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A, + 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, + 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447, + 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929, + 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3, + 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36, + 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC, + 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, + 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358, + 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF, + 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, + 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B, + 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1, + 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360, + 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA, + 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4, + 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 
0x4129E329, 0x08159E0E, + 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9, + 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223, + 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, + 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97, + 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852, + 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88, + 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6, + 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C, + 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, + 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911, + 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F, + 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5 +}; + +/* + * end of the CRC lookup table crc_tableil8_o88 + */ + diff --git a/libucw/ucw/crc-tables.h b/libucw/ucw/crc-tables.h new file mode 100644 index 0000000..d93d337 --- /dev/null +++ b/libucw/ucw/crc-tables.h @@ -0,0 +1,37 @@ +/* + * CRC32 (Castagnoli 1993) -- Tables + * + * Based on Michael E. Kounavis and Frank L. 
Berry: A Systematic Approach + * to Building High Performance Software-based CRC Generators + * (Proceedings of the 10th IEEE Symposium on Computers and Communications 2005) + * + * Includes code from http://sourceforge.net/projects/slicing-by-8/, + * which carried the following copyright notice: + * + * Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved + * + * This software program is licensed subject to the BSD License, + * available at http://www.opensource.org/licenses/bsd-license.html + * + * Adapted for LibUCW by Martin Mares in 2012. + */ + +#ifdef CONFIG_UCW_CLEAN_ABI +#define crc_tableil8_o32 ucw_crc_tableil8_o32 +#define crc_tableil8_o40 ucw_crc_tableil8_o40 +#define crc_tableil8_o48 ucw_crc_tableil8_o48 +#define crc_tableil8_o56 ucw_crc_tableil8_o56 +#define crc_tableil8_o64 ucw_crc_tableil8_o64 +#define crc_tableil8_o72 ucw_crc_tableil8_o72 +#define crc_tableil8_o80 ucw_crc_tableil8_o80 +#define crc_tableil8_o88 ucw_crc_tableil8_o88 +#endif + +extern const u32 crc_tableil8_o32[256]; +extern const u32 crc_tableil8_o40[256]; +extern const u32 crc_tableil8_o48[256]; +extern const u32 crc_tableil8_o56[256]; +extern const u32 crc_tableil8_o64[256]; +extern const u32 crc_tableil8_o72[256]; +extern const u32 crc_tableil8_o80[256]; +extern const u32 crc_tableil8_o88[256]; diff --git a/libucw/ucw/crc.c b/libucw/ucw/crc.c new file mode 100644 index 0000000..070a261 --- /dev/null +++ b/libucw/ucw/crc.c @@ -0,0 +1,196 @@ +/* + * CRC32 (Castagnoli 1993) + * + * Based on Michael E. Kounavis and Frank L. 
Berry: A Systematic Approach + * to Building High Performance Software-based CRC Generators + * (Proceedings of the 10th IEEE Symposium on Computers and Communications 2005) + * + * Includes code from http://sourceforge.net/projects/slicing-by-8/, + * which carried the following copyright notice: + * + * Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved + * + * This software program is licensed subject to the BSD License, + * available at http://www.opensource.org/licenses/bsd-license.html + * + * Adapted for LibUCW by Martin Mares in 2012. + */ + +#include +#include +#include + +/* Update the CRC one byte at a time, using only crc_tableil8_o32. */ +static void crc32_update_by1(crc32_context *ctx, const byte *buf, uint len) +{ + u32 crc = ctx->state; + while (len--) + crc = crc_tableil8_o32[(crc ^ *buf++) & 0x000000FF] ^ (crc >> 8); + ctx->state = crc; +} + +/* + * Update the CRC 4 bytes per iteration (tables o32, o40, o48 and o56). + * Bytes before the first 4-byte aligned address and the trailing bytes + * are folded in one at a time, exactly as crc32_update_by1() does. + * NOTE(review): the buffer is read as native u32 words, which looks + * byte-order dependent -- confirm the behaviour on big-endian hosts. + */ +static void crc32_update_by4(crc32_context *ctx, const byte *buf, uint len) +{ + uint init_bytes, words; + u32 crc = ctx->state; + u32 term1, term2; + const u32 *buf32; + + // Special case + if (len < 4) + goto small; + + // Align start address to a multiple of 4 bytes + init_bytes = ((uintptr_t) buf) & 3; + if (init_bytes) + { + init_bytes = 4 - init_bytes; + len -= init_bytes; + while (init_bytes--) + crc = crc_tableil8_o32[(crc ^ *buf++) & 0x000000FF] ^ (crc >> 8); + } + + // Process 4 bytes at a time + words = len/4; + len -= 4*words; + buf32 = (const u32 *) buf; + while (words--) + { + crc ^= *buf32++; + term1 = crc_tableil8_o56[crc & 0x000000FF] ^ crc_tableil8_o48[(crc >> 8) & 0x000000FF]; + term2 = crc >> 16; + crc = term1 ^ + crc_tableil8_o40[term2 & 0x000000FF] ^ + crc_tableil8_o32[(term2 >> 8) & 0x000000FF]; + } + + // Process remaining up to 3 bytes + buf = (const byte *) buf32; +small: + while (len--) + crc = crc_tableil8_o32[(crc ^ *buf++) & 0x000000FF] ^ (crc >> 8); + + ctx->state = crc; +} + +/* + * Update the CRC 8 bytes per iteration, read as two u32 words + * (tables o32 to o88). Bytes before the first 8-byte aligned address + * and the trailing bytes are folded in one at a time. + * NOTE(review): the buffer is read as native u32 words, which looks + * byte-order dependent -- confirm the behaviour on big-endian hosts. + */ +static void crc32_update_by8(crc32_context *ctx, const byte *buf, uint len) +{ + uint init_bytes, quads; + u32 crc = ctx->state; + u32 term1, term2; + const u32
*buf32; + + // Special case + if (len < 8) + goto small; + + // Align start address to a multiple of 8 bytes + init_bytes = ((uintptr_t) buf) & 7; + if (init_bytes) + { + init_bytes = 8 - init_bytes; + len -= init_bytes; + while (init_bytes--) + crc = crc_tableil8_o32[(crc ^ *buf++) & 0x000000FF] ^ (crc >> 8); + } + + // Process 8 bytes at a time + quads = len/8; + len -= 8*quads; + buf32 = (const u32 *) buf; + while (quads--) + { + crc ^= *buf32++; + term1 = crc_tableil8_o88[crc & 0x000000FF] ^ + crc_tableil8_o80[(crc >> 8) & 0x000000FF]; + term2 = crc >> 16; + crc = term1 ^ + crc_tableil8_o72[term2 & 0x000000FF] ^ + crc_tableil8_o64[(term2 >> 8) & 0x000000FF]; + term1 = crc_tableil8_o56[*buf32 & 0x000000FF] ^ + crc_tableil8_o48[(*buf32 >> 8) & 0x000000FF]; + + term2 = *buf32 >> 16; + crc = crc ^ + term1 ^ + crc_tableil8_o40[term2 & 0x000000FF] ^ + crc_tableil8_o32[(term2 >> 8) & 0x000000FF]; + buf32++; + } + + // Process remaining up to 7 bytes + buf = (const byte *) buf32; +small: + while (len--) + crc = crc_tableil8_o32[(crc ^ *buf++) & 0x000000FF] ^ (crc >> 8); + + ctx->state = crc; +} + +/* + * Reset the state to 0xffffffff and select the update routine for @crc_mode. + * An unrecognized mode only reaches ASSERT(0) and leaves update_func + * unassigned -- NOTE(review): confirm ASSERT() cannot be compiled out. + */ +void +crc32_init(crc32_context *ctx, uint crc_mode) +{ + ctx->state = 0xffffffff; + switch (crc_mode) + { + case CRC_MODE_DEFAULT: + ctx->update_func = crc32_update_by4; + break; + case CRC_MODE_SMALL: + ctx->update_func = crc32_update_by1; + break; + case CRC_MODE_BIG: + ctx->update_func = crc32_update_by8; + break; + default: + ASSERT(0); + } +} + +/* One-shot CRC of @len bytes at @buf, computed with CRC_MODE_DEFAULT. */ +u32 +crc32_hash_buffer(const byte *buf, uint len) +{ + crc32_context ctx; + crc32_init(&ctx, CRC_MODE_DEFAULT); + crc32_update(&ctx, buf, len); + return crc32_final(&ctx); +} + +#ifdef TEST + +#include +#include + +int main(int argc, char **argv) +{ + if (argc != 5) + die("Usage: crc-t "); + uint alg = atoi(argv[1]); + uint len = atoi(argv[2]); + uint block = atoi(argv[3]); + uint iters = atoi(argv[4]); + + byte *buf = xmalloc(len); + for (uint i=0; i> 5) ^ (i >> 11); + + for (uint i=0; i in 2012.
+ */ + +#ifndef _UCW_CRC_H +#define _UCW_CRC_H + +#ifdef CONFIG_UCW_CLEAN_ABI +#define crc32_hash_buffer ucw_crc32_hash_buffer +#define crc32_init ucw_crc32_init +#endif + +/** + * Internal CRC calculator context. + * You should use it just as an opaque handle only. + */ +typedef struct crc32_context { + u32 state; + void (*update_func)(struct crc32_context *ctx, const byte *buf, uint len); +} crc32_context; + +/** + * Initialize new calculation of CRC in a given context. + * @crc_mode selects which algorithm should be used. + **/ +void crc32_init(crc32_context *ctx, uint crc_mode); + +/** + * Algorithm used for CRC calculation. The algorithms differ by the amount + * of precomputed tables they use. Bigger tables imply faster calculation + * at the cost of an increased cache footprint. + **/ +enum crc_mode { + CRC_MODE_DEFAULT, /* Default algorithm (4K table) */ + CRC_MODE_SMALL, /* Optimize for small data (1K table) */ + CRC_MODE_BIG, /* Optimize for large data (8K table) */ + CRC_MODE_MAX, +}; + +/** Feed @len bytes starting at @buf to the CRC calculator. **/ +static inline void crc32_update(crc32_context *ctx, const byte *buf, uint len) +{ + ctx->update_func(ctx, buf, len); +} + +/** Finish calculation and return the CRC value. **/ +static inline u32 crc32_final(crc32_context *ctx) +{ + return ctx->state ^ 0xffffffff; +} + +/** + * A convenience one-shot function for CRC. 
+ * It is equivalent to this snippet of code: + * + * crc32_context ctx; + * crc32_init(&ctx, CRC_MODE_DEFAULT); + * crc32_update(&ctx, buf, len); + * return crc32_final(&ctx); + */ +u32 crc32_hash_buffer(const byte *buf, uint len); + +#endif diff --git a/libucw/ucw/crc.t b/libucw/ucw/crc.t new file mode 100644 index 0000000..9b2bb8c --- /dev/null +++ b/libucw/ucw/crc.t @@ -0,0 +1,25 @@ +# Tests for crc + +Name: Default/small +Run: ../obj/ucw/crc-t 0 123456 37 3 +Out: d620f179 + +Name: Default/big +Run: ../obj/ucw/crc-t 0 123456 10037 3 +Out: d620f179 + +Name: Small/small +Run: ../obj/ucw/crc-t 1 123456 37 3 +Out: d620f179 + +Name: Small/big +Run: ../obj/ucw/crc-t 1 123456 10037 3 +Out: d620f179 + +Name: Large/small +Run: ../obj/ucw/crc-t 2 123456 37 3 +Out: d620f179 + +Name: Large/big +Run: ../obj/ucw/crc-t 2 123456 10037 3 +Out: d620f179 diff --git a/libucw/ucw/daemon-ctrl.c b/libucw/ucw/daemon-ctrl.c new file mode 100644 index 0000000..6834b0c --- /dev/null +++ b/libucw/ucw/daemon-ctrl.c @@ -0,0 +1,222 @@ +/* + * UCW Library -- Daemon Control + * + * (c) 2012 Martin Mares + * (c) 2014 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static enum daemon_control_status +daemon_control_err(struct daemon_control_params *dc, char *msg, ...) +{ + va_list args; + va_start(args, msg); + vsnprintf(dc->error_msg, DAEMON_ERR_LEN, msg, args); + va_end(args); + return DAEMON_STATUS_ERROR; +} + +static enum daemon_control_status +daemon_read_pid(struct daemon_control_params *dc, int will_wait, int *pidp) +{ + // We expect successfully locked guard file, so no foreign process + // can be inside daemon_control() and therefore also in daemon_init() + // or daemon_run(). 
Only these results are then possible: + // + // -- DAEMON_STATUS_ERROR -- some local failure + // -- DAEMON_STATUS_NOT_RUNNING -- no daemon is running + // -- DAEMON_STATUS_STALE -- crashed daemon + // -- DAEMON_STATUS_OK, pid > 0 -- running daemon with known PID + // -- DAEMON_STATUS_OK, pid == 0 -- just exiting daemon, after ftruncate() + + enum daemon_control_status st = DAEMON_STATUS_NOT_RUNNING; + *pidp = 0; + + int pid_fd = open(dc->pid_file, O_RDONLY); + if (pid_fd < 0) + { + if (errno == ENOENT) + return st; + return daemon_control_err(dc, "Cannot open PID file `%s': %m", dc->pid_file); + } + + while (flock(pid_fd, LOCK_SH | (will_wait ? 0 : LOCK_NB)) < 0) + { + if (errno == EWOULDBLOCK) + { + st = DAEMON_STATUS_OK; + break; + } + else if (errno != EINTR) + { + daemon_control_err(dc, "Cannot lock PID file `%s': %m", dc->pid_file); + goto fail; + } + } + + char buf[16]; + int n = read(pid_fd, buf, sizeof(buf)); + if (n < 0) + { + daemon_control_err(dc, "Error reading `%s': %m", dc->pid_file); + goto fail; + } + if (n == (int) sizeof(buf)) + { + daemon_control_err(dc, "PID file `%s' is too long", dc->pid_file); + goto fail; + } + buf[n] = 0; + + if (!n) + { + close(pid_fd); + return st; + } + + if (st != DAEMON_STATUS_OK) + { + close(pid_fd); + return DAEMON_STATUS_STALE; + } + + int pid; + const char *next; + if (str_to_int(&pid, buf, &next, 10) || strcmp(next, "\n")) + { + daemon_control_err(dc, "PID file `%s' does not contain a valid PID", dc->pid_file); + goto fail; + } + + close(pid_fd); + *pidp = pid; + return DAEMON_STATUS_OK; + +fail: + close(pid_fd); + return DAEMON_STATUS_ERROR; +} + +enum daemon_control_status +daemon_control(struct daemon_control_params *dc) +{ + int guard_fd = open(dc->guard_file, O_RDWR | O_CREAT, 0666); + if (guard_fd < 0) + return daemon_control_err(dc, "Cannot open guard file `%s': %m", dc->guard_file); + if (flock(guard_fd, LOCK_EX) < 0) + return daemon_control_err(dc, "Cannot lock guard file `%s': %m", dc->guard_file); + + 
// Read the PID file + int pid, sig; + enum daemon_control_status st = daemon_read_pid(dc, 0, &pid); + if (st == DAEMON_STATUS_ERROR) + goto done; + + switch (dc->action) + { + case DAEMON_CONTROL_CHECK: + break; + case DAEMON_CONTROL_START: + if (st == DAEMON_STATUS_OK) + st = DAEMON_STATUS_ALREADY_DONE; + else + { + pid_t pp = fork(); + if (pp < 0) + { + st = daemon_control_err(dc, "Cannot fork: %m"); + goto done; + } + if (!pp) + { + close(guard_fd); + execvp(dc->argv[0], dc->argv); + fprintf(stderr, "Cannot execute `%s': %m\n", dc->argv[0]); + exit(DAEMON_STATUS_ERROR); + } + int stat; + int ec = waitpid(pp, &stat, 0); + if (ec < 0) + { + st = daemon_control_err(dc, "Cannot wait: %m"); + goto done; + } + if (WIFEXITED(stat) && WEXITSTATUS(stat) == DAEMON_STATUS_ERROR) + { + st = daemon_control_err(dc, "Cannot execute the daemon"); + goto done; + } + char ecmsg[EXIT_STATUS_MSG_SIZE]; + if (format_exit_status(ecmsg, stat)) + { + st = daemon_control_err(dc, "Daemon %s %s", dc->argv[0], ecmsg); + goto done; + } + enum daemon_control_status st2 = daemon_read_pid(dc, 0, &pid); + if (st2 != DAEMON_STATUS_OK && st2 != DAEMON_STATUS_NOT_RUNNING) + st = daemon_control_err(dc, "Daemon %s failed to write the PID file `%s'", dc->argv[0], dc->pid_file); + else if (st != DAEMON_STATUS_STALE) + st = DAEMON_STATUS_OK; + } + break; + case DAEMON_CONTROL_STOP: + if (st != DAEMON_STATUS_OK) + { + if (st == DAEMON_STATUS_NOT_RUNNING) + st = DAEMON_STATUS_ALREADY_DONE; + goto done; + } + if (pid) + { + sig = dc->signal ? : SIGTERM; + if (kill(pid, sig) < 0 && errno != ESRCH) + { + st = daemon_control_err(dc, "Cannot send signal %d: %m", sig); + goto done; + } + } + else + { + // Just exiting daemon => we can safely wait without sending any signal + } + st = daemon_read_pid(dc, 1, &pid); + if (st != DAEMON_STATUS_ERROR) + st = DAEMON_STATUS_OK; + break; + case DAEMON_CONTROL_SIGNAL: + if (!pid) + return DAEMON_STATUS_NOT_RUNNING; + sig = dc->signal ? 
: SIGHUP; + if (kill(pid, sig) >= 0) + st = DAEMON_STATUS_OK; + else if (errno == ESRCH) + st = DAEMON_STATUS_NOT_RUNNING; + else + st = daemon_control_err(dc, "Cannot send signal %d: %m", sig); + break; + default: + ASSERT(0); + } + +done: + close(guard_fd); + return st; +} diff --git a/libucw/ucw/daemon.c b/libucw/ucw/daemon.c new file mode 100644 index 0000000..cfaa05e --- /dev/null +++ b/libucw/ucw/daemon.c @@ -0,0 +1,291 @@ +/* + * UCW Library -- Daemonization + * + * (c) 2012--2014 Martin Mares + * (c) 2014 Pavel Charvat + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void +daemon_resolve_ugid(struct daemon_params *dp) +{ + // Resolve user name + const char *u = dp->run_as_user; + struct passwd *pw = NULL; + if (u) + { + if (u[0] == '#') + { + uint id; + const char *err = str_to_uint(&id, u, NULL, 10 | STN_WHOLE); + if (err) + die("Cannot parse user `%s': %s", u, err); + dp->run_as_uid = id; + dp->want_setuid = 1; + } + else + { + pw = getpwnam(u); + if (!pw) + die("No such user `%s'", u); + dp->run_as_uid = pw->pw_uid; + dp->want_setuid = 1; + } + } + + // Resolve group name + const char *g = dp->run_as_group; + struct group *gr; + if (g) + { + if (g[0] == '#') + { + uint id; + const char *err = str_to_uint(&id, g, NULL, 10 | STN_WHOLE); + if (err) + die("Cannot parse group `%s': %s", g, err); + dp->run_as_gid = id; + dp->want_setgid = 1; + } + else + { + gr = getgrnam(g); + if (!gr) + die("No such group `%s'", g); + dp->run_as_gid = gr->gr_gid; + dp->want_setgid = 1; + } + } + else if (pw) + { + dp->run_as_gid = pw->pw_gid; + dp->want_setgid = 2; + } +} + +void +daemon_switch_ugid(struct daemon_params *dp) +{ + if (dp->want_setgid && setresgid(dp->run_as_gid, dp->run_as_gid, dp->run_as_gid) < 0) + die("Cannot set GID to %d: %m", 
(int) dp->run_as_gid); + if (dp->want_setgid > 1 && initgroups(dp->run_as_user, dp->run_as_gid) < 0) + die("Cannot initialize groups: %m"); + if (dp->want_setuid && setresuid(dp->run_as_uid, dp->run_as_uid, dp->run_as_uid) < 0) + die("Cannot set UID to %d: %m", (int) dp->run_as_uid); +} + +void +daemon_init(struct daemon_params *dp) +{ + daemon_resolve_ugid(dp); + + if (dp->flags & DAEMON_FLAG_SIMULATE) + return; + + if (dp->pid_file) + { + // Check that PID file path is absolute + if (!(dp->flags & DAEMON_FLAG_PRESERVE_CWD) && dp->pid_file[0] != '/') + die("Path to PID file `%s' must be absolute", dp->pid_file); + + // Open PID file + dp->pid_fd = open(dp->pid_file, O_RDWR | O_CREAT, 0666); + if (dp->pid_fd < 0) + die("Cannot open `%s': %m", dp->pid_file); + int fl = fcntl(dp->pid_fd, F_GETFD); + if (fl < 0 || fcntl(dp->pid_fd, F_SETFD, fl | FD_CLOEXEC)) + die("Cannot set FD_CLOEXEC: %m"); + + // Try to lock it with an exclusive lock + if (flock(dp->pid_fd, LOCK_EX | LOCK_NB) < 0) + { + if (errno == EINTR || errno == EWOULDBLOCK) + die("Daemon is already running (`%s' locked)", dp->pid_file); + else + die("Cannot lock `%s': %m", dp->pid_file); + } + + // Make a note that the daemon is starting + if (ftruncate(dp->pid_fd, 0) < 0 || + write(dp->pid_fd, "(starting)\n", 11) != 11) + die("Error writing `%s': %m", dp->pid_file); + } +} + +void +daemon_run(struct daemon_params *dp, void (*body)(struct daemon_params *dp)) +{ + if (dp->flags & DAEMON_FLAG_SIMULATE) + { + body(dp); + return; + } + + // Switch GID and UID + daemon_switch_ugid(dp); + + // Create a new session and close stdio + setsid(); + close(0); + if (open("/dev/null", O_RDWR, 0) < 0 || + dup2(0, 1) < 0) + die("Cannot redirect stdio to `/dev/null': %m"); + + // Set umask to a reasonable value + umask(022); + + // Do not hold the current working directory + if (!(dp->flags & DAEMON_FLAG_PRESERVE_CWD)) + { + if (chdir("/") < 0) + die("Cannot chdir to root: %m"); + } + + // Create pipe to synchronize child 
process with master and avoid possible + // collision between writing of PID and daemon_exit() + int pipe_fd[2]; + if (dp->pid_file && pipe(pipe_fd) < 0) + die("Cannot create pipe: %m"); + + // Fork + pid_t pid = fork(); + if (pid < 0) + die("Cannot fork: %m"); + if (!pid) + { + // Wait for master process to finish writing of PID + if (dp->pid_file) + { + byte pipe_buf[1]; + close(pipe_fd[1]); + if (read(pipe_fd[0], pipe_buf, 1) < 0) + die("Cannot read pipe: %m"); + close(pipe_fd[0]); + } + + // We still keep the PID file open and thus locked + body(dp); + exit(0); + } + + // Write PID + if (dp->pid_file) + { + char buf[32]; + int c = snprintf(buf, sizeof(buf), "%d\n", (int) pid); + ASSERT(c <= (int) sizeof(buf)); + if (lseek(dp->pid_fd, 0, SEEK_SET) < 0 || + write(dp->pid_fd, buf, c) != c || + ftruncate(dp->pid_fd, c) || + close(dp->pid_fd) < 0) + die("Cannot write PID to `%s': %m", dp->pid_file); + close(pipe_fd[0]); + close(pipe_fd[1]); + } +} + +void +daemon_exit(struct daemon_params *dp) +{ + if (dp->flags & DAEMON_FLAG_SIMULATE) + return; + + if (dp->pid_file) + { + if (ftruncate(dp->pid_fd, 0)) + die("Error truncating `%s': %m", dp->pid_file); + close(dp->pid_fd); + } +} + +#ifdef TEST + +#include + +static volatile sig_atomic_t terminate; + +static void term_handler(int sig UNUSED) +{ + msg(L_INFO | L_SIGHANDLER, "SIGTERM received, terminating in a while"); + terminate = 1; +} + +static void hup_handler(int sig UNUSED) +{ + msg(L_INFO | L_SIGHANDLER, "SIGHUP received"); +} + +static void body(struct daemon_params *dp) +{ + log_fork(); + msg(L_INFO, "Daemon is running"); + msg(L_INFO, "uid=%d/%d gid=%d/%d", (int) getuid(), (int) geteuid(), (int) getgid(), (int) getegid()); + + struct sigaction sa_term = { .sa_handler = term_handler }; + struct sigaction sa_hup = { .sa_handler = hup_handler }; + if (sigaction(SIGTERM, &sa_term, NULL) < 0 || + sigaction(SIGHUP, &sa_hup, NULL) < 0) + ASSERT(0); + + while (!terminate) + { + if (!sleep(60)) + { + msg(L_INFO, 
"Timeout elapsed, terminating in a while"); + break; + } + } + + sleep(2); + msg(L_INFO, "Daemon is shutting down"); + daemon_exit(dp); +} + +int main(int argc, char **argv) +{ + struct daemon_params dp = { + .pid_file = "/tmp/123", + }; + + int opt; + while ((opt = getopt(argc, argv, "p:u:g:")) >= 0) + switch (opt) + { + case 'p': + dp.pid_file = optarg; + break; + case 'u': + dp.run_as_user = optarg; + break; + case 'g': + dp.run_as_group = optarg; + break; + default: + die("Invalid arguments"); + } + + daemon_init(&dp); + daemon_run(&dp, body); + msg(L_INFO, "Main program has ended"); + return 0; +} + +#endif diff --git a/libucw/ucw/daemon.h b/libucw/ucw/daemon.h new file mode 100644 index 0000000..25a34f8 --- /dev/null +++ b/libucw/ucw/daemon.h @@ -0,0 +1,131 @@ +/* + * UCW Library -- Daemonization + * + * (c) 2012--2014 Martin Mares + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +#ifndef _UCW_DAEMON_H +#define _UCW_DAEMON_H + +#include + +#ifdef CONFIG_UCW_CLEAN_ABI +#define daemon_control ucw_daemon_control +#define daemon_exit ucw_daemon_exit +#define daemon_init ucw_daemon_init +#define daemon_resolve_ugid ucw_daemon_resolve_ugid +#define daemon_run ucw_daemon_run +#define daemon_switch_ugid ucw_daemon_switch_ugid +#endif + +/** Parameters passed to the daemon helper. **/ +struct daemon_params { + uint flags; // DAEMON_FLAG_xxx + const char *pid_file; // A path to PID file (optional) + const char *run_as_user; // User name or "#uid" (optional) + const char *run_as_group; // Group name or "#gid" (optional) + + // Internal + uid_t run_as_uid; + uid_t run_as_gid; + int want_setuid; + int want_setgid; + int pid_fd; +}; + +/** Flags passed to the daemon helper. **/ +enum daemon_flags { + DAEMON_FLAG_PRESERVE_CWD = 1, // Skip chdir("/") + DAEMON_FLAG_SIMULATE = 2, // Simulate daemonization (avoid fork etc.) +}; + +/** + * Daemon initialization. 
Should be run after parsing of options. + * It resolves the UID and GID to run with and locks the PID file. + * Upon error, it calls @die(). + **/ +void daemon_init(struct daemon_params *dp); + +/** + * Run the daemon. Should be run when everything is initialized. It forks off + * a new process and does all necessary setup. Inside the new process, it calls + * @body (and when it returns, it exits the process). In the original process, it writes + * the PID file and returns. + * + * When `DAEMON_FLAG_SIMULATE` is set, it just calls @body. This is useful + * for running of daemons in a debugger. + **/ +void daemon_run(struct daemon_params *dp, void (*body)(struct daemon_params *dp)); + +/** + * Clean up when the daemon is about to exit. It truncates the PID file to zero length and closes it. + **/ +void daemon_exit(struct daemon_params *dp); + +/** + * Parse `run_as_user` and `run_as_group` and remember the results in internal fields. + * This is called automatically by daemon_init(), but also provided as a separate + * function in case you want to use daemon_switch_ugid(). Upon parse error, it calls die(). + **/ +void daemon_resolve_ugid(struct daemon_params *dp); + +/** + * Switch user and group as specified by the `run_as_user` and `run_as_group`. + * This is performed automatically by daemon_run(), but sometimes you might want to + * switch the user and group separately. In this case, you have to call daemon_resolve_ugid() + * beforehand. 
+ **/ +void daemon_switch_ugid(struct daemon_params *dp); + +#define DAEMON_ERR_LEN 256 + +/** Parameters passed to @daemon_control() **/ +struct daemon_control_params { + const char *pid_file; // A path to PID file + const char *guard_file; // A path to guard file + int action; // Action to perform (DAEMON_CONTROL_xxx) + char * const *argv; // Daemon's arguments, NULL-terminated (for DAEMON_CONTROL_START) + int signal; // Signal to send (for DAEMON_CONTROL_SIGNAL) + char error_msg[DAEMON_ERR_LEN]; // A detailed error message returned (for DAEMON_STATUS_ERROR) +}; + +enum daemon_control_action { + DAEMON_CONTROL_CHECK = 1, + DAEMON_CONTROL_START, + DAEMON_CONTROL_STOP, + DAEMON_CONTROL_SIGNAL, +}; + +/** + * Perform an action on a daemon: + * + * * `DAEMON_CONTROL_START` to start the daemon + * * `DAEMON_CONTROL_STOP` to stop the daemon (send `SIGTERM` or `dc->signal` if non-zero) + * * `DAEMON_CONTROL_CHECK` to check that the daemon is running + * * `DAEMON_CONTROL_SIGNAL` to send a signal to the daemon (send `SIGHUP` or `dc->signal` if non-zero) + * + * The function returns a status code: + * + * * `DAEMON_STATUS_OK` if the action has been performed successfully + * * `DAEMON_STATUS_ALREADY_DONE` if the daemon is already in the requested state + * * `DAEMON_STATUS_NOT_RUNNING` if the action failed, because the daemon is not running + * * `DAEMON_STATUS_ERROR` if the action failed for some other reason (in this case, + * `dc->error_msg` contains a full error message) + * * `DAEMON_STATUS_STALE` if the daemon was in an undefined state (e.g., a stale PID file); + * for `DAEMON_CONTROL_START`, it means success + **/ +enum daemon_control_status daemon_control(struct daemon_control_params *dc); + +// XXX: Also used as exit codes of the ucw-daemon-control utility. 
+enum daemon_control_status { + DAEMON_STATUS_OK = 0, + DAEMON_STATUS_ALREADY_DONE = 100, + DAEMON_STATUS_NOT_RUNNING = 101, + DAEMON_STATUS_ERROR = 102, + DAEMON_STATUS_STALE = 103, +}; + +#endif diff --git a/libucw/ucw/default.cfg b/libucw/ucw/default.cfg new file mode 100644 index 0000000..825baeb --- /dev/null +++ b/libucw/ucw/default.cfg @@ -0,0 +1,86 @@ +# Configuration variables of the UCW library and their default values +# (c) 2005--2015 Martin Mares +# (c) 2020--2022 Pavel Charvat + +# Version of the whole package +Set("UCW_VERSION" => "6.5.14"); +Set("UCW_VERSION_CODE" => 6005014); + +# Name of libraries in packages (libucw$UCW_ABI_SUFFIX.so.0.0, etc.) +Set("UCW_ABI_SUFFIX" => "-6.5"); + +# Compile everything with debug information and ASSERT's +UnSet("CONFIG_DEBUG"); + +# Environment variable with a switch to abort() on die(). +# Any non-empty value of the variable turns this feature on. +Set("CONFIG_UCW_ENV_VAR_DIE_BY_ABORT" => "UCW_DIE_BY_ABORT"); + +# Enable aggressive optimizations depending on exact CPU type (don't use for portable packages) +UnSet("CONFIG_EXACT_CPU"); + +# Support files >2GB +Set("CONFIG_UCW_LARGE_FILES"); + +# Use shared libraries +UnSet("CONFIG_SHARED"); + +# If your system can't reset getopt with 'optind = 0', you need to compile our internal copy +# of GNU libc's getopt. This should not be necessary on GNU libc. 
+UnSet("CONFIG_UCW_OWN_GETOPT"); + +# Install libraries and their API includes +UnSet("CONFIG_INSTALL_API"); + +# Build with support for multi-threaded programs +Set("CONFIG_UCW_THREADS" => 1); + +# Include Perl modules +Set("CONFIG_UCW_PERL" => 1); + +# Include Perl modules written in C +UnSet("CONFIG_UCW_PERL_MODULES"); + +# Include support utilities for shell scripts +Set("CONFIG_UCW_SHELL_UTILS" => 1); + +# Include utilities +Set("CONFIG_UCW_UTILS" => 1); + +# Include obsolete ucw-daemon-helper utility +UnSet("CONFIG_UCW_OBSOLETE_DAEMON_HELPER"); + +# Default configuration file +UnSet("CONFIG_UCW_DEFAULT_CONFIG"); + +# Environment variable with configuration file +UnSet("CONFIG_UCW_ENV_VAR_CONFIG"); + +# Allow use of direct IO on files +Set("CONFIG_UCW_DIRECT_IO"); +Set("CONFIG_UCW_FB_DIRECT"); + +# Use thread-local storage (needs GCC-support, default: auto-detect) +# Set("CONFIG_UCW_TLS"); + +# Use epoll (needs support in libc and kernel, default: auto-detect) +# Set("CONFIG_UCW_EPOLL"); + +# Use getrandom (needs support in libc and kernel, default: auto-detect) +# Set("CONFIG_UCW_GETRANDOM"); + +# Use monotonic clock (default: yes on Linux, no elsewhere) +# Set("CONFIG_UCW_MONOTONIC_CLOCK"); + +# Which regular expression library should be used? If none is selected, we use BSD regex from libc. 
+UnSet("CONFIG_UCW_POSIX_REGEX"); +UnSet("CONFIG_UCW_PCRE"); + +# Compile tools used for debugging and testing of LibUCW +UnSet("CONFIG_UCW_DEBUG_TOOLS"); + +# Clean up library ABI by attaching "ucw_" prefix to all externally visible symbols +Set("CONFIG_UCW_CLEAN_ABI" => 1); + +# Return success +1; diff --git a/libucw/ucw/doc/Makefile b/libucw/ucw/doc/Makefile new file mode 100644 index 0000000..26afcd6 --- /dev/null +++ b/libucw/ucw/doc/Makefile @@ -0,0 +1,36 @@ +# Makefile for the UCW documentation, (c) 2008 Michal Vaner + +DIRS+=ucw/doc + +UCW_DOCS=basics log fastbuf index config configure install basecode hash docsys conf mempool eltpool mainloop generic growbuf unaligned lists chartype unicode prime binsearch heap binheap compress sort hashtable relnotes trans string time daemon signal varint opt alloc gary table xtypes +UCW_INDEX=$(o)/ucw/doc/def_index.html +UCW_DOCS_HTML=$(addprefix $(o)/ucw/doc/,$(addsuffix .html,$(UCW_DOCS))) + +UCW_MANPAGES=ucw-tableprinter.5 +UCW_MANPAGES_HTML=$(patsubst %,$(o)/ucw/doc/%.html,$(UCW_MANPAGES)) +UCW_MANPAGES_NROFF=$(addprefix $(o)/ucw/doc/,$(UCW_MANPAGES)) +MANPAGES+=$(UCW_MANPAGES_NROFF) +UCW_DOCS_HTML+=$(UCW_MANPAGES_HTML) + +$(UCW_INDEX): DOC_HEAD=$(s)/ucw/doc/def_index.txt +$(UCW_INDEX): DOC_LIST=$(patsubst %,$(o)/ucw/doc/%.deflist,$(UCW_DOCS)) +$(UCW_INDEX) $(UCW_DOCS_HTML): DOC_MODULE=ucw + +DOCS+=$(UCW_DOCS_HTML) +DOC_INDICES+=$(UCW_INDEX) +DOC_MODULES+=ucw + +ifdef CONFIG_DOC +INSTALL_TARGETS+=install-libucw-docs install-libucw-man +endif + +.PHONY: install-libucw-docs install-libucw-man + +install-libucw-docs: $(UCW_INDEX) $(UCW_DOCS_HTML) + install -d -m 755 $(DESTDIR)$(INSTALL_DOC_DIR)/ucw/ + install -m 644 $^ $(DESTDIR)$(INSTALL_DOC_DIR)/ucw/ + +# All pages in UCW_MANPAGES are section-5 pages (*.5), so they are installed to man5 +install-libucw-man: $(UCW_MANPAGES_NROFF) + install -d -m 755 $(DESTDIR)$(INSTALL_MAN_DIR)/man5/ + install -m 644 $(UCW_MANPAGES_NROFF) $(DESTDIR)$(INSTALL_MAN_DIR)/man5/ diff --git a/libucw/ucw/doc/alloc.txt b/libucw/ucw/doc/alloc.txt new file mode 100644 index
0000000..adb233f --- /dev/null +++ b/libucw/ucw/doc/alloc.txt @@ -0,0 +1,24 @@ +Generic allocators +================== + +Sometimes, we want to define data structures, whose memory allocation can be +parametrized. If we wish to squeeze out the last bit of performance, we +tie the structure to a certain allocator in compile time (as we do for + <>). If performance is not so critical, allocators +can be swapped in run time. + +This module defines a generic interface to memory allocators. You can use +the following pre-defined allocators, or define some of your own. + +* <> +* <> +* <> + +These data structures accept an allocator (more will come later): + +* Growing arrays + +ucw/alloc.h +----------- + +!!ucw/alloc.h diff --git a/libucw/ucw/doc/basecode.txt b/libucw/ucw/doc/basecode.txt new file mode 100644 index 0000000..d4e307e --- /dev/null +++ b/libucw/ucw/doc/basecode.txt @@ -0,0 +1,76 @@ +Base64 and Base224 encodings +============================ + +These modules can be used to encode and decode data to and from +base64 (described in RFC 3548) and base224 (not described in any +standard, uses all non-control characters, briefly described in +a comment at the beginning of `ucw/base224.c`). + +- <> +- <> +- <> +- <> + +[[base64]] +ucw/base64.h +------------ +!!ucw/base64.h + +[[base224]] +ucw/base224.h +------------- +!!ucw/base224.h + +[[usage]] +Usage +----- + +- You may want to encode small block of known size. Just allocate the + output buffer and feed the data to the function. + + byte output[BASE64_ENC_LENGTH(input_size)]; + uint output_size = base64_encode(output, input, input_size); + +- Decoding can be done in similar way. It is enough to have output + buffer of the same size as the input one. + +- Encoding of a stream of unknown or large size can be split into + chunks. The input chunk size must be multiple of `BASE64_IN_CHUNK`. + The output will be corresponding multiple of `BASE64_OUT_CHUNK`. 
+ + uint input_size; + byte input[BASE64_IN_CHUNK * 10]; + while(input_size = read_chunk(input, BASE64_IN_CHUNK * 10)) { + byte output[BASE64_OUT_CHUNK * 10]; + uint output_size = base64_encode(output, input, input_size); + use_chunk(output, output_size); + } + +- Decoding of a stream is done in the same way, just swap + `BASE64_IN_CHUNK` and `BASE64_OUT_CHUNK` (you feed the decode + function with `BASE64_OUT_CHUNK` multiple and get `BASE64_IN_CHUNK` + multiple). + +The base224 has a similar interface, therefore you can use it the same +way as base64. + +[[basecode]] +The basecode utility +-------------------- +You can use the encoding/decoding routines from the command line, through +the `ucw-basecode` command. You have to specify the operation by a command +line argument and give it the data on standard input. The arguments +are: + +- `-e`: Encode to base64. +- `-d`: Decode from base64. +- `-E`: Encode to base224. +- `-D`: Decode from base224. + +Furthermore, you can provide `--prefix` argument. If you do, the +output (when encoding) will be split to lines by default number of +chunks and the value of prefix will be prepended to each of them. +When decoding, it removes the prefix from the beginning of line. + +You can override the default number of blocks for line-splitting by +`--blocks` argument. diff --git a/libucw/ucw/doc/basics.txt b/libucw/ucw/doc/basics.txt new file mode 100644 index 0000000..ab1c94a --- /dev/null +++ b/libucw/ucw/doc/basics.txt @@ -0,0 +1,38 @@ +LibUCW Basics +============= + +Every program using LibUCW should start with `#include <ucw/lib.h>` which +brings in the most frequently used library functions, macros and types. +This should be done before you include any of the system headers, since +`lib.h` defines the feature macros of the system C library. + +Portability +----------- + +LibUCW is written in C99 with a couple of GNU extensions mixed in where needed. 
+It currently requires the GNU C compiler version 4.0 or newer, but most modules +should be very easy to adapt to a different C99 compiler. (A notable exception +is `stkstring.h`, which is heavily tied to GNU extensions.) + +The library has been developed on Linux with the GNU libc and it is known to run +on Darwin, too. The authors did not try using it on other systems, but most of +the code is written for a generic POSIX system, so porting to any UNIX-like system +should be a piece of cake. + +Threads +------- +Generally, functions in the UCW library are reentrant as long as you call them +on different data. Accessing the same object from different threads is not supported, +unless stated otherwise. This also applies to functions acting on any kind of global state. + +ucw/lib.h +--------- + +!!ucw/lib.h + +ucw/config.h +------------ +This header contains the standard set of types used by LibUCW. It is automatically +included by `ucw/lib.h`. + +!!ucw/config.h diff --git a/libucw/ucw/doc/binheap.txt b/libucw/ucw/doc/binheap.txt new file mode 100644 index 0000000..adc2601 --- /dev/null +++ b/libucw/ucw/doc/binheap.txt @@ -0,0 +1,19 @@ +Binomial heaps +============== + +* <> +* <> +* <> + +[[intro]] +Introduction +------------ + +A binomial heap is a data structure that supports, for example, efficient merge of two heaps, insertions, deletions or access to the minimum element. +All these operations are logarithmic in the worst case. If the merge is not significant, it is usually better to use simpler <>. + +They are defined in `ucw/binheap.h` as <>, some common definitions are also in `ucw/binheap-node.h`. 
+ +!!ucw/binheap-node.h + +!!ucw/binheap.h diff --git a/libucw/ucw/doc/binsearch.txt b/libucw/ucw/doc/binsearch.txt new file mode 100644 index 0000000..211bd73 --- /dev/null +++ b/libucw/ucw/doc/binsearch.txt @@ -0,0 +1,51 @@ +Binary search +============= + +* <> +* <> + +!!ucw/binsearch.h + +[[examples]] +Examples +-------- + +You can find few examples of binary search usage. Although we define only few macros, they can be used +for several different cases, for example to find lower elements in a (non-)decreasing array or even to find +elements in a (non-)increasing array. + + static int inc[10] = { 1, 4, 4, 5, 6, 10, 11, 20, 25, 50 }; + static const char *str[5] = { "aaa", "abc", "bflmpsvz", "rep", "rep" }; + static int dec[3] = { 5, 2, 1 }; + + // find the first equal element + printf("%d\n", BIN_SEARCH_EQ(inc, 10, 4)); // prints 1 + printf("%d\n", BIN_SEARCH_EQ(inc, 10, 15)); // prints -1 (not found) + + // find the first greater or equal element + printf("%d\n", BIN_SEARCH_GE(inc, 10, 9)); // prints 5 + printf("%d\n", BIN_SEARCH_GE(inc, 10, 10)); // prints 5 + printf("%d\n", BIN_SEARCH_GE(inc, 10, 4)); // prints 1 + printf("%d\n", BIN_SEARCH_GE(inc, 10, 99)); // prints 10 (not found) + + // find the last equal element (or -1 if does not exist) + #define CMP_LE(ary, i, x) ((ary[i]) <= (x)) + int i = BIN_SEARCH_FIRST_GE_CMP(inc, 10, 4, CMP_LE); + printf("%d\n", (i && inc[i - 1] == 4) ? 
i - 1 : -1); // prints 2 + + // find the first greater element + printf("%d\n", BIN_SEARCH_FIRST_GE_CMP(inc, 10, 25, CMP_LE)); // prints 9 + + // find the last lower or equal element (or -1 if does not exist) + printf("%d\n", BIN_SEARCH_FIRST_GE_CMP(inc, 10, 25, CMP_LE) - 1); // prints 8 + + // find the last lower element (or -1 if does not exist) + printf("%d\n", BIN_SEARCH_FIRST_GE(inc, 10, 25) - 1); // prints 7 + + // find the first greater or equal string + #define CMP_STR(ary, i, x) (strcmp((ary[i]), (x)) < 0) + printf("%d\n", BIN_SEARCH_GE_CMP(str, 5, "bfl", CMP_STR)); // prints 2 + + // find the first lower or equal element in the non-increasing array + #define CMP_GT(ary, i, x) ((ary[i]) > (x)) + printf("%d\n", BIN_SEARCH_FIRST_GE_CMP(dec, 3, 4, CMP_GT)); // prints 1 diff --git a/libucw/ucw/doc/chartype.txt b/libucw/ucw/doc/chartype.txt new file mode 100644 index 0000000..a19560d --- /dev/null +++ b/libucw/ucw/doc/chartype.txt @@ -0,0 +1,4 @@ +Single-byte characters +====================== + +!!ucw/chartype.h diff --git a/libucw/ucw/doc/compress.txt b/libucw/ucw/doc/compress.txt new file mode 100644 index 0000000..d452763 --- /dev/null +++ b/libucw/ucw/doc/compress.txt @@ -0,0 +1,17 @@ +Compression +=========== + +The library contains a compression routine, called LiZaRd. It is +modified Lempel-Ziv 77 method with slightly worse compression ratio, +but with faster compression and decompression (compression is few times +faster than zlib, decompression is slightly slower than memcpy()). + +The data format and inspiration for code comes from the LZO project +(which couldn't be used due to licence problems). They might be +compatible, but no-one tested that. + +- <> +- <> +- <> + +!!ucw/lizard.h diff --git a/libucw/ucw/doc/conf.txt b/libucw/ucw/doc/conf.txt new file mode 100644 index 0000000..813a183 --- /dev/null +++ b/libucw/ucw/doc/conf.txt @@ -0,0 +1,327 @@ +Configuration parser +==================== + +Libucw contains a parser for configuration files. 
The syntax of the +configuration files is described in <>; here we explain the +interface of the parser. + +Basically, you write a description of the configuration file syntax, +which maps configuration items to variables of your program. Then +you run the parser and it fills your variables with the values +from the configuration file. + +The descriptions are modular. The configuration can be split to sections, +each section declared at a separate place. You can also define your own +data types. + +- <> + * <> + * <> +- <> + * <> + * <> + * <> + * <> +- <> + * <> + * <> + * <> + * <> + * <> + * <> + * <> + * <> + * <> + * <> +- <> + * <> (obsolete) + * <> (obsolete) + +[[example]] +Example +------- +If you want to just load a simple configuration, this is the part you +want to read. This simple example should give you the overview. Look +at the <> section to see the list of +supported data types, sections, etc. + +[[ex_cfile]] +Suppose you have a configuration file with the following content and you +want to load it: + + HelloWorld { + Text "Hello planet" + Count 3 + } + +[[ex_structure]] +The structure +~~~~~~~~~~~~~ +First, you declare the structure and let the configuration parser know +it exists. + + #include + #include + + static char *hw_text = "Hello world"; + static int hw_count = 1; + static int hw_wait_answer = 0; + + static struct cf_section hw_config = { + CF_ITEMS { + CF_STRING("Text", &hw_text), + CF_INT("Count", &hw_count), + CF_INT("WaitAnswer", &hw_wait_answer), + CF_END + } + }; + + static void CONSTRUCTOR hw_init(void) { + cf_declare_section("HelloWorld", &hw_config, 0); + } + +The variables are used to store the loaded values. Their initial +values work as defaults, if nothing else is loaded. The `hw_config` +structure assigns the variables to configuration names.
The hw_init() +function (because of the `CONSTRUCTOR` macro) is run before main() +is called and it tells the parser that the section exists (alternatively, +you can call @cf_declare_section() at the start of your main()). + +You can plug in as many configuration sections as you like, from +various places across your code. + +[[ex_load]] +Loading configuration +~~~~~~~~~~~~~~~~~~~~~ +You can load the configuration explicitly by calling @cf_load(). +That can be convenient when writing a library, but in normal programs, +you can ask the <> to handle it for you. + +A typical example follows; please see the <> for details. + + #include + #include + + static struct opt_section options = { + OPT_ITEMS { + // More options can be specified here + OPT_HELP("Configuration options:"), + OPT_CONF_OPTIONS, + OPT_END + } + }; + + int main(int argc, char **argv) + { + cf_def_file = "default.cf"; + opt_parse(&options, argv+1); + // Configuration file is already loaded here + return 0; + } + +[[deep]] +Getting deeper +-------------- + +Since the configuration system is somewhat complicated, this part gives +you a little overview of what you can find and where. + +[[conf_multi]] +Arrays and lists +~~~~~~~~~~~~~~~~ + +It is sometimes necessary to have multiple items of the same type. There +are three ways to do that: + +*Static arrays*:: + An array with fixed maximum length. You provide + the length and an already allocated array which is filled with items. + The configuration may contain fewer items than the maximum length. ++ +For example, you can have a static array of five unsigned integers: ++ + static uint array[] = { 1, 2, 3, 4, 5 }; ++ + static struct cf_section section = { + CF_ITEMS { + CF_UINT_ARY("array", array, 5), + CF_END + } + }; + +*Dynamic arrays*:: + Similar to a static array, but you provide a pointer + to a pointer to the given item (e.g. if you want a dynamic array of + integers, you give `int **`). The parser allocates a <> + of the required size.
++ +If you want dynamic array of strings, you would use: ++ + static char *array[]; ++ + static struct cf_section section = { + CF_ITEMS { + CF_STRING_DYN("array", &array, CF_ANY_NUM), + CF_END + } + }; + +*Lists*:: + Linked lists based on <>. You provide description + of single node and pointer to the + <> variable. All the nodes will + be created dynamically and put there. ++ +First element of your structure must be <>. ++ +The first example is list of strings and uses <>: ++ + static struct clist list; ++ + static struct cf_section section = { + CF_ITEMS { + CF_LIST("list", &list, &cf_string_list_config), + CF_END + } + }; ++ +Another example, describing how to create more complicated list node +than just a string can be found at the <> macro. + +[[reload]] +Reloading configuration +~~~~~~~~~~~~~~~~~~~~~~~ + +The configuration system allows you to reload configuration at +runtime. The new config changes the values against the default values. +It means, if the default value for variable `A` is `10`, the currently +loaded config sets it to `42` and the new config does not talk about +this variable, `A` will have a value of `10` after a successful load. + +Furthermore, if the loading of a new configuration fails, the current +configuration is preserved. + +All this is done with <>. The load of the +first config creates a journal entry. If you try to load some new +configuration, it is partially rolled back to defaults (the rollback +happens, but instead of removing the journal entry, another journal +entry is added for the rollback). If the loading succeeds, the two +journal entries are removed and a new one, for the new configuration, +is added. If it fails, the first one is replayed and the rollback +entry is removed. + +See @cf_reload(). + +[[custom_parser]] +Creating custom parsers +~~~~~~~~~~~~~~~~~~~~~~~ + +If you need to parse some data type the configuration system can't +handle, you can write your own <> +and use <> macro to declare a new option. 
+ +There is also an obsolete way to write a custom parser. +Before you start, you should know a few things. + +The parser needs to support <>. To accomplish that, +you have to use the <> for memory allocation. + +Now, you need a function with the same signature as +<>. Parse the first parameter (the +string) and store the data in the second parameter. You may want to +write a dumper function, with the signature of +<> (needed for debug dumps). + +Fill in a structure <> and use the +new data type in your configuration description with the +<> macro as its @t parameter. + +You do not need to call @cf_journal_block() on the variable in which you store +the result. It is true you change it, but it was stored in the journal +before your parser function was called. + +[[hooks]] +Hooks +~~~~~ + +The configuration system supports hooks. They are used to initialize the +configuration (if a simple default value of a variable is not enough) and +to check the sanity of loaded data. + +Each hook is of type <> and you can include +them in the configuration description using the <> and +<> macros. + +The hooks should follow similar guidelines as custom parsers (well, +init hooks do not need to call @cf_journal_block()) to support +journalling. If you change nothing in the commit hook, you do not need +to care about the journalling either. + +You may use the return value to inform about errors. Just return the +error message, or NULL if everything went well. + +Another similar function is a copy function. It is very similar to a +hook and is used when the item is copied and is too complicated to use +simple memcpy(). Its type is <> and is +specified by the <> macro. Its return value is +the same as the one of a hook. + +[[conf_h]] +ucw/conf.h +---------- + +This header file contains the public interface of the configuration module. + +!!ucw/conf.h + +[[getopt_h]] +ucw/getopt.h +------------ + +This header contains routines for parsing command line arguments and +loading the default configuration.
+ +In new programs, please consider using the new <> +instead. The getopt interface is already considered obsolete and may +be removed in the future. + +!!ucw/getopt.h + +Example +~~~~~~~ +Typically, @cf_getopt() is used as follows: it works like +the traditional @getopt_long() from the C library, but it also handles +configuration files. + + #include + #include + #include + + static char short_opts[] = CF_SHORT_OPTS "v"; + static struct option long_opts[] = { + CF_LONG_OPTS + { "verbose", 0, 0, 'v' }, + { NULL, 0, 0, 0 } + }; + + static int verbose; + + int main(int argc, char *argv[]) { + cf_def_file = "default.cf"; + int opt; + while((opt = cf_getopt(argc, argv, short_opts, long_opts, NULL)) >= 0) + switch(opt) { + case 'v': verbose = 1; break; + default: fprintf(stderr, "Unknown option %c\n", opt); return 1; + } + } + +The `short_opts` and `long_opts` variables describe the command line +arguments. Notice the `CF_SHORT_OPTS` and `CF_LONG_OPTS` macros. They +add the `-S` and `-C` options for the configuration parser, as described +in <>. These options are handled internally by @cf_getopt(). + +You can rely on the configuration files having been loaded before the +first of your program's options is parsed. diff --git a/libucw/ucw/doc/config.txt b/libucw/ucw/doc/config.txt new file mode 100644 index 0000000..6b32c5a --- /dev/null +++ b/libucw/ucw/doc/config.txt @@ -0,0 +1,196 @@ +Configuration files +=================== + +This document describes run-time configuration of libucw-based +programs using config files. For compile-time configuration, +see <>. + +[[terminology]] +Terminology +----------- + +Configuration items of all modules are organized into sections. +The sections form a tree structure with top-level sections corresponding +to program modules. + +Each configuration item belongs to one of the following classes: + + 1. single value or a fixed-length array of values + 2. variable-length array of values + 3. subsection with several nested attributes + 4.
list of nodes, each being an instance of a subsection + 5. bitmap of small integers (0..31) or fixed list of strings + 6. exceptions (items with irregular syntax; however, they always + appear as a sequence of strings, only the semantics differ) + +Both fixed- and variable-length arrays consist of items of the same +type. The basic types supported by the configuration mechanism are: + + 1. 32-bit integer + 2. 64-bit integer + 3. floating point number + 4. IP address + 5. string + 6. choice (one of a fixed list of strings) + +Program modules can define their own special types (such as network +masks or attribute names) and decide how they are parsed. + +[[format]] +Format of configuration files +----------------------------- + +Configuration files are text files that usually set one attribute per +line, though it is possible to split one assignment into multiple lines +and/or assign several attributes in one line. The basic format of an +assignment command is + + name value1 value2 ... valueN + +or + + name=value1 value2 ... valueN + +The end of a line also ends the command unless it is preceded by a +backslash. On the other hand, a semicolon terminates the command and +another command can start after the semicolon. A hash starts a comment +that lasts until the end of the line. A value can be enclosed in +apostrophes or quotation marks and then it can contain spaces and/or +control characters, otherwise the first space or control character +denotes the end of the value. Values enclosed in quotation marks are +interpreted as C-strings. For example, the following are valid +assignment commands: + + Database "main db\x2b"; Directory='index/'; Weights 100 20 30 \ + 40 50 80 # a comment that is ignored + +Numerical values can be followed by a unit.
The following units are +supported: + +[[units]] + + d=86400 k=1000 K=1024 + h=3600 m=1000000 M=1048576 + %=0.01 g=1000000000 G=1073741824 + t=1000000000000 T=1099511627776 + +Attributes of a section or a list node can be set in two ways. First, +you can write the name of the section or list, open a bracket, and then +set the attributes inside the section. For example, + + Section1 { + Attr1 value1 + Attr2 value2 + ListNode { #creates a list and adds its first node + Attr3 value3 + Attr4 value4 + } + ListNode { Attr3=value5; Attr4=value6 } + #appends a new node; this is still the same syntax + } + +The second possibility is using a shorter syntax when all attributes of a +section are set on one line in a fixed order. The above example could +be as well written as + + Section1 { + Attr1 value1 + Attr2 value2 + ListNode value3 value4 + ListNode value5 value6 + } + +Of course, you cannot use the latter syntax when the attributes allow +variable numbers of parameters. The parser of the configuration files +checks this possibility. + +If you want to set a single attribute in some section, you can also +refer to the attribute as Section.Attribute. + +Lists support several operations besides adding a new node. You just +have to write a colon immediately after the attribute name, followed by +the name of the operation. The following operations are supported: + +[[operations]] + + List:clear # removes all nodes + List:append { attr1=value1; ... } # adds a new node at the end + List:prepend { attr1=value1; ... } # adds a new node at the beginning + List:remove { attr1=search1 } # find a node and delete it + List:edit { attr1=search1 } { attr1=value1; ... } + # find a node and edit it + List:after { attr1=search1 } { ... } # insert a node after a found node + List:before { attr1=search1 } { ... } # insert a node before a found node + List:copy { attr1=search1 } { ... } # duplicate a node and edit the copy + List:reset { attr=value1; ... 
} # equivalent to :clear and :append + +You can specify several attributes in the search condition and the nodes +are tested for equality in all these attributes. In the editing +commands, you can either open a second block with overridden attributes, +or specify the new values using the shorter one-line syntax. + +The commands :clear, :append, and :prepend are also supported by var-length +arrays. The command :clear can also be used on string values. The following +operations can be used on bitmaps: :set (which is equal to :append and :prepend), +:remove, :clear, and :all (set all bits). + +[[include]] +Including other files +--------------------- + +To include another file, use the command + + Include another/file + +or, if the file need not exist, + + OptionalInclude another/file + +(Beware that this command has to be the last one on the line.) + +[[command_line]] +Command-line parameters +----------------------- + +The default configuration file (cf_def_file possibly overridden +by the environment variable cf_env_file) is read before the program is started. +You can use a -C option to override the name of the configuration file. +If you use this parameter several times, then all those files are loaded +consecutively. A parameter -S can be used to execute a configuration +command directly (after loading the default or specified configuration +file). Example: + + bin/program -Ccf/my-config -S'module.trace=2;module.logfile:clear' ... + +If the program is compiled with debugging information, then one more +parameter `--dumpconfig` is supported. It prints all parsed configuration +items and exits. + +All these switches must be used before any other parameters of the +program.
+ +[[preprocess]] +Preprocessing +------------- + +During compilation, all configuration files are pre-processed by a simple +C-like preprocessor, which supports `#ifdef`, `#ifndef`, `#if`, +`#elsif`, `#else` and `#endif` directives referring to compile-time +configuration variables (the ones detected by `configure` script, you +can see list of them in `obj/autoconf.h`). `#if` and `#elsif` can contain +any Perl expression where each `CONFIG_xyz` configuration variable is +substituted to 0 or 1 depending on its value. + +The preprocessor also substitutes `@VARIABLE@` by the value of the variable, +which must be defined. + +[[caveats]] +Caveats +------- + +Trying to access an unknown attribute causes an error, but unrecognized +top-level sections are ignored. The reason is that a common config file +is used for a lot of programs which recognize only their own sections. + +Names of sections, attributes and choices are case-insensitive. Units are +case-sensitive. diff --git a/libucw/ucw/doc/configure.txt b/libucw/ucw/doc/configure.txt new file mode 100644 index 0000000..e655228 --- /dev/null +++ b/libucw/ucw/doc/configure.txt @@ -0,0 +1,78 @@ +How to Configure libucw libraries +================================= + +What can be configured +---------------------- +There are two different levels of configuring program +based on the libucw libraries: + + - runtime configuration in configuration files (see <>) + + - compile-time configuration of the libraries: config switches set + before compiling, selecting optional features. + +Where to build +-------------- +If you run configure in the source directory, it prepares for compilation inside +the source tree. In this case, an `obj` subdirectory is created to hold all generated +files (object files, binaries, generated source files etc.) and all final files +are linked to the `run` subdirectory. No other parts of the source tree are written into. 
+ +Alternatively, you can compile in a separate object tree (which is useful when you +want to build several different configurations from a single source tree). In order +to do that, switch to the destination directory and issue `/configure ...`. +This way, configure will create the `obj` and `run` directories locally and set up +a Makefile which refers to the original source tree. + +How to configure +---------------- +To set up compilation, possibly overriding default compile-time +options, just run: + + ./configure [