#! /usr/bin/perl # Reads the STIX character list HTML file and extracts # unicode character names for the various characters # Usage: ./makeUnicode > unicode.pl sub GetTableData { open(INFILE,"STIX character list.html"); my @lines = ; close(INFILE); chomp(@lines); @lines = split(/|/i,join('',@lines)); FindRanges(grep /
*(.*?);? ([^ ]+)-([^ ]+?) *([^<]*)!); $char = sprintf("%05X",hex($char)); $name =~ s/'/\\'/g; $CHARNAME{$char} = $name; } }
# NOTE(review): the line above is the tail of a definition whose beginning
# is not visible in this part of the file; it is kept exactly as found.

#
#  Write the global @ranges list to STDOUT as Perl source.
#  Every entry is read as an array reference: element 0 is emitted
#  inside single quotes, elements 1 and 2 are emitted bare.
#
sub PrintRanges {
  print "\@ranges = (\n";
  for my $entry (@ranges) {
    print " ['$entry->[0]',$entry->[1],$entry->[2]],\n";
  }
  print ");\n\n";
}

#
#  Write the global %CHARNAME hash to STDOUT as Perl source,
#  one "'key' => 'value'," row per key, in string-sorted key order.
#
sub PrintChars {
  my @codes = sort keys %CHARNAME;
  print "%CHARNAME = (\n";
  for my $code (@codes) {
    print " '$code' => '$CHARNAME{$code}',\n";
  }
  print ");\n";
}

#
#  Driver: load the table data, then print both tables to STDOUT.
#
GetTableData();
PrintRanges();
PrintChars();

1;