#!/usr/bin/perl -W # Outputs GSUB rules for replacing Babel-inputted greek characters with their # Unicode value. # In Adobe Feature Language, suitable for use in Fontlab's .fea files. use strict ; use utf8 ; # Character types: breathings, accents, vowels # The void string is considered an accent for convenience with breathings my %charmask ; my $charshift = 8 ; my @breathings = ('greater', 'less') ; my @accents = ('', 'grave', 'quotesingle', 'asciitilde') ; my @vowels = ('a', 'e', 'h', 'i', 'o', 'u', 'w') ; # Unicode masks for characters with breathings $charmask{''} = 0 ; $charmask{'greater'} = 0 ; $charmask{'less'} = 1 ; $charmask{'grave'} = 2 ; $charmask{'quotesingle'} = 4 ; $charmask{'asciitilde'} = 6 ; $charmask{'a'} = 0x1F00 ; $charmask{'e'} = 0x1F10 ; $charmask{'h'} = 0x1F20 ; $charmask{'i'} = 0x1F30 ; $charmask{'o'} = 0x1F40 ; $charmask{'u'} = 0x1F50 ; $charmask{'w'} = 0x1F60 ; # Local variables my $breathing ; my $accent ; my $vowel ; my $uchar ; # First the U+1F00–U+1F6F sequence: breathing accent vowel # We compile the Unicode code points by simply ORing the mask of each element # Note that some of these characters actually don't exist! # But is was easier this way (we can always edit the output afterward) foreach $breathing (@breathings) { foreach $accent (@accents) { foreach $vowel (@vowels) { # Space cadet input scheme ;-) $uchar = $charmask{$breathing} | $charmask{$accent} | $charmask{$vowel} ; printf "sub $breathing $accent $vowel by uni%04X ;\n", $uchar ; # Uppercase characters: the same shifted 8. $uchar = $charmask{$breathing} | $charmask{$accent} | $charmask{$vowel} | $charshift ; printf "sub $breathing $accent %s by uni%04X ;\n", uc($vowel), $uchar ; } } } # The U+1F7x range: lowercase vowels with only one accent. # I have no idea why Unicode decided to put them there ... (especially seen as # the uppercase vowels are somewhere else, and in an even more clumsy # arrangement). # We have to change the masks $charmask{'grave'} = 0 ; $charmask{'quotesingle'} = 1 ; $charmask{'a'} = 0x1F70 ; $charmask{'e'} = 0x1F72 ; $charmask{'h'} = 0x1F74 ; $charmask{'i'} = 0x1F76 ; $charmask{'o'} = 0x1F78 ; $charmask{'u'} = 0x1F7A ; $charmask{'w'} = 0x1F7C ; foreach $vowel (@vowels) { foreach $accent ('grave', 'quotesingle') { $uchar = $charmask{$accent} | $charmask{$vowel} ; printf "sub $accent $vowel by uni%04X ;\n", $uchar ; } } # As announced before, the uppercase counterparts of these 14 characters are in # a delighfully crappy mess. Simply output them one by one. print "sub grave A by uni1FBA ;\n" ; print "sub quotesingle A by uni1FBB ;\n" ; print "sub grave E by uni1FC8 ;\n" ; print "sub quotesingle E by uni1FC9 ;\n" ; print "sub grave H by uni1FCA ;\n" ; print "sub quotesingle H by uni1FCB ;\n" ; print "sub grave I by uni1FDA ;\n" ; print "sub quotesingle I by uni1FDB ;\n" ; print "sub grave U by uni1FEA ;\n" ; print "sub quotesingle U by uni1FEB ;\n" ; print "sub grave W by uni1FFA ;\n" ; print "sub quotesingle W by uni1FFB ;\n" ; # U+1F80–U+1FAF: characters with subscribed iotas and breathings. # We have to change the masks once again. $charmask{'grave'} = 2 ; $charmask{'quotesingle'} = 4 ; $charmask{'a'} = 0x1F80 ; $charmask{'h'} = 0x1F90 ; $charmask{'w'} = 0x1FA0 ; foreach $breathing (@breathings) { foreach $accent (@accents) { foreach $vowel ('a', 'h', 'w') # Only these three vowels! { $uchar = $charmask{$breathing} | $charmask{$accent} | $charmask{$vowel} ; printf "sub $breathing $accent $vowel bar by uni%04X ;\n", $uchar ; # Uppercase counterparts $uchar = $charmask{$breathing} | $charmask{$accent} | $charmask{$vowel} | $charshift ; printf "sub $breathing $accent %s bar by uni%04X ;\n", uc($vowel), $uchar ; } } } # And finally, the characters with subscribed iotas but without breathings. # Only nine of them, write them one by one. print "sub grave a bar by uni1FB2 ;\n" ; print "sub a bar by uni1FB3 ;\n" ; print "sub quotesingle a bar by uni1FB4 ;\n" ; print "sub grave h bar by uni1FC2 ;\n" ; print "sub h bar by uni1FC3 ;\n" ; print "sub quotesingle h bar by uni1FC4 ;\n" ; print "sub grave w bar by uni1FD2 ;\n" ; print "sub w bar by uni1FD3 ;\n" ; print "sub quotesingle w bar by uni1FD4 ;\n" ; # And some more with perispomeni ... print "sub asciitilde a by uni1FB6 ;\n" ; print "sub asciitilde a bar by uni1FB7 ;\n" ; print "sub asciitilde h by uni1FC6 ;\n" ; print "sub asciitilde h bar by uni1FC7 ;\n" ; print "sub asciitilde w by uni1FD6 ;\n" ; print "sub asciitilde w bar by uni1FD7 ;\n" ; # Rhos print "sub greater r by uni1FE4 ;\n" ; print "sub less r by uni1FE5 ;\n" ; print "sub less R by uni1FEC ;\n" ; # We leave some over but that should already be useful. Enjoy!