#!/usr/bin/perl -W
# Outputs GSUB rules for replacing Babel-inputted greek characters with their
# Unicode value.
# In Adobe Feature Language, suitable for use in Fontlab's .fea files.

use strict ;
use utf8 ;

# Character types: breathings, accents, vowels
# The void string is considered an accent for convenience with breathings
my %charmask ;
my $charshift = 8 ;
my @breathings = ('greater', 'less') ;
my @accents = ('', 'grave', 'quotesingle', 'asciitilde') ;
my @vowels = ('a', 'e', 'h', 'i', 'o', 'u', 'w') ;

# Unicode masks for characters with breathings
$charmask{''} = 0 ;
$charmask{'greater'} = 0 ;
$charmask{'less'} = 1 ;
$charmask{'grave'} = 2 ;
$charmask{'quotesingle'} = 4 ;
$charmask{'asciitilde'} = 6 ;
$charmask{'a'} = 0x1F00 ;
$charmask{'e'} = 0x1F10 ;
$charmask{'h'} = 0x1F20 ;
$charmask{'i'} = 0x1F30 ;
$charmask{'o'} = 0x1F40 ;
$charmask{'u'} = 0x1F50 ;
$charmask{'w'} = 0x1F60 ;

# Local variables
my $breathing ; my $accent ; my $vowel ;
my $uchar ;

# First the U+1F00–U+1F6F sequence: breathing accent vowel
# We compile the Unicode code points by simply ORing the mask of each element
# Note that some of these characters actually don't exist!
# But is was easier this way (we can always edit the output afterward)
foreach $breathing (@breathings)
{
  foreach $accent (@accents)
  {
    foreach $vowel (@vowels)
    {
      # Space cadet input scheme ;-)
      $uchar = $charmask{$breathing} | $charmask{$accent} | $charmask{$vowel} ;
      printf "sub $breathing $accent $vowel by uni%04X ;\n", $uchar ;

      # Uppercase characters: the same shifted 8.
      $uchar = $charmask{$breathing} | $charmask{$accent}
        | $charmask{$vowel} | $charshift ;
      printf "sub $breathing $accent %s by uni%04X ;\n", uc($vowel), $uchar ;
    }
  }
}

# The U+1F7x range: lowercase vowels with only one accent.
# I have no idea why Unicode decided to put them there ... (especially seen as
# the uppercase vowels are somewhere else, and in an even more clumsy
# arrangement).

# We have to change the masks
$charmask{'grave'} = 0 ;
$charmask{'quotesingle'} = 1 ;
$charmask{'a'} = 0x1F70 ;
$charmask{'e'} = 0x1F72 ;
$charmask{'h'} = 0x1F74 ;
$charmask{'i'} = 0x1F76 ;
$charmask{'o'} = 0x1F78 ;
$charmask{'u'} = 0x1F7A ;
$charmask{'w'} = 0x1F7C ;

foreach $vowel (@vowels)
{
  foreach $accent ('grave', 'quotesingle')
  {
    $uchar = $charmask{$accent} | $charmask{$vowel} ;
    printf "sub $accent $vowel by uni%04X ;\n", $uchar ; }
}

# As announced before, the uppercase counterparts of these 14 characters are in
# a delighfully crappy mess. Simply output them one by one.
print "sub grave A by uni1FBA ;\n" ;
print "sub quotesingle A by uni1FBB ;\n" ;
print "sub grave E by uni1FC8 ;\n" ;
print "sub quotesingle E by uni1FC9 ;\n" ;
print "sub grave H by uni1FCA ;\n" ;
print "sub quotesingle H by uni1FCB ;\n" ;
print "sub grave I by uni1FDA ;\n" ;
print "sub quotesingle I by uni1FDB ;\n" ;
print "sub grave U by uni1FEA ;\n" ;
print "sub quotesingle U by uni1FEB ;\n" ;
print "sub grave W by uni1FFA ;\n" ;
print "sub quotesingle W by uni1FFB ;\n" ;

# U+1F80–U+1FAF: characters with subscribed iotas and breathings.
# We have to change the masks once again.
$charmask{'grave'} = 2 ;
$charmask{'quotesingle'} = 4 ;
$charmask{'a'} = 0x1F80 ;
$charmask{'h'} = 0x1F90 ;
$charmask{'w'} = 0x1FA0 ;
foreach $breathing (@breathings)
{
  foreach $accent (@accents)
  {
    foreach $vowel ('a', 'h', 'w') # Only these three vowels!
    {
      $uchar = $charmask{$breathing} | $charmask{$accent} | $charmask{$vowel} ;
      printf "sub $breathing $accent $vowel bar by uni%04X ;\n", $uchar ;

      # Uppercase counterparts
      $uchar = $charmask{$breathing} | $charmask{$accent}
        | $charmask{$vowel} | $charshift ;
      printf "sub $breathing $accent %s bar by uni%04X ;\n", uc($vowel), $uchar ;
    }
  }
}

# And finally, the characters with subscribed iotas but without breathings.
# Only nine of them, write them one by one.
print "sub grave a bar by uni1FB2 ;\n" ;
print "sub a bar by uni1FB3 ;\n" ;
print "sub quotesingle a bar by uni1FB4 ;\n" ;
print "sub grave h bar by uni1FC2 ;\n" ;
print "sub h bar by uni1FC3 ;\n" ;
print "sub quotesingle h bar by uni1FC4 ;\n" ;
print "sub grave w bar by uni1FD2 ;\n" ;
print "sub w bar by uni1FD3 ;\n" ;
print "sub quotesingle w bar by uni1FD4 ;\n" ;

# And some more with perispomeni ...
print "sub asciitilde a by uni1FB6 ;\n" ;
print "sub asciitilde a bar by uni1FB7 ;\n" ;
print "sub asciitilde h by uni1FC6 ;\n" ;
print "sub asciitilde h bar by uni1FC7 ;\n" ;
print "sub asciitilde w by uni1FD6 ;\n" ;
print "sub asciitilde w bar by uni1FD7 ;\n" ;

# Rhos
print "sub greater r by uni1FE4 ;\n" ;
print "sub less r by uni1FE5 ;\n" ;
print "sub less R by uni1FEC ;\n" ;

# We leave some over but that should already be useful. Enjoy!