/* match.c: pattern matching routines */

#include "rc.h"

/*
   Forward declaration of the range matcher. Only the tail of what appears
   to be its body is present in this copy of the file (see the NOTE(review)
   inside utf8cmp below).
*/
static int rangematch(const char*, const char*);

/*
   Negative sentinel results. RANGE_FAIL is returned by the visible tail of
   the range-matching code when (matched ^ neg) is zero. RANGE_ERROR is not
   used anywhere in the text visible here.
*/
enum { RANGE_FAIL = -1, RANGE_ERROR = -2 };

/* match() matches a single pattern against a single string. */
/* NOTE(review): the definition of match() itself is not present in this copy. */

/*
   utf-8 support copyright © 2005 erik quanstrom with the same licencing
   terms as the rest of rc.

   since rc doesn't really do utf-8, we are going to pretend, relying on
   properties of utf-8 that we know:

   1. we can get away with byte-wise comparisons as long as we are not
      insisting that the next byte is the next character. ranges and the
      ? match operator need to be utf-8-aware.

   2. we can compare 2 utf-8 characters without converting to unicode
      (PITA) by comparing length (longer is greater) and then bytewise.

   all we require is utf8len.
*/

/*
   utf8len: return the byte length of the character starting at ss, judged
   from the first byte alone. No later byte is read or validated.

     first byte < 0x80                    -> 1
     (c & 0xc0) == 0x80 or
     (c & 0xe0) == 0xc0                   -> 2
     (c & 0xf0) == 0xe0                   -> 3
     (c & 0xf8) == 0xf0                   -> 4
     anything else                        -> 1 (treated as a bad byte)

   NOTE(review): bytes of the form 10xxxxxx are utf-8 continuation bytes,
   not lead bytes, yet they yield 2 here rather than falling through to the
   "bad" case -- confirm this is intended.
   NOTE(review): because only the first byte is inspected, a truncated
   sequence still reports its full nominal length; callers presumably must
   guard against stepping past the terminating NUL -- verify at call sites.
*/
static int utf8len(const char* ss){
	/* view the byte as unsigned so values >= 0x80 compare as positive */
	const unsigned char* s = (unsigned char*)ss;
	int c;
	c=*s;
	if (c<0x80){
		return 1;
	}
	if (0x80 == (c&0xc0) || 0xc0 == (c&0xe0)){
		return 2;
	}
	if ((c & 0xf0) == 0xe0){
		return 3;
	}
	if ((c & 0xf8) == 0xf0){
		return 4;
	}
	return 1; /* bad */
}

/*
   utf8cmp: compare the l1-byte character at s1 with the l2-byte character
   at s2. The visible part first takes the length difference l2-l1 and
   returns it when it is non-zero; the handling of equal lengths is not
   visible in this copy.
*/
static int utf8cmp(const char* s1, int l1, const char* s2, int l2){
	int l;
	int t1;
	int t2;
	int i;
	l = l2-l1;
	if (l){
		return l;
	}
	/*
	   NOTE(review): the text is damaged from here on. The header of this
	   `for` loop runs directly into what looks like the tail of the
	   range-matching routine: the names p, c, cl, l, matched, neg and orig
	   used below are not declared anywhere in the visible text, and the
	   braces no longer balance. The remainder of utf8cmp, the body of
	   match() and the start of rangematch() appear to be missing. Restore
	   them from a pristine copy; do not try to repair this by hand.
	*/
	for(i=0; i=utf8cmp(p, l, c, cl)){
				matched = 1;
			}
		} else if (cl == l) {
			/* byte-wise equality test: i reaches l only if all l bytes of p and c agree */
			for(i=0; i != l; i++){
				if (p[i] != c[i]){
					break;
				}
			}
			matched |= i==l;
		}
	}
	/* succeed when matched and neg differ; otherwise report RANGE_FAIL */
	if (matched ^ neg)
		return p - orig + 1; /* skip the right-bracket */
	return RANGE_FAIL;
}