--- Testing environment for sorters.
-- Loads "polishsort.lua", splits a sample text into words, sorts the words
-- with `sorters.comparers.polish` and typesets the result via `context`.
-- NOTE(review): presumably "polishsort.lua" is what registers
-- `sorters.comparers.polish` -- confirm against that file.
dofile "polishsort.lua"

-- Shared state: `text` is filled in by testrun(), `words` by sorttext().
document.whatever = { words = { } }

local my = {}

--- Replace every occurrence of any single character from a set.
-- @param s    subject string
-- @param patt string whose characters form the set handed to lpeg.S
-- @param repl replacement used for each matched character
-- @return the result of the lpeg substitution capture over `s`
function my.gsub (s, patt, repl)
    local charset  = lpeg.S(patt)
    -- Substitute members of the set, copy anything else through unchanged.
    local replacer = lpeg.Cs((charset / repl + 1)^0)
    return lpeg.match(replacer, s)
end

--- based on http://www.mail-archive.com/ntg-context@ntg.nl/msg47525.html

--- Split `document.whatever.text` into words and sort them.
-- New entries are appended to `document.whatever.words`; the table is not
-- cleared first, so entries from earlier calls are kept and re-sorted.
function document.whatever.sorttext()
    -- Alternative splitter: sorters.splitters.utf
    local splitter = sorters.splitters.utflower

    -- Turn newline, tab, vertical tab and double quote into plain spaces,
    -- then cut the text on runs of spaces.
    local flattened = my.gsub(document.whatever.text, '\n\t\v"', " ")
    local pieces    = string.explode(flattened, " +")

    local entries = document.whatever.words
    for _, piece in ipairs(pieces) do
        local stripped = string.strip(piece)
        if stripped ~= "" then
            entries[#entries + 1] = { word = stripped }
        end
    end

    -- Every entry gets its `split` field (re)computed before sorting.
    for _, entry in ipairs(entries) do
        entry.split = splitter(entry.word)
    end

    -- Alternative comparer: sorters.comparers.basic
    sorters.sort(entries, sorters.comparers.polish)
end

--- Typeset the words held in `document.whatever.words`.
-- A section heading is emitted whenever the second value returned by
-- `sorters.firstofsplit` changes; each word is written as "<index>: <word>"
-- followed by a paragraph break, skipping a word equal to the one written
-- just before it.
function document.whatever.flushtext()
    local entries   = document.whatever.words
    local last_tag  = false
    local last_word = false

    for index = 1, #entries do
        local entry = entries[index]

        local first, tag = sorters.firstofsplit(entry)
        if tag ~= last_tag then
            last_tag = tag
            context.section(utf.lower(first))
        end

        local text = entry.word
        if text ~= last_word then
            last_word = text
            context(tostring(index) .. ": " .. text)
            context.par()
        end
    end
end

--- Run the complete sort-and-typeset test for one language.
-- @param lang language tag handed to `sorters.setlanguage`
function testrun (lang)
    -- Earlier experiments read the text from a file instead:
    --f = assert(io.open("anna-utf.txt", "r"))
    --f = assert(io.open("sltext.txt", "r"))
    document.whatever.text = [[ polskie słowa dziwnie się szereguje Polskie Słowa Dziwnie Się Szereguje ]]

    sorters.setlanguage(lang)

    context.starttext()
    document.whatever.sorttext()
    document.whatever.flushtext()
    context.stoptext()
end

testrun("pl")