-- Recognizer for the collation weight table stored in "iso14651.txt",
-- written as an LPeg grammar.  The script reads the whole file, matches it
-- against the grammar and prints the result of the match: the index of the
-- first character after the matched prefix, or nil when nothing matches.

-- Bind the module to a local instead of relying on require() defining a
-- global `lpeg` (newer Lua/LPeg combinations do not create that global).
local lpeg = require "lpeg"
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V

local rules = P{
  [1] = "weight_table",

  -- Define collation tables as sequences of lines.
  -- common_template_table is a `^0` loop and therefore always succeeds, so
  -- it is only accepted when it consumes the whole input; otherwise the
  -- tailored form (a superset of the simple lines) gets its turn.
  weight_table = V"common_template_table" * -1 + V"tailored_table",
  common_template_table = V"simple_line"^0,
  tailored_table = V"table_line"^0,

  -- Define the line types
  -- (debug hook: append `/ function (first) io.write("simple: "..first) end`)
  simple_line = (V"symbol_definition"
                 + V"collating_element"
                 + V"weight_assignment"
                 + V"order_end")^-1
              * V"line_completion",
  -- Tailoring lines are tried first: a simple line may be empty, so the
  -- more specific alternative has to come first in the ordered choice.
  table_line = V"tailoring_line" + V"simple_line",
  -- (debug hook: append `/ function (first) io.write("tailoring: "..first) end`)
  tailoring_line = (V"reorder_after"
                    + V"order_start"
                    + V"reorder_end"
                    + V"section_definition"
                    + V"reorder_section_after")
                 * V"line_completion",

  -- Define the basic syntax for collation weighting
  symbol_definition = P"collating-symbol" * V"space"^1 * V"symbol_element",
  -- A lone symbol (one that does not start a range), or else a range.
  symbol_element = V"symbol" - V"symbol_range" + V"symbol_range",
  symbol_range = V"symbol" * P".." * V"symbol",
  symbol = V"simple_symbol" + V"ucs_symbol",
  -- NOTE(review): the two alternatives were empty patterns (P""), which
  -- makes `symbol` match the empty string and causes LPeg to reject the
  -- grammar because loops such as simple_weight^1 could then loop without
  -- consuming input.  They are reconstructed here as <U...> / <U-...> from
  -- the otherwise unused one_to_eight_digit_hex_string rule -- TODO confirm
  -- against the ISO/IEC 14651 BNF.
  ucs_symbol = (P"<U" * V"one_to_eight_digit_hex_string" * P">")
             + (P"<U-" * V"one_to_eight_digit_hex_string" * P">"),
  simple_symbol = P"<" * V"identifier" * P">",
  collating_element = P"collating-element" * V"space"^1 * V"symbol"
                    * V"space"^1 * P"from" * V"space"^1
                    * V"quoted_symbol_sequence",
  quoted_symbol_sequence = P'"' * V"simple_weight"^1 * P'"',
  -- symbol_weight begins with everything simple_weight can match, so the
  -- longer alternative must be tried first.
  weight_assignment = V"symbol_weight" + V"simple_weight",
  simple_weight = V"symbol_element" + P"UNDEFINED",
  symbol_weight = V"symbol_element" * V"space"^1 * V"weight_list",
  weight_list = V"level_token" * (V"semicolon" * V"level_token")^0,
  level_token = V"symbol_group" + P"IGNORE",
  symbol_group = V"symbol_element" + V"quoted_symbol_sequence",
  order_end = P"order_end",

  -- Define the tailoring syntax
  reorder_after = P"reorder-after" * V"space"^1 * V"target_symbol",
  target_symbol = V"symbol",
  order_start = P"order_start" * V"space"^1 * V"multiple_level_direction",
  multiple_level_direction = V"direction"
                           * (V"semicolon" * V"direction")^0
                           * P",position"^-1,
  direction = P"forward" + P"backward",
  reorder_end = P"reorder-end",
  -- The list form starts with the simple form; with the simple form first
  -- the ordered choice would commit to it and the symbol list could never
  -- be consumed, so the longer alternative is tried first.
  section_definition = V"section_definition_list"
                     + V"section_definition_simple",
  section_definition_simple = P"section" * V"space"^1
                            * V"section_identifier",
  section_identifier = V"identifier",
  section_definition_list = P"section" * V"space"^1 * V"section_identifier"
                          * V"space"^1 * V"symbol_list",
  symbol_list = V"symbol_element" * (V"semicolon" * V"symbol_element")^0,
  reorder_section_after = P"reorder-section-after" * V"space"^1
                        * V"section_identifier" * V"space"^1
                        * V"target_symbol",

  -- Define low-level tokens used by the rest of the syntax
  identifier = (V"letter" + V"digit") * V"id_part"^0,
  id_part = V"letter" + V"digit" + S"-_",
  -- Every line ends with optional blanks, an optional comment and a newline.
  line_completion = V"space"^0 * V"comment"^-1 * V"EOL",
  comment = V"comment_char" * V"character"^0,
  -- One to eight digits, as the rule name says (`^-8` alone would also
  -- accept zero digits).
  one_to_eight_digit_hex_string = V"hex_upper" * V"hex_upper"^-7,
  hex_numeric_string = V"hex_upper"^1,
  space = S" \t",
  semicolon = P";",
  comment_char = P"%",
  digit = R"09",
  hex_upper = V"digit" + S"ABCDEF",
  letter = R"az" + R"AZ",
  EOL = P"\n",
  character = 1 - V"EOL",
}

-- assert() turns a failed open into an error that carries the message
-- returned by io.open, instead of a later "attempt to index nil".
local f = assert(io.open("iso14651.txt", "r"))
local tab = f:read("*all")
f:close()

--rules:print()
print(rules:match(tab))