(* $Header: /net/pauillac/caml/repository/bigbro/pcreg.ml,v 1.1 2001/07/16 15:04:04 fpottier Exp $ *) open Pcaml #load "pa_extend.cmo";; #load "q_MLast.cmo";; (* ----------------------------------------------------------------------------------------------------------------- *) (* We begin with an internal utility: a global variable generator, which can be called within grammar rules. The global variables receive names numbered in a linear fashion. There is a possibility of name clashes if another module, which uses the same name generator, is ``opened'' and that module does not have a [.mli] file. It is recommended to always use [.mli] files to describe module interfaces, so these internal variable names will not be exported. *) (* This global variable is used to accumulate global variable declarations while the parser is running. *) let globals = ref [] (* This function allows registering a new global declaration. It can be called within a grammar rule. *) let declare (item : MLast.str_item) = globals := (item, (0, 0) (* dummy location *)) :: !globals (* This function is used to generate a fresh identifier. *) let generate = let count = ref 0 in fun () -> incr count; Printf.sprintf "_regexp_%d" !count (* This hook, which is called once per implementation file, adds the global declarations generated by calls to [declare] at the beginning of the module. *) let _ = EXTEND implem: FIRST [[ (sil, stopped) = NEXT -> let extra = !globals in globals := []; (extra @ sil, stopped) ]]; END (* ----------------------------------------------------------------------------------------------------------------- *) (* This auxiliary function allows generating code for assertions. [assert] is dealt with as a kind of special-purpose syntax extension in O'Caml. However, code in quotations must be expressed in plain (righteous) syntax, which means that it cannot use [assert] directly. Hence, we must use this code (taken from [camlp4]'s [pa_o.ml]) to generate assertions. Note that the generated code depends on the value of [camlp4]'s [-noassert] option. This option is distinct from [ocaml]'s own [-noassert] option. *) let make_assert loc e = let f = <:expr< $str:!Pcaml.input_file$ >> in let bp = <:expr< $int:string_of_int (fst loc)$ >> in let ep = <:expr< $int:string_of_int (snd loc)$ >> in let raiser = <:expr< raise (Assert_failure ($f$, $bp$, $ep$)) >> in if !Pcaml.no_assert then <:expr< () >> else <:expr< if $e$ then () else $raiser$ >> (* ----------------------------------------------------------------------------------------------------------------- *) (* We continue with syntactic extensions which allow dealing with regular expressions easily. The syntax extract s0, s1, ..., sk matching e against r in e' evaluates the expression [e], matches its value against the regular expression [r] using [Pcre.exec], and binds the substrings thus obtained to the patterns [s0], [s1], ..., [sk]. (Each [si] must be either a variable or the wildcard pattern [_].) [Pcre.exec] raises [Not_found] if it doesn't match. The code also contains a dynamic check (using [assert]) which ensures that the number of extracted substrings, namely $k+1$, is consistent with the supplied regular expression. Lastly, the expression [r] must be either a string constant, or a compiled regular expression. If the former, the string is pre-compiled (using a global declaration) into a regular expression. *) let _ = EXTEND GLOBAL: expr; expr: LEVEL "expr1" [[ (p, e, r, l) = [ "extract"; p = LIST1 simplepat SEP ","; "matching"; e = expr; "against"; r = expr -> (p, e, r, loc) ]; (* anonymous sub-rule allows extracting partial location [l] *) "in"; body = expr LEVEL "top" -> (* If the regular expression is a string constant, generate pre-compilation code for it. *) let r = match r with | <:expr< $str:s$ >> -> let name = generate() in declare <:str_item< value $lid:name$ = Pcre.regexp $str:s$ >>; <:expr< $lid:name$ >> | _ -> r in (* Wrap bindings for the substrings around the declaration's body. *) let body, _ = List.fold_left (fun (body, index) name -> begin match name with | Some name -> <:expr< let $lid:name$ = Pcre.get_substring _substrings $int:(string_of_int index)$ in $body$ >> | None -> body end, index + 1 ) (body, 0) p in (* Wrap a dynamic check around the code thus obtained, to ensure that the number of substrings extracted out of the pattern is correct. *) let condition = <:expr< Pcre.num_of_subs _substrings = $int:(string_of_int (List.length p))$ >> in let assertion = make_assert l condition in let body = <:expr< do { $assertion$; $body$ } >> in (* Wrap the actual pattern matching instruction around the code thus obtained. *) <:expr< let _substrings = Pcre.exec ~rex:$r$ $e$ in $body$ >> ]] ; simplepat: [[ x = LIDENT -> Some x | "_" -> None ]] ; END