Hi again.

I have a minimal (?) lexer (attached) working with the grammar below.
For the purpose of this exercise I used ulex. I started with the cduce
lexer and removed all cduce-specific functions. However, I'm not entirely
happy.

First, I'd like to have another example using ocamllex and not ulex (one
less dependency), but I guess this is not too hard to do.

Second, I've copy-pasted some code in the lexer to instantiate the
camlp4 modules, but I'm not sure what is required and what is not. I
mean, I can look at the camlp4 module signatures, but without documentation
there are a lot of functions that I don't really understand. Can anybody
explain the signatures of the Loc, Token and Error modules?
How are these functions used within the camlp4 parsing machinery?
- Token.match_keyword
- Token.extract_string
- Token.Filter.mk
- Token.Filter.filter
- Token.Filter.define_filter
- Token.Filter.keyword_added
- Token.Filter.keyword_removed

Third, I'm not sure if this is the real minimal example I was looking
for. I have the impression I could reuse the Camlp4.PreCast.Loc module,
but I'm not sure if I can reuse Camlp4.PreCast.Token, since it is
linked with the token type definition. I don't think I can reuse/extend
the caml_token type... Making the lexer extensible would be great!

Hope this helps.

comments ?

pietro


This is the _tags file to compile it:
---------- _tags -------
"parser.ml": use_camlp4, pp(camlp4of)
"ulexer.ml": pkg_ulex, use_camlp4, syntax_camlp4o
"ulexer.mli": use_camlp4, pkg_ulex
-----------

+ Nicolas' universal myocamlbuild.ml

-------------------- parser.ml -----------------------

(** Abstract syntax tree of a regular expression. *)
type t =
  | Seq of t * t  (** two sub-expressions in sequence *)
  | Alt of t * t  (** choice between two sub-expressions *)
  | Opt of t      (** sub-expression followed by [?] *)
  | Star of t     (** sub-expression followed by [*] *)
  | Plus of t     (** sub-expression followed by [+] *)
  | Dot           (** the [.] token *)
  | Sym of char   (** a single character *)

open Ulexer

(* Grammar module for regular expressions, obtained by applying camlp4's
   static grammar functor to the Ulexer module. *)
module RegExGram = Camlp4.Struct.Grammar.Static.Make(Ulexer)

(* Grammar entry created under the name "regex"; its rules are supplied by
   the EXTEND block below, and it is the entry that from_string parses. *)
let regex = RegExGram.Entry.mk "regex"

(* I guess I don't need to use KWD *)

(* Rules of the regular-expression grammar. Only [regex] is listed as
   GLOBAL; [concat], [seq] and [simple] are used solely inside this block.
   Every rule builds a value of type [t]. *)
EXTEND RegExGram
  GLOBAL: regex;

  (* Alternation: "r | c" builds Alt; the left operand is SELF, so a chain
     of "|" nests to the left. Otherwise falls through to [concat]. *)
  regex: [[ e1 = SELF ; `KWD "|" ; e2 = concat -> Alt(e1,e2)
          | e1 = concat -> e1 ]
  ];

  (* Sequencing: both "c ; s" and plain juxtaposition "c s" build Seq;
     the left operand is SELF, so sequences nest to the left. *)
  concat:[[ e1 = SELF ; `KWD ";"; e2 = seq -> Seq(e1,e2)
          | e1 = SELF ; e2 = seq -> Seq(e1,e2)
          | e1 = seq -> e1 ]
  ];

  (* A [simple] expression optionally followed by one postfix operator:
     "?" -> Opt, "*" -> Star, "+" -> Plus. *)
  seq:   [[ e1 = simple ; `KWD "?" -> Opt e1
          | e1 = simple ; `KWD "*" -> Star e1
          | e1 = simple ; `KWD "+" -> Plus e1
          | e1 = simple -> e1 ]
  ];

  (* Atoms: "." -> Dot, a parenthesised [regex] (the parentheses leave no
     node in the tree), or a CHAR token, whose payload is wrapped in Sym. *)
  simple:[[ `KWD "." -> Dot
          | `KWD "("; e1 = regex; `KWD ")" -> e1
          | `CHAR(s) -> Sym s ]
  ];


END

(* [from_string s] parses the string [s] with the [regex] entry, passing a
   location built by [Loc.mk "<string>"] to the parser. *)
let from_string s =
  let loc = Loc.mk "<string>" in
  RegExGram.parse_string regex loc s

------------------------------------------------------