caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
From: Erik de Castro Lopo <mle+ocaml@mega-nerd.com>
To: caml-list@inria.fr
Subject: Re: [Caml-list] Handling include files using ocamllex
Date: Sun, 5 Aug 2007 14:52:38 +1000	[thread overview]
Message-ID: <20070805145238.76aed5d6.mle+ocaml@mega-nerd.com> (raw)
In-Reply-To: <1186060770.23889.39.camel@rosella.wigram>

skaller wrote:

> I recommend abandoning the idea of passing a 
> lexbuf to a parser: make a dummy lexbuf and pass that to
> keep Ocamlyacc happy, but make sure you never use it.
> 
> Instead, create an Ocaml class with a get_token method,
> and use the closure of that method over the class PLUS
> a dummy lexbuf.

I tried that with a class and ran into all sorts of problems
related to trying to use instance data in the constructor.
In the end, I ditched the class/object but kept your idea
and approached it from a more functional direction which
resulted in this (filename lexstack.ml):

------------------8<------------------8<------------------
(*
** The Lexstack type: the state of a lexer that can descend into include
** files. The type parameter 'a is the token type returned by the lexer
** function.
*)
type 'a t =
    (* Saved (filename, channel, lexbuf) of each enclosing file, innermost
    ** first; get_token pushes an entry when it sees an include token. *)
{   mutable stack : (string * in_channel * Lexing.lexbuf) list ;
    (* Name of the file currently being lexed. *)
    mutable filename : string ;
    (* Input channel opened on the current file. *)
    mutable chan : in_channel ;
    (* Lexer buffer created from the current channel. *)
    mutable lexbuf : Lexing.lexbuf ;
    (* The lexer function; it is applied to the current lexbuf to
    ** obtain each token. *)
    lexfunc : Lexing.lexbuf -> 'a ;
    }

(*
** Build a fresh lexstack for the top level file. The file is opened
** for reading, a lexbuf is attached to the resulting channel, and the
** include stack starts out empty. The lexer function is stored so that
** it can later be applied to whichever lexbuf is current.
** Raises Sys_error (from open_in) if the file cannot be opened.
*)
let create top_filename lexer_function =
    let top_chan = open_in top_filename in
    let top_lexbuf = Lexing.from_channel top_chan in
    {   lexfunc = lexer_function ;
        lexbuf = top_lexbuf ;
        chan = top_chan ;
        filename = top_filename ;
        stack = [] ;
        }

(*
** Get the next token. Need to accept an unused dummy lexbuf so that
** a closure consisting of the function and a lexstack can be passed
** to the ocamlyacc generated parser.
**
** Include tokens and the end-of-file tokens of included files are
** handled here and never reach the parser:
**   - TOK_INCLUDE saves the current file's state on the stack and
**     switches to the named file;
**   - TOK_EOF inside an included file closes that file, restores the
**     enclosing file's state and carries on lexing it;
**   - TOK_EOF at the top level (empty stack) is returned as is.
**
** Raises Sys_error (from open_in) if an include file cannot be opened;
** in that case the lexstack is left unchanged.
*)
let rec get_token ls dummy_lexbuf =
    match ls.lexfunc ls.lexbuf with
    |    Parser.TOK_INCLUDE fname ->
            (* Open the include file before touching any state so that a
            ** failing open_in cannot leave the lexstack half updated. *)
            let chan = open_in fname in
            ls.stack <- (ls.filename, ls.chan, ls.lexbuf) :: ls.stack ;
            ls.filename <- fname ;
            ls.chan <- chan ;
            ls.lexbuf <- Lexing.from_channel chan ;
            get_token ls dummy_lexbuf

    |    Parser.TOK_EOF ->
            (   match ls.stack with
                |    [] -> Parser.TOK_EOF
                |    (fn, ch, lb) :: tail ->
                        (* The included file is exhausted: release its
                        ** channel so file descriptors are not leaked. *)
                        close_in ls.chan ;
                        ls.filename <- fn ;
                        ls.chan <- ch ;
                        (* Restore the enclosing file's lexbuf. Without
                        ** this the exhausted lexbuf would keep returning
                        ** EOF and the rest of the enclosing file would
                        ** be skipped. *)
                        ls.lexbuf <- lb ;
                        ls.stack <- tail ;
                        get_token ls dummy_lexbuf
                )

    |    anything -> anything


(* Return the text matched by the lexer in the lexbuf that is current. *)
let lexeme ls =
    let current = ls.lexbuf in
    Lexing.lexeme current

(*
** Locate the current lexeme. Returns a triple of the current filename,
** the line number at the end of the lexeme, and a column obtained by
** taking the offset of the lexeme's end within its line and subtracting
** the lexeme's length.
*)
let current_pos ls =
    let lb = ls.lexbuf in
    let end_pos = Lexing.lexeme_end_p lb in
    let token_len = String.length (Lexing.lexeme lb) in
    let end_col = end_pos.Lexing.pos_cnum - end_pos.Lexing.pos_bol in
    (ls.filename, end_pos.Lexing.pos_lnum, end_col - token_len)

------------------8<------------------8<------------------

This can then be used like this:

    (* The parser gets a closure over the lexstack as its token source;
    ** the dummy lexbuf only exists to satisfy the parser's signature. *)
    let lexstack = Lexstack.create filename Scanner.tokenizer in
    let dummy_lexbuf = Lexing.from_string "" in
    let next_token = Lexstack.get_token lexstack in
    try
        Parser.parse next_token dummy_lexbuf
    with
        |    Parsing.Parse_error ->
                (* Report where the parser gave up, and on which token. *)
                let fname, lnum, lpos = Lexstack.current_pos lexstack in
                let token = Lexstack.lexeme lexstack in
                raise (E (Printf.sprintf
                    "\n\nFile '%s' line %d,  column %d : current token is '%s'.\n"
                    fname lnum lpos token))
        |    Scanner.Lexical_error s -> raise (E s)

I haven't tested it as thoroughly as I should have, but the
general idea seems to work.

Hopefully this will get hooked up into the OCaml Weekly News and
then indexed by Google so other people who run into this problem
can find this solution.

Cheers,
Erik
-- 
-----------------------------------------------------------------
Erik de Castro Lopo
-----------------------------------------------------------------
"Copyrighting allows people to benefit from their labours,
but software patents allow the companies with the largest
legal departments to benefit from everyone else's work."
-- Andrew Brown
(http://www.guardian.co.uk/online/comment/story/0,12449,1387575,00.html)


  reply	other threads:[~2007-08-05  4:52 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-08-02 10:09 Erik de Castro Lopo
2007-08-02 10:29 ` [SPAM?][Caml-list] " Christoph Bauer
2007-08-02 10:42   ` [Caml-list] " Erik de Castro Lopo
2007-08-02 13:19   ` [SPAM?][Caml-list] " skaller
2007-08-05  4:52     ` Erik de Castro Lopo [this message]
2007-08-05  5:35       ` [Caml-list] " Erik de Castro Lopo
2007-08-05 10:16       ` skaller
2007-08-05 10:33         ` Erik de Castro Lopo
2007-08-05 11:55           ` Jacques GARRIGUE
2007-08-05 12:17             ` Erik de Castro Lopo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070805145238.76aed5d6.mle+ocaml@mega-nerd.com \
    --to=mle+ocaml@mega-nerd.com \
    --cc=caml-list@inria.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).