Re: [PATCH] Add begin_of_buffer and end_of_buffer hooks for lua pretty printers

From: Matthijs Kooijman <matthijs@stdin.nl>
To: mailing list for ConTeXt users <ntg-context@ntg.nl>
Subject: Re: [PATCH] Add begin_of_buffer and end_of_buffer	hooks for lua pretty printers
Date: Mon, 15 Jun 2009 11:00:11 +0200	[thread overview]
Message-ID: <20090615090010.GN1611@katherina.student.utwente.nl> (raw)
In-Reply-To: <4A34F5B6.1020704@wxs.nl>


[-- Attachment #1.1.1: Type: text/plain, Size: 1179 bytes --]

Hi Hans,

> (no patch nor example of usage attached)
*sigh*, attaching files is always hard :-)

Anyway, I've attached it now, together with an example prettyprinter that uses
it.

I've also slightly changed the patch since my last message. Now,
begin_of_buffer and end_of_buffer are always called, even for single line
\type's. I found that I had to do all kinds of magic to make things work for
\type as well, so this seems cleaner. To be able to differentiate, the type of
buffer ('single', 'buffer' or 'file') and the name of the buffer/file are
passed to begin/end_of_buffer, though my example prettyprinter does not use
these yet.

Before, \type directly called hooks.flush_line. I've added buffers.typesingle
to be called instead, which calls begin_of_buffer, flush_line and end_of_buffer.
To prevent confusion with \type, I've renamed buffers.type to
buffers.typebuffer.

Finally, I've added some comments to the hooks.* functions, to document when
they are called.


Is there any place where lua pretty printers are documented? The wiki seems to
say only "wait for lua", but not how to use them. If there isn't, I'll try to
put something useful on the wiki.

Gr.

Matthijs

[-- Attachment #1.1.2: begin-end-buffer --]
[-- Type: text/plain, Size: 5862 bytes --]

Index: base/buff-ini.lua
===================================================================

--- base.orig/buff-ini.lua	2009-06-15 10:23:58.000000000 +0200
+++ base/buff-ini.lua	2009-06-15 10:44:03.000000000 +0200
@@ -120,7 +120,15 @@
     return first, last, last - first + 1
 end
 
-function buffers.type(name)
+-- Types a single line of text (used by \type); nested is passed to flush_line
+function buffers.typesingle(text, nested)
+    hooks.begin_of_buffer('single')
+    hooks.flush_line(text, nested)
+    hooks.end_of_buffer('single')
+end
+
+-- Types text from the named buffer (used by \typebuffer and \starttyping)
+function buffers.typebuffer(name)
     local lines = data[name]
     local action = buffers.typeline
     if lines then
@@ -129,9 +137,11 @@
         end
         local line, n = 0, 0
         local first, last, m = buffers.strip(lines)
+        hooks.begin_of_buffer('buffer', name)
         for i=first,last do
             n, line = action(lines[i], n, m, line)
         end
+        hooks.end_of_buffer('buffer', name)
     end
 end
 
@@ -145,15 +155,18 @@
     return str or ""
 end
 
+-- Types text from the named file (used by \typefile)
 function buffers.typefile(name) -- still somewhat messy, since name can be be suffixless
     local str = buffers.loaddata(name)
     if str and str~= "" then
         local lines = str:splitlines()
         local line, n, action = 0, 0, buffers.typeline
         local first, last, m = buffers.strip(lines)
+        hooks.begin_of_buffer('file', name)
         for i=first,last do
             n, line = action(lines[i], n, m, line)
         end
+        hooks.end_of_buffer('file', name)
     end
 end
 
@@ -301,23 +314,43 @@
 
 -- calling routines, don't change
 
+-- Called at the start of every piece of text that is prettyprinted. Type can
+-- be: 'single', when only a single line is prettyprinted (e.g. \type),
+-- 'buffer', when a buffer is prettyprinted (e.g. \typebuffer) or 'file', when
+-- a file is prettyprinted (e.g. \typefile).
+-- name is the name of the buffer or file, and empty for 'single'. The special
+-- buffer name '_typing_' is used for \starttyping.
+function hooks.begin_of_buffer(type, name)
+    (visualizers[buffers.currentvisualizer].begin_of_buffer or default.begin_of_buffer)(type, name)
+end
+
+-- Called at the end of every piece of text that is prettyprinted.
+function hooks.end_of_buffer(type, name)
+    (visualizers[buffers.currentvisualizer].end_of_buffer or default.end_of_buffer)(type, name)
+end
+
+-- Called for every non-empty line
 function hooks.flush_line(str,nesting)
     str = gsub(str," *[\n\r]+ *"," ") ; -- semi colon needed
     (visualizers[buffers.currentvisualizer].flush_line or default.flush_line)(str,nesting)
 end
 
+-- Called at the start of every non-empty line (before flush_line)
 function hooks.begin_of_line(n)
     (visualizers[buffers.currentvisualizer].begin_of_line or default.begin_of_line)(n)
 end
 
+-- Called at the end of every non-empty line (after flush_line)
 function hooks.end_of_line()
     (visualizers[buffers.currentvisualizer].end_of_line or default.end_of_line)()
 end
 
+-- Called for every empty line
 function hooks.empty_line()
     (visualizers[buffers.currentvisualizer].empty_line or default.empty_line)()
 end
 
+-- Called for every non-empty line. The result is passed to flush_line.
 function hooks.line(str)
     if visualizers.enabletab then
         str = string.tabtospace(str,visualizers.tablength)
@@ -329,6 +362,12 @@
 
 -- defaults
 
+function default.begin_of_buffer(type, name)
+end
+
+function default.end_of_buffer(type, name)
+end
+
 function default.begin_of_line(n)
     texsprint(ctxcatcodes, commands.begin_of_line_command,"{",n,"}")
 end
Index: base/buff-ini.mkiv
===================================================================
--- base.orig/buff-ini.mkiv	2009-06-15 10:26:17.000000000 +0200
+++ base/buff-ini.mkiv	2009-06-15 10:30:03.000000000 +0200
@@ -161,13 +161,13 @@
 
 \def\doprocessbufferverbatim
   {\doinitializeverbatim
-   \ctxlua{buffers.type("\currentbuffer")}}
+   \ctxlua{buffers.typebuffer("\currentbuffer")}}
 
 \def\doprocessbufferlinesverbatim#1#2#3%
   {#2%
    % todo, set up numbers
    \doinitializeverbatim
-   \ctxlua{buffers.type("\currentbuffer")}
+   \ctxlua{buffers.typebuffer("\currentbuffer")}
    #3}
 
 \def\doifelsebuffer#1%
Index: base/buff-ver.mkiv
===================================================================
--- base.orig/buff-ver.mkiv	2009-06-15 10:24:38.000000000 +0200
+++ base/buff-ver.mkiv	2009-06-15 10:31:34.000000000 +0200
@@ -292,7 +292,7 @@
 \def\dodotypeAA#1%
   {\doinitializeverbatim
    \def\obs{\obeyedspace}%
-   \ctxlua{buffers.hooks.flush_line(\!!bs\detokenize{#1}\!!es)}%
+   \ctxlua{buffers.typesingle(\!!bs\detokenize{#1}\!!es)}%
    \egroup}
 
 \def\dodotypeB#1%
@@ -316,7 +316,7 @@
 \def\dodotypeCC#1%
   {\doinitializeverbatim
    \ifx\obeycharacters\setupprettytype % temp hack, we need a proper signal
-     \ctxlua{buffers.hooks.flush_line([\!!bs\detokenize{#1}\!!es,true)}%
+     \ctxlua{buffers.typesingle([\!!bs\detokenize{#1}\!!es,true)}%
    \else
      \def\obs{\obeyedspace}%
      \ctxlua{buffers.visualizers.flush_nested(\!!bs\detokenize{#1}\!!es,true)}%
@@ -333,7 +333,7 @@
 
 \def\dodotypeDD#1%
   {\doinitializeverbatim
-   \ctxlua{buffers.hooks.flush_line(\!!bs\detokenize{#1}\!!es,true)}%
+   \ctxlua{buffers.typesingle(\!!bs\detokenize{#1}\!!es,true)}%
    \egroup
    \gobbleoneargument} % grab last >
 
@@ -573,7 +573,7 @@
       {}
       {\doinitializeverbatim
        \beginofverbatimlines
-       \ctxlua{buffers.type("_typing_")}%
+       \ctxlua{buffers.typebuffer("_typing_")}%
        \endofverbatimlines
        \getvalue{\strippedcsname#2}}}
 

[-- Attachment #1.1.3: pret-lam.lua --]
[-- Type: text/plain, Size: 5120 bytes --]

-- filename : pret-lam.lua
-- comment  : Pretty printing of (extended) lambda calculus
-- author   : Matthijs Kooijman, Universiteit Twente, NL
-- copyright: Matthijs Kooijman
-- license  : None

local utf = unicode.utf8

-- Create the namespace tables on demand, leaving any existing ones untouched.
if not buffers then
    buffers = { }
end
if not buffers.visualizers then
    buffers.visualizers = { }
end
if not buffers.visualizers.lam then
    buffers.visualizers.lam = { }
end

-- Short alias for the visualizer table that is filled in below.
local lam = buffers.visualizers.lam

-- Colour names for this visualizer; flush_line installs this list as
-- buffers.currentcolors.
lam.colors = { "prettytwo", "prettyone", "prettythree", "prettyfour" }

-- Symbols that are typeset with a representation different from their source
-- text. Each entry maps the literal symbol to a table holding its 'repr'.
lam.symbols = {
    [" "]  = { repr = "\\obs " },
    ["_"]  = { repr = "\\_" },
    ["->"] = { repr = "\\rightarrow" },
    -- The default * sits very high above the baseline, \ast (u+2217) looks
    -- better.
    ["*"]  = { repr = "\\ast" },
}

-- Keywords that are typeset in bold. Only the keys matter; the (empty) value
-- tables just mark membership.
lam.keywords = {
    case   = { },
    of     = { },
    let    = { },
    ["in"] = { }, -- 'in' is a reserved word in Lua, so it needs the bracket form
}

-- See if str starts with one of the symbols from the symbols table.
--
-- Returns two values: the text to typeset for the token, followed by the
-- remainder of str. For a matched symbol the text is its 'repr' (or the symbol
-- itself when it has none), wrapped in {\style ...} when the symbol has a
-- 'style'. If no symbol matches, the first character of str and the rest of
-- str are returned instead.
--
-- We can't do a table lookup directly, since symbols can differ in length, so
-- we loop over all symbols, trying them in turn.
function buffers.visualizers.lam.take_symbol(str)
    local matched, matched_props
    for symbol, props in pairs(buffers.visualizers.lam.symbols) do
        -- Compare literally instead of building a pattern out of the symbol:
        -- symbols such as '*' and '->' contain pattern magic characters.
        -- pairs() visits the symbols in an unspecified order, so keep the
        -- longest match to get a stable result should one symbol ever be a
        -- prefix of another.
        if #symbol > (matched and #matched or 0)
                and string.sub(str, 1, #symbol) == symbol then
            matched, matched_props = symbol, props
        end
    end
    if matched then
        -- Use this token's repr, or just the token if it has no repr.
        local res = matched_props.repr or matched
        -- Enclose the token in {\style .. }
        if matched_props.style then
            res = "{\\" .. matched_props.style .. " " .. res .. "}"
        end
        return res, string.sub(str, #matched + 1)
    end
    -- No symbol found, just return the first character
    return utf.match(str, "^(.)(.*)")
end

-- State for subscript detection, shared by do_subscripts and begin_of_buffer.
-- submatches is the list of patterns that split a word into base and
-- subscript; bases records the bases for which an extra pattern was added.
local submatches, bases

-- (Re)initialise the subscript detection state.
local function reset_subscript_state()
    -- Initially allow subscripts using _ or just appending a number (later,
    -- do_subscripts adds extra patterns here).
    submatches = {"^(.*)_([%a%d,]+)$", "^(.*[^%d])(%d+)$"}
    -- This stores all the bases we've encountered so far (to prevent
    -- duplicates). For each of them there will be a pattern in submatches
    -- above.
    bases = {}
end

-- Take a single word from the start of str, if possible.
--
-- Returns two values: the word taken (nil when str does not start with a
-- word), followed by the rest of the string. When no word is found, the rest
-- is str itself.
function buffers.visualizers.lam.take_word(str)
    -- NOTE(review): the pattern requires at least two characters, so a single
    -- letter is never returned as a word -- confirm this is intended.
    local word, rest = utf.match(str, "^(%a[%a%d_]+)(.*)")
    return word, rest or str
end

-- Tries to match each of the patterns in turn and returns the captures of the
-- first matching pattern (up to 5 captures are supported). Returns nil when
-- nothing matches.
function buffers.visualizers.lam.match_mul(str, patterns)
    for _, pat in ipairs(patterns) do
        local a, b, c, d, e = utf.match(str, pat)
        if a then
            return a, b, c, d, e
        end
    end
    return nil
end

-- Find any subscript in the given word and typeset it with \low{...}.
-- Returns the (possibly rewritten) word.
function buffers.visualizers.lam.do_subscripts(word)
    local match_mul = buffers.visualizers.lam.match_mul
    -- Make sure there is state to work with, even when begin_of_buffer was
    -- not called before the first line is flushed.
    if not submatches then
        reset_subscript_state()
    end
    local base, sub = match_mul(word, submatches)
    if sub then
        word = base .. "\\low{" .. sub .. "}"
        -- After a word has been used as a base, allow subscripts
        -- without _, even for non-numbers.
        if not bases[base] then
            -- Register that we've added this base
            bases[base] = true
            -- Add a pattern for this base
            submatches[#submatches+1] = "^(" .. base .. ")([%a%d,]+)$"
        end
    end
    return word
end

-- Hook called before a piece of text is prettyprinted; forgets all bases
-- learned from earlier text. Both arguments are currently unused.
function buffers.visualizers.lam.begin_of_buffer(buffertype, name)
    reset_subscript_state()
end

-- Prettyprints a single line: splits str into words and symbols, typesets
-- each token and hands the collected result to buffers.flush_result together
-- with nested. buffers.finish_state and buffers.flush_result are defined
-- elsewhere.
function buffers.visualizers.lam.flush_line(str,nested)
    local result, state = { }, 0
    local finish = buffers.finish_state
    local take_symbol = buffers.visualizers.lam.take_symbol
    local take_word = buffers.visualizers.lam.take_word
    local do_subscripts = buffers.visualizers.lam.do_subscripts
    local keywords = buffers.visualizers.lam.keywords
    -- Set the colorscheme, which is used by finish_state and change_state
    buffers.currentcolors = buffers.visualizers.lam.colors
    while str ~= "" do
        local word, symbol
        -- See if the next token is a word
        word, str = take_word(str)
        if word then
            if keywords[word] then
                -- Make all keywords bold
                word = "{\\bold " .. word ..  "}"
            else
                -- Process any subscripts in the word
                word = do_subscripts(word)
            end
        else
            -- The next token is not a word, it must be a symbol
            symbol, str = take_symbol(str)
        end

        -- Append the resulting token
        result[#result+1] = word or symbol
    end

    -- state is never changed from its initial 0 in this visualizer
    finish(state, result)
    buffers.flush_result(result,nested)
end

-- vim: set sw=4 sts=4 expandtab ai:

[-- Attachment #1.2: Digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 487 bytes --]

___________________________________________________________________________________
If your question is of interest to others as well, please add an entry to the Wiki!

maillist : ntg-context@ntg.nl / http://www.ntg.nl/mailman/listinfo/ntg-context
webpage  : http://www.pragma-ade.nl / http://tex.aanhet.net
archive  : https://foundry.supelec.fr/projects/contextrev/
wiki     : http://contextgarden.net
___________________________________________________________________________________