ntg-context - mailing list for ConTeXt users
 help / color / mirror / Atom feed
From: Hans Hagen via ntg-context <ntg-context@ntg.nl>
To: mailing list for ConTeXt users <ntg-context@ntg.nl>
Cc: Hans Hagen <j.hagen@freedom.nl>
Subject: Re: \autoinsertedspace fails
Date: Thu, 5 Jan 2023 00:45:07 +0100	[thread overview]
Message-ID: <db4b2f45-37ec-5030-c1f9-5f1360220165@freedom.nl> (raw)
In-Reply-To: <44990e16-0a82-ee19-abed-8b588ac42b8d@rik.users.panix.com>

[-- Attachment #1: Type: text/plain, Size: 1060 bytes --]

On 1/4/2023 11:10 PM, Rik Kabel via ntg-context wrote:
> No change with the latest (2023.01.04).
> 
> Is this a problem with what I am doing, or a bug?
well, it's new and not that tested ... we need to specify it

This\optionalspace fails unexpectedly with autoinsertedspace.\par
This\optionalspace \emph{fails unexpectedly} with autoinsertedspace.\par

there can be more variants, like do we want to remove preceding spaces?

we already have:

This\optionalspace, fails unexpectedly with autoinsertedspace.\par
This\optionalspace, \emph{fails unexpectedly} with autoinsertedspace.\par
This\optionalspace fails unexpectedly with autoinsertedspace.\par
This\optionalspace \emph{fails unexpectedly} with autoinsertedspace.\par

-----------------------------------------------------------------
                                           Hans Hagen | PRAGMA ADE
               Ridderstraat 27 | 8061 GH Hasselt | The Netherlands
        tel: 038 477 53 69 | www.pragma-ade.nl | www.pragma-pod.nl
-----------------------------------------------------------------

[-- Attachment #2: spac-chr.lmt --]
[-- Type: text/plain, Size: 12615 bytes --]

-- Module registration: make sure the global 'modules' table exists and store
-- the descriptive metadata of this file under the key 'spac-chr'.
if not modules then modules = { } end modules ['spac-chr'] = {
    version   = 1.001,
    optimize  = true,
    comment   = "companion to spac-chr.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

local byte, lower = string.byte, string.lower

-- beware: attribute copying is bugged ... there will be a proper luatex helper
-- for this

-- to be redone: characters will become tagged spaces instead as then we keep track of
-- spaceskip etc

-- todo: only setattr when export / use properties

local next = next

-- Two trace flags, both off by default. Each one is flipped by the callback that
-- is registered with the tracker of the given name: 'trace_characters' makes the
-- handler report every replaced character, 'trace_nbsp' makes the nobreak space
-- injector add a colored rule.

local trace_characters = false  trackers.register("typesetters.characters", function(v) trace_characters = v end)
local trace_nbsp       = false  trackers.register("typesetters.nbsp",       function(v) trace_nbsp       = v end)

-- Reporter used for the trace messages of this module.

local report_characters = logs.reporter("typesetting","characters")

local nodes, node = nodes, node

local nuts               = nodes.nuts

local getid              = nuts.getid
local getsubtype         = nuts.getsubtype
local setsubtype         = nuts.setsubtype
local getboth            = nuts.getboth
local getnext            = nuts.getnext
local getprev            = nuts.getprev
local getattr            = nuts.getattr
local setattr            = nuts.setattr
local getlanguage        = nuts.getlanguage
local setchar            = nuts.setchar
local setattrlist        = nuts.setattrlist
local getfont            = nuts.getfont
local isglyph            = nuts.isglyph

local setcolor           = nodes.tracers.colors.set

local insertnodebefore   = nuts.insertbefore
local insertnodeafter    = nuts.insertafter
local remove_node        = nuts.remove
local nextchar           = nuts.traversers.char
local nextglyph          = nuts.traversers.glyph

local copy_node          = nuts.copy

local nodepool           = nuts.pool
local new_penalty        = nodepool.penalty
local new_glue           = nodepool.glue
local new_kern           = nodepool.kern
local new_rule           = nodepool.rule

local nodecodes          = nodes.nodecodes
local gluecodes          = nodes.gluecodes

local glyph_code         = nodecodes.glyph
local glue_code          = nodecodes.glue
local spaceskip_code     = gluecodes.spaceskip

local chardata           = characters.data
local ispunctuation      = characters.is_punctuation
local canhavespace       = characters.can_have_space

local typesetters        = typesetters

local unicodeblocks      = characters.blocks

local characters         = typesetters.characters or { } -- can be predefined
typesetters.characters   = characters

local fonthashes         = fonts.hashes
local fontparameters     = fonthashes.parameters
local fontcharacters     = fonthashes.characters
local fontquads          = fonthashes.quads

local setmetatableindex  = table.setmetatableindex

local a_character        = attributes.private("characters")
local a_alignstate       = attributes.private("alignstate")

local c_zero   = byte('0')
local c_period = byte('.')

-- Puts a glue after 'current' with a width of 'fraction' times the quad of the
-- font of 'current' (a zero fraction is passed on untouched). The glue takes over
-- the attribute list of 'current', after which the list of 'current' is reset and
-- the glue is tagged with 'unicode' in the 'characters' attribute. Returns whatever
-- insertnodeafter returns.

local function inject_quad_space(unicode,head,current,fraction)
    local width = fraction == 0 and fraction or fraction * fontquads[getfont(current)]
    local space = new_glue(width)
    -- keep this order: first copy the attributes, then reset the original
    setattrlist(space,current)
    setattrlist(current) -- why reset all
    setattr(space,a_character,unicode)
    return insertnodeafter(head,current,space)
end

-- Puts a glue after 'current' that is as wide as character 'parent' in the font
-- of 'current'; when the font has no such character (or no width for it) the
-- space parameter of the font is used. Attributes are handled as in the other
-- injectors: copied to the glue, reset on 'current', and the glue is tagged with
-- 'unicode'.

local function inject_char_space(unicode,head,current,parent)
    local font  = getfont(current)
    local data  = fontcharacters[font][parent]
    local width = data and data.width
    if not width then
        width = fontparameters[font].space
    end
    local space = new_glue(width)
    setattrlist(space,current)
    setattrlist(current) -- why reset all
    setattr(space,a_character,unicode)
    return insertnodeafter(head,current,space)
end

-- Puts a penalty of 10000 followed by a glue (space, stretch, shrink) after
-- 'current'. Both new nodes get the attribute list of 'current', which is then
-- reset, and the glue is tagged with 'unicode'. When the nbsp tracker is enabled
-- an orange rule of width 'space', a kern of '-space' and one more penalty of
-- 10000 are put between the first penalty and the glue. Returns the result of
-- the final insertnodeafter, that is, of inserting the glue.

local function inject_nobreak_space(unicode,head,current,space,spacestretch,spaceshrink)
    local skip    = new_glue(space,spacestretch,spaceshrink)
    local nobreak = new_penalty(10000)
    setattrlist(skip,current)
    setattrlist(nobreak,current)
    setattrlist(current) -- why reset all
    setattr(skip,a_character,unicode) -- bombs
    head, current = insertnodeafter(head,current,nobreak)
    if trace_nbsp then
        -- visual marker: the rule is undone by a kern of the same size
        local marker = new_rule(space)
        local backup = new_kern(-space)
        local guard  = new_penalty(10000)
        setcolor(marker,"orange")
        head, current = insertnodeafter(head,current,marker)
        head, current = insertnodeafter(head,current,backup)
        head, current = insertnodeafter(head,current,guard)
    end
    return insertnodeafter(head,current,skip)
end

-- Injects a nobreak space (tagged 0x00A0) after 'current' using the space
-- parameters of its font. When the alignstate attribute of 'current' is 1, 2 or
-- 3 (marked 'flushright' in the original code) the space gets no stretch and no
-- shrink. The injected glue gets the spaceskip subtype. Returns head and the
-- glue.

local function nbsp(head,current)
    local parameters = fontparameters[getfont(current)]
    local state      = getattr(current,a_alignstate) or 0
    local stretch    = 0
    local shrink     = 0
    if not (state >= 1 and state <= 3) then
        stretch = parameters.spacestretch
        shrink  = parameters.spaceshrink
    end
    head, current = inject_nobreak_space(0x00A0,head,current,parameters.space,stretch,shrink)
    setsubtype(current,spaceskip_code)
    return head, current
end

-- assumes nuts or nodes, depending on callers .. so no tonuts here

-- Replaces the single node 'original' by a nobreak space: the penalty and glue
-- are injected after it and then 'original' itself is removed (the third
-- argument 'true' is passed on to remove_node). Returns what remove_node
-- returns.

function characters.replacenbsp(head,original)
    local newhead = nbsp(head,original)
    return remove_node(newhead,original,true)
end

-- Replaces every glyph with character 0x00A0 in the list starting at 'head' by
-- a nobreak space (penalty plus glue, see nbsp) and returns the possibly changed
-- head.
--
-- The glyph that has been replaced is not removed right away: it is remembered in
-- 'wipe' and removed when the next nbsp is found or when the loop has finished,
-- the same way characters.handler does it (presumably in order not to disturb
-- the running traversal).

function characters.replacenbspaces(head)
    -- todo: wiping as in characters.handler(head)
    local wipe = false
    for current, char in nextglyph, head do -- can be anytime so no traversechar
        if char == 0x00A0 then
            if wipe then
                -- flush the pending glyph (not the current one)
                head = remove_node(head,wipe,true)
                wipe = false
            end
            local h = nbsp(head,current)
            if h then
                wipe = current
            end
        end
    end
    if wipe then
        -- 'current' is out of scope here, the pending glyph is in 'wipe'
        head = remove_node(head,wipe,true)
    end
    return head
end

-- This initialization might move someplace else if we need more of it. The problem is that
-- this module depends on fonts so we have an order problem.

-- Set of characters next to which a nbsp is left alone by the 0x00A0 method. The
-- table is filled on first access: the index function marks all code points of
-- the devanagari and kannada blocks, then calls setmetatableindex with nil on
-- the table (presumably detaching itself so that it runs only once) and finally
-- answers the lookup that triggered it.

local nbsphash = { }

setmetatableindex(nbsphash,function(t,k)
    -- this needs checking !
    local devanagari = unicodeblocks.devanagari
    for i=devanagari.first,devanagari.last do
        t[i] = true
    end
    local kannada = unicodeblocks.kannada
    for i=kannada.first,kannada.last do
        t[i] = true
    end
    setmetatableindex(t,nil)
    return t[k]
end)

-- Replacement methods, indexed by character code. Each method is called by
-- characters.handler as method(head,current) where 'current' is the node that
-- carries the character. A method that returns a non false first value tells the
-- handler that 'current' has to be removed afterwards; returning false keeps the
-- character in the list.

local methods = {

    -- The next one uses an attribute assigned to the character but still we
    -- don't have the 'local' value.

    -- maybe also 0x0008 : backspace

    -- Watch out: a return value means "remove"!

    -- When a spaceskip glue follows and the glyph after that glue cannot have a
    -- space, the glue is removed. When something else follows and it is a glyph
    -- that can have a space, a font space is inserted before 'current'.

    [0x001E] = function(head,current) -- kind of special
        local next = getnext(current)
        if not next then
            -- end of the list: there is nothing to look at
        elseif getid(next) == glue_code and getsubtype(next) == spaceskip_code then
            local nextnext = getnext(next)
            if nextnext then
                local char = isglyph(nextnext)
                if char and not canhavespace[char] then
                    remove_node(head,next,true)
                end
            end
        else
            local char, font = isglyph(next)
            if char and canhavespace[char] then
                local p = fontparameters[font]
                head, current = insertnodebefore(head,current,new_glue(p.space,p.spacestretch,p.spaceshrink))
            end
        end
        return head, current
    end,

    -- Inserts a font space before 'current' when the next node is a glyph that
    -- is not punctuation.

    [0x001F] = function(head,current) -- kind of special
        local next = getnext(current)
        if next then
            local char, font = isglyph(next)
            if char and not ispunctuation[char] then
                local p = fontparameters[font]
                head, current = insertnodebefore(head,current,new_glue(p.space,p.spacestretch,p.spaceshrink))
            end
        end
        return head, current
    end,

    -- The nbsp is kept as character (return false) when it sits next to a
    -- character from nbsphash: directly after it, after it with one zwj or zwnj
    -- in between, or directly before it. Otherwise it becomes penalty plus glue.

    [0x00A0] = function(head,current) -- nbsp
        local prev, next = getboth(current)
        if next then
            -- look at the following node, not at the nbsp itself
            local char = isglyph(next)
            if not char then
                -- move on
            elseif char == 0x200C or char == 0x200D then -- nzwj zwj
                next = getnext(next)
                if next then
                    char = isglyph(next)
                    if char and nbsphash[char] then
                        return false
                    end
                end
            elseif nbsphash[char] then
                return false
            end
        end
        if prev then
            local char = isglyph(prev)
            if char and nbsphash[char] then
                return false
            end
        end
        return nbsp(head,current)
    end,

    [0x00AD] = function(head,current) -- softhyphen
        return insertnodeafter(head,current,languages.explicithyphen(current))
    end,

    -- The following spaces become a glue of a fraction of the font quad.

    [0x2000] = function(head,current) -- enquad
        return inject_quad_space(0x2000,head,current,1/2)
    end,

    [0x2001] = function(head,current) -- emquad
        return inject_quad_space(0x2001,head,current,1)
    end,

    [0x2002] = function(head,current) -- enspace
        return inject_quad_space(0x2002,head,current,1/2)
    end,

    [0x2003] = function(head,current) -- emspace
        return inject_quad_space(0x2003,head,current,1)
    end,

    [0x2004] = function(head,current) -- threeperemspace
        return inject_quad_space(0x2004,head,current,1/3)
    end,

    [0x2005] = function(head,current) -- fourperemspace
        return inject_quad_space(0x2005,head,current,1/4)
    end,

    [0x2006] = function(head,current) -- sixperemspace
        return inject_quad_space(0x2006,head,current,1/6)
    end,

    -- These two take the width of a reference character: '0' and '.'.

    [0x2007] = function(head,current) -- figurespace
        return inject_char_space(0x2007,head,current,c_zero)
    end,

    [0x2008] = function(head,current) -- punctuationspace
        return inject_char_space(0x2008,head,current,c_period)
    end,

    [0x2009] = function(head,current) -- breakablethinspace
        return inject_quad_space(0x2009,head,current,1/8) -- same as next
    end,

    [0x200A] = function(head,current) -- hairspace
        return inject_quad_space(0x200A,head,current,1/8) -- same as previous (todo)
    end,

    [0x200B] = function(head,current) -- zerowidthspace
        return inject_quad_space(0x200B,head,current,0)
    end,

    -- Nobreak variants: no stretch and shrink values are passed here.

    [0x202F] = function(head,current) -- narrownobreakspace
        return inject_nobreak_space(0x202F,head,current,fontquads[getfont(current)]/8)
    end,

    [0x205F] = function(head,current) -- math thinspace
        return inject_nobreak_space(0x205F,head,current,4*fontquads[getfont(current)]/18)
    end,

    -- The next one is also a bom so maybe only when we have glyphs around it

 -- [0xFEFF] = function(head,current) -- zerowidthnobreakspace
 --     return head, current
 -- end,

}

characters.methods = methods

-- Runs over the characters in the list starting at 'head' and applies the
-- method registered for a character, if any. Returns the possibly changed head.
--
-- A method that returns a non false first value asks for removal of the
-- character node. That removal is postponed: the node is kept in 'wipe' and is
-- removed when the next character with a method shows up or when the loop has
-- finished (presumably in order not to disturb the running traversal).
--
-- The first return value of a method is the head of the list. Some methods
-- insert before the current node, which gives a new head when that node is the
-- first one, so the returned head has to be picked up here.

function characters.handler(head)
    local wipe = false
    for current, char in nextchar, head do
        local method = methods[char]
        if method then
            if wipe then
                head = remove_node(head,wipe,true)
                wipe = false
            end
            if trace_characters then
                report_characters("replacing character %C, description %a",char,lower(chardata[char].description))
            end
            local h = method(head,current)
            if h then
                head = h
                wipe = current
            end
        end
    end
    if wipe then
        head = remove_node(head,wipe,true)
    end
    return head
end

-- function characters.handler(head)
--     local wiped = false
--     for current, char in nextchar, head do
--         local method = methods[char]
--         if method then
--             if wiped then
--                 wiped[#wiped+1] = current
--             else
--                 wiped = { current }
--             end
--             if trace_characters then
--                 report_characters("replacing character %C, description %a",char,lower(chardata[char].description))
--             end
--             local h = method(head,current)
--             if h then
--                 head = h
--             end
--         end
--     end
--     if wiped then
--         for i=1,#wiped do
--             head = remove_node(head,wiped[i],true)
--         end
--     end
--     return head
-- end

[-- Attachment #3: Type: text/plain, Size: 496 bytes --]

___________________________________________________________________________________
If your question is of interest to others as well, please add an entry to the Wiki!

maillist : ntg-context@ntg.nl / https://www.ntg.nl/mailman/listinfo/ntg-context
webpage  : https://www.pragma-ade.nl / http://context.aanhet.net
archive  : https://bitbucket.org/phg/context-mirror/commits/
wiki     : https://contextgarden.net
___________________________________________________________________________________

  reply	other threads:[~2023-01-04 23:45 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-30  0:12 Rik Kabel via ntg-context
2023-01-04 22:10 ` Rik Kabel via ntg-context
2023-01-04 23:45   ` Hans Hagen via ntg-context [this message]
2023-01-05  3:21     ` Rik Kabel via ntg-context
2023-01-05  7:38       ` Hans Hagen via ntg-context
2023-01-05  9:35       ` Hans Hagen via ntg-context
2023-01-06  2:05         ` Rik Kabel via ntg-context
2023-01-06  7:53           ` Hans Hagen via ntg-context

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=db4b2f45-37ec-5030-c1f9-5f1360220165@freedom.nl \
    --to=ntg-context@ntg.nl \
    --cc=j.hagen@freedom.nl \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).