--[======[

# NAME

links-or-notes.lua - Pandoc filter for selectively *not* turning links into notes.

# SYNOPSIS

    pandoc -L links-or-notes.lua ...

# IMPORTANT

Do *not* use `-V links-as-notes` or `-M links-as-notes` when using
this filter! The filter *replaces* the functionality of that variable.

# DESCRIPTION

With Pandoc’s default templates for some formats you can set the
variable `-V links-as-notes` to get links rendered as footnotes.
However this allows for no exceptions: either *all* links are rendered
as footnotes or *no* links are rendered as footnotes. Sometimes you
want more granularity than that — typically you want links to some
domains to not be turned into footnotes, or you want to explicitly mark
some links as non-notes — and this filter allows for just that.

# USAGE

The filter provides three mechanisms for controlling which links are
or are not turned into footnotes:

1.  The filter contains an array table whose values are patterns for
    matching URLs. Any link with a URL which matches one of these
    patterns will not be turned into a note. There is a helper function
    `url_pattern` which turns an ordinary string into a suitable
    pattern for matching against a URL — it “protects” characters which
    are special in patterns so that they are matched as ordinary
    characters. Typically you just type something like this:

    ``` {.lua}
    local no_notes_urls = {
      '^%s*$',                              -- empty target
      url_pattern"https://doi.org",         -- anchored
      url_pattern('wikipedia.org', false),  -- unanchored
      url_pattern('example.com', 'https?')  -- anchored and
                                            -- matches https OR http
    }
    ```

    Each row in the table above — which you will find in the filter
    code below the line which says “EXCEPTION URL PATTERNS” —
    exemplifies a different way to generate a pattern matching a URL.

    1.  The first row is a “raw” pattern which will match a “blank”
        URL, so that you don’t get a blank footnote if you type
        something like `[blah]()` in your Markdown.

    2.  The second row matches any URL starting with `https://doi.org`
        so that links with such URLs are not turned into footnotes.
        The actual pattern will be `^https%:%/%/doi%.org` with
        punctuation characters escaped and the leading `^` indicating
        that the match shall be anchored to the start of the URL. Such
        a pattern is created if the `url_pattern` is called with a
        single string argument: any punctuation characters in the
        string are escaped and the start-of-string anchor prepended.

    3.  The third row (which is actually disabled by default) matches
        any URL containing the substring `wikipedia.org`. Note that in
        this case the `url_pattern` function takes the boolean `false`
        as a second argument, which means that nothing is prepended to
        the pattern; the only action is that any punctuation characters
        in the string are escaped. Note also that when there is more
        than one argument (or the single argument is not a literal
        string) the arguments must be put in parentheses. This omission
        of the anchor and the protocol is necessary because Wikipedia
        URLs are usually prefixed with a subdomain indicating the
        language, so that `https://en.wikipedia.org` links to the
        English Wikipedia, `https://sv.wikipedia.org` links to the
        Swedish Wikipedia and so on, and if you want Wikipedia URLs to
        be exempt from “notification” you probably want this regardless
        of the language!

    4.  In the fourth row — which is a pure throwaway example! — the
        second argument to `url_pattern` is a string. Such a string is
        used as the protocol of an anchored URL, so that the pattern
        becomes `^https?%:%/%/example%.com`. Note that the `?` after
        `s` is copied unescaped, which means that the `s` is optional,
        so that the pattern matches both `http://` and `https://` URLs!

2.  Any link with a class `.no-note` — e.g.
    `[link text](http://example.com){.no-note}` — will not be turned
    into a note even if its URL does not match any of the exception
    patterns.

3.  Conversely any link with a class `.note` *will* be turned into a
    note even if its URL matches an exception pattern.

# TO BRACKET OR NOT TO BRACKET

There are two schools when it comes to presenting a literal URL in
text: either it is enclosed in angle brackets `<http://example.com>`
or not `http://example.com`. In the filter code there is a variable
`note_url_brackets`. If its value is `false` (the default) no brackets
are inserted by the filter. Change the value to `true` if you want
brackets inserted.

# A SPECIAL CASE: `mailto:` URLs.

As an exception a `mailto:` protocol is not included in the displayed
text when turning a link into a footnote.

# TODO

-   Take configuration from the document metadata.

# AUTHOR

Benct Philip Jonsson

# COPYRIGHT AND LICENSE

This software is Copyright (c) 2020 by Benct Philip Jonsson.

This is free software, licensed under:

    The MIT (X11) License

]======]

-- Set this to true to show URLs in notes as "<http://example.com>"
-- rather than as "http://example.com"
local note_url_brackets = false

-- This function turns a string into a pattern suitable for
-- matching (a prefix on) a URL.
--
-- Arguments:
--   str    : the literal text to match; it is passed through
--            `tostring` and every punctuation character in it is
--            escaped with `%` so that it matches itself.
--   prefix : controls what is put in front of the escaped text.
--            * nil (omitted) or any true non-string value: the
--              pattern is anchored to the start of the string (`^`).
--            * an explicit `false`: nothing is prepended.
--            * a string: it is used as a protocol *pattern*, copied
--              unescaped, giving `^<prefix>://` (with the `://`
--              escaped) in front of the escaped text.
--   suffix : by default the pattern is *not* anchored to the end of
--            the string. Pass an explicit `true` to append `$`.
--
-- Returns the resulting pattern as a string.
local function url_pattern (str, prefix, suffix)
  -- Distinguish "not given" (anchor by default) from an explicit false.
  if nil == prefix then
    prefix = true
  end
  if 'string' == type(prefix) then
    -- The protocol is deliberately left unescaped so that pattern
    -- items such as the `?` in 'https?' keep their special meaning.
    prefix = '^' .. prefix .. '%:%/%/'
  else
    prefix = prefix and '^' or ""
  end
  suffix = suffix and '$' or ""
  -- Escape punctuation characters. Only the first return value of
  -- gsub (the new string) is kept; the substitution count is dropped.
  local pattern = tostring(str):gsub('%p', '%%%0')
  return prefix .. pattern .. suffix
end

------------------------------------------------------------
-- EXCEPTION URL PATTERNS
--
-- Edit as needed!
--
-- This table is a list of prefixes/patterns for
-- URLs which should not be turned into notes.
-- Basically domains.
local no_notes_urls = {
  '^%s*$',                                 -- empty target
  url_pattern"https://doi.org",            -- anchored
  -- url_pattern('wikipedia.org', false),  -- unanchored
  -- url_pattern('example.com', 'https?')  -- anchored and matches https OR http
}
------------------------------------------------------------

-- Constructor for the inline that displays a URL inside a note:
-- wraps the URL in angle brackets first when `note_url_brackets`
-- is true, otherwise it is plain `pandoc.Str`.
local url_str = note_url_brackets
  and function (url)
    url = '<' .. url .. '>'
    return pandoc.Str(url)
  end
  or pandoc.Str

-- Filter function for Link elements.
--
-- Returns nil (i.e. no replacement is made) when
--   * the link has the class `no-note`, or
--   * the link lacks the class `note` and its target matches one of
--     the patterns in `no_notes_urls`, or
--   * the stringified link text already contains the URL.
-- Otherwise returns a list consisting of the original link text
-- followed by a Note which contains the link, with the URL
-- (minus any leading `mailto:`) as its text.
function Link (link)
  if link.classes:includes"no-note" then
    return nil
  end
  local url = link.target
  -- An explicit `.note` class overrides the exception patterns.
  if not link.classes:includes"note" then
    for _, pattern in ipairs(no_notes_urls) do
      if url:match(pattern) then
        return nil
      end
    end
  end
  -- Only the displayed text loses the protocol; link.target is untouched.
  url = url:gsub('^mailto%:', "")
  -- Already a textual URL?
  local str = pandoc.utils.stringify(link)
  if str:match( url_pattern(url, false) ) then
    return nil
  end
  -- Keep the old link text in the running text ...
  local text = pandoc.List:new(link.content)
  -- ... and let the link itself, now showing its URL, go into the note.
  link.content = {url_str(url)}
  -- Mark the link as handled. `extend` expects a list, so a single
  -- class name is added with `insert` instead.
  link.classes:insert('no-note')
  text:extend{ pandoc.Note{ pandoc.Plain{link} } }
  return text
end