#!/usr/bin/env perl

# highlight-eq2span.pl -- rewrite Obsidian ==highlight== runs as Pandoc
# bracketed spans.  User documentation is in the POD after __END__.

use 5.010001;
use utf8;

# use utf8::all;
use strict;
use warnings;
use warnings FATAL => 'utf8';
use autodie;
use open qw[ :utf8 :std ];

use Getopt::Long qw[GetOptions :config bundling no_auto_abbrev no_ignore_case];
use Pod::Usage qw[pod2usage];
use Text::Balanced qw[extract_multiple];

# Option defaults.  'attributes' takes a string value; every other key
# is a boolean switch.
my %opt = (
    attributes                => '.hl',
    check_word_chars          => 1,
    check_whitespace          => 1,
    backslash_escapes         => 1,
    backticks_code            => 1,
    tilde_code_blocks         => 1,
    tex_math_dollars          => 1,
    tex_math_double_backslash => 0,
    tex_math_single_backslash => 0,
);

# The boolean switches are exactly the keys containing an underscore.
my @opts = grep { /_/ } keys %opt;

# Handler for --all: turn every boolean switch on.
sub all {
    $opt{$_} = 1 for @opts;
    return;
}

# Handler for --none: turn every boolean switch off.
sub none {
    $opt{$_} = 0 for @opts;
    return;
}

# Handler shared by all --no-* options: strip the leading "no_" from
# the option name passed in by Getopt::Long and clear that switch.
sub neg_opt {
    my ($name) = @_;
    ( my $switch = "$name" ) =~ s/^no_//;
    $opt{$switch} = 0;
    return;
}

# Options are applied in command line order, so e.g. "--none -c"
# leaves only backticks_code enabled.  An unparsable command line is
# reported through the usage message instead of being ignored.
GetOptions(
    \%opt,
    'attributes|a=s',
    'check_whitespace|check-whitespace|s',
    'no_check_whitespace|no-check-whitespace|S' => \&neg_opt,
    'check_word_chars|check-word-chars|w',
    'no_check_word_chars|no-check-word-chars|W' => \&neg_opt,
    'backslash_escapes|backslash-escapes|b',
    'no_backslash_escapes|no-backslash-escapes|B' => \&neg_opt,
    'backticks_code|backticks-code|c',
    'no_backticks_code|no-backticks-code|C' => \&neg_opt,
    'tilde_code_blocks|tilde-code-blocks|t',
    'no_tilde_code_blocks|no-tilde-code-blocks|T' => \&neg_opt,
    'tex_math_dollars|tex-math-dollars|d',
    'no_tex_math_dollars|no-tex-math-dollars|D' => \&neg_opt,
    'tex_math_double_backslash|tex-math-double-backslash|db',
    'no_tex_math_double_backslash|no-tex-math-double-backslash|DB' => \&neg_opt,
    'tex_math_single_backslash|tex-math-single-backslash|sb',
    'no_tex_math_single_backslash|no-tex-math-single-backslash|SB' => \&neg_opt,
    'none|n'  => \&none,
    'all|N|A' => \&all,
    'help|h'  => sub { pod2usage(1) },
    'man|m'   => sub { pod2usage( -verbose => 2 ) },
) or pod2usage(2);

my $span_start = '[';
my $span_stop  = ']{' . $opt{attributes} . '}';

# Patterns for text which must be passed through untouched.  They are
# tried in this order at each position of the input, so the math
# delimiters \\( \\[ \( \[ have to come before the generic
# backslash-escape pattern, which would otherwise consume their
# leading backslash.
my @extractors;

if ( $opt{tex_math_double_backslash} ) {
    push @extractors, (
        qr{ \\\\ \( .+? \\\\ \) }msx,
        qr{ \\\\ \[ .+? \\\\ \] }msx,
    );
}

if ( $opt{tex_math_single_backslash} ) {
    push @extractors, (
        qr{ \\ \( .+? \\ \) }msx,
        qr{ \\ \[ .+? \\ \] }msx,
    );
}

push @extractors, qr{ \\. }msx if $opt{backslash_escapes};

# The outer group makes $1 the whole fenced chunk; \g{-1} requires the
# closing fence to repeat the opening one.
push @extractors, qr[ ( ( \~{3,} ) .+? \g{-1} ) ]msx
    if $opt{tilde_code_blocks};
push @extractors, qr[ ( ( \`+ ) .+? \g{-1} ) ]msx
    if $opt{backticks_code};

# NOTE(review): the copy of this script under review had lost the text
# from the middle of the first dollar-math pattern up to the end of
# the highlight pattern.  Everything from here down to the "Slurp"
# comment is a reconstruction based on the surviving fragments and on
# the behaviour described in the POD -- verify against the upstream
# script before relying on edge cases.

# Shortest run of text which stays inside one paragraph: any
# non-newline character, or a newline with no newline on either side.
my $para_body = qr{ (?: [^\n] | (?<! \n ) \n (?! \n ) )+? }msx;

if ( $opt{tex_math_dollars} ) {
    push @extractors, (

        # Display math.
        qr{ \$\$ $para_body \$\$ }msx,

        # Inline math, modelled on Pandoc's tex_math_dollars rules: no
        # whitespace just inside the delimiters and no digit right
        # after the closing one, so "$5 and $10" is left alone.
        qr{ \$ (?! \s ) $para_body (?<! \s ) \$ (?! \d ) }msx,
    );
}

{
    # Opening == must not follow a word character and the closing ==
    # must not precede one (--check-word-chars).
    my $not_after_word  = $opt{check_word_chars} ? '(?<! \w )' : '';
    my $not_before_word = $opt{check_word_chars} ? '(?! \w )'  : '';

    # No whitespace just inside either delimiter (--check-whitespace).
    my $not_before_space = $opt{check_whitespace} ? '(?! \s )'  : '';
    my $not_after_space  = $opt{check_whitespace} ? '(?<! \s )' : '';

    my $highlight = join ' ',
        $not_after_word, '==', $not_before_space,
        '(', $para_body, ')',
        $not_after_space, '==', $not_before_word;

    # A single-pair hash makes extract_multiple return the captured
    # text as a blessed scalar reference, which is how highlights are
    # told apart from the skipped chunks below.
    push @extractors, { Highlight => qr/$highlight/msx };
}

# Slurp stdin (or the files named on the command line).  Empty input
# must become an empty string: given undef, extract_multiple would
# fall back to $_.
my $text = do { local $/; <> } // '';

# Process the text: only the blessed highlight chunks are wrapped,
# every other chunk is printed verbatim.
my @chunks = extract_multiple $text, \@extractors;

print map { ref $_ ? $span_start . $$_ . $span_stop : $_ } @chunks;

__END__

=encoding UTF-8

=head1 NAME

highlight-eq2span.pl -- Replace Obsidian highlight runs with Pandoc spans

=head1 VERSION

This documentation describes version 0.001 of highlight-eq2span.pl

=head1 SYNOPSIS

    perl highlight-eq2span.pl [OPTIONS] <input.md >output.md

=head1 DESCRIPTION

highlight-eq2span.pl replaces C<==HIGHLIGHTED==> as understood by
Obsidian with Pandoc spans like C<[HIGHLIGHTED]{.hl}>.

This script is a regex-based text filter, with far simpler parsing
capabilities than Pandoc. However it by default tries to leave B<==>
sequences which are unlikely to be highlighting markup alone. There are
some command line options to control this.

=head1 OPTIONS

=over

=item -a, --attributes STR

Use STR as attributes for Pandoc spans.

Default value: C<.hl>

=item -s, --check-whitespace

Assume that opening C<==> delimiters are not followed by whitespace,
and that closing C<==> delimiters are not preceded by whitespace.

Default value: true

=item -S, --no-check-whitespace

Set the -s option just above to false.

=item -w, --check-word-chars

Assume that opening C<==> delimiters are not preceded by word-chars,
and that closing C<==> delimiters are not followed by word-chars.

Default value: true

=item -W, --no-check-word-chars

Set the -w option just above to false.

=item -b, --backslash-escapes

Skip characters preceded by a backslash. This notably includes C<\=>.

Default value: true

Note that the B<--db> and B<--sb> options below affect this option!

=item -B, --no-backslash-escapes

Set the -b option just above to false.

=item -c, --backticks-code

Skip chunks of text which look like block or inline backticks-delimited
code.

Default value: true

=item -C, --no-backticks-code

Set the -c option just above to false.

=item -t, --tilde-code-blocks

Skip chunks of text which look like tilde-delimited code blocks.

Default value: true

=item -T, --no-tilde-code-blocks

Set the -t option just above to false.

=item -d, --tex-math-dollars

Skip chunks of text which look like block or inline $ delimited math.

Default value: true

=item -D, --no-tex-math-dollars

Set the -d option just above to false.

=item --db, --tex-math-double-backslash

Skip chunks of text which look like C<\\(...\\)> or C<\\[...\\]>
delimited math.

Default value: false

=item --DB, --no-tex-math-double-backslash

Set the --db option just above to false.

=item --sb, --tex-math-single-backslash

Skip chunks of text which look like C<\(...\)> or C<\[...\]> delimited
math.

Default value: false

=item --SB, --no-tex-math-single-backslash

Set the --sb option just above to false.

=item -n, --none

Disable all switches.

=item -A, -N, --all

Enable all switches.

=item -h, --help

Print usage help and exit.

=item -m, --man

Print full documentation and exit.

=back

=head1 LICENSE

This software is copyright (c) 2022 by Benct Philip Jonsson.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

http://dev.perl.org/licenses/

=head1 AUTHOR

Benct Philip Jonsson E<lt>bpjonsson@gmail.comE<gt>

=cut

# Vim: set ft=pod et ts=4 sts=4 sw=4 tw=72 cc=72:
# Vim: set ft=pod et ts=4 sts=4 sw=4 tw=72 cc=72: