#!/usr/bin/env perl

use 5.010001;
use utf8;
# use utf8::all;
use strict;
use warnings;
use warnings FATAL => 'utf8';
use autodie;

use open qw[ :utf8 :std ];

use Getopt::Long qw[GetOptions
  :config bundling no_auto_abbrev no_ignore_case];
use Pod::Usage qw[pod2usage];
use Text::Balanced qw[extract_multiple];

# Default settings.  `attributes` is the attribute string written into
# every generated span; all other keys are boolean switches.
my %opt = (
  attributes => '.hl',

  # Switches which are on unless turned off on the command line.
  ( map { $_ => 1 } qw[
      check_word_chars
      check_whitespace
      backslash_escapes
      backticks_code
      tilde_code_blocks
      tex_math_dollars
  ] ),

  # Switches which are off unless turned on on the command line.
  ( map { $_ => 0 } qw[
      tex_math_double_backslash
      tex_math_single_backslash
  ] ),
);

# Names of the boolean switches: every key containing an underscore,
# which excludes `attributes`.
my @opts = grep { index( $_, '_' ) >= 0 } keys %opt;

# Option callback for --all: turn every boolean switch in %opt on.
sub all {
  @opt{@opts} = (1) x @opts;
  return;
}

# Option callback for --none: turn every boolean switch in %opt off.
sub none {
  @opt{@opts} = (0) x @opts;
  return;
}

# Option callback shared by all the --no-* options.  The first argument
# is the option name (e.g. "no_check_whitespace"); the leading "no_" is
# stripped and the switch of the remaining name is set to false.
sub neg_opt {
  my $negated = shift;
  ( my $switch = "$negated" ) =~ s/^no_//;
  $opt{$switch} = 0;
}

# Parse the command line into %opt.
#
# Positive switches are stored directly in %opt under their primary
# (underscore) name; each --no-* variant calls neg_opt(), which clears
# the corresponding positive switch.  --none / --all flip every switch
# at once.  Because bundling is enabled, the two-letter aliases
# (db, DB, sb, SB) must be written with two dashes.
#
# GetOptions returns false when it meets an unknown or malformed option
# (after printing its own diagnostic), so bail out with the usage
# message instead of filtering the input with half-parsed settings.
GetOptions(
  \%opt,
  'attributes|a=s',
  'check_whitespace|check-whitespace|s',
  'no_check_whitespace|no-check-whitespace|S' => \&neg_opt,
  'check_word_chars|check-word-chars|w',
  'no_check_word_chars|no-check-word-chars|W' => \&neg_opt,
  'backslash_escapes|backslash-escapes|b',
  'no_backslash_escapes|no-backslash-escapes|B' => \&neg_opt,
  'backticks_code|backticks-code|c',
  'no_backticks_code|no-backticks-code|C' => \&neg_opt,
  'tilde_code_blocks|tilde-code-blocks|t',
  'no_tilde_code_blocks|no-tilde-code-blocks|T' => \&neg_opt,
  'tex_math_dollars|tex-math-dollars|d',
  'no_tex_math_dollars|no-tex-math-dollars|D' => \&neg_opt,
  'tex_math_double_backslash|tex-math-double-backslash|db',
  'no_tex_math_double_backslash|no-tex-math-double-backslash|DB' => \&neg_opt,
  'tex_math_single_backslash|tex-math-single-backslash|sb',
  'no_tex_math_single_backslash|no-tex-math-single-backslash|SB' => \&neg_opt,
  'none|n' => \&none,
  'all|N|A' => \&all,
  'help|h' => sub { pod2usage(1) },
  'man|m' => sub { pod2usage( -verbose => 2) },
) or pod2usage(2);

# Text placed around each highlighted run: "[" before it and
# "]{ATTRIBUTES}" after it, with ATTRIBUTES taken from --attributes.
my ( $span_start, $span_stop ) = ( '[', ']{' . $opt{attributes} . '}' );

# Patterns handed to extract_multiple().  Everything collected here
# describes text which should pass through untouched (math, escapes,
# code); the highlight pattern itself is appended further down.
my @extractors;

{
  # Each rule is an option name followed by the patterns that option
  # enables.  Rules are listed in the order their patterns must be
  # tried: the backslash-delimited math forms come before plain
  # backslash escapes.
  my @skip_rules = (
    [ tex_math_double_backslash =>
        qr{ \\\\ \( .+? \\\\ \) }msx,
        qr{ \\\\ \[ .+? \\\\ \] }msx,
    ],
    [ tex_math_single_backslash =>
        qr{ \\ \( .+? \\ \) }msx,
        qr{ \\ \[ .+? \\ \] }msx,
    ],
    [ backslash_escapes =>
        qr{ \\. }msx,
    ],
    # A run of three or more tildes up to the next identical run.
    [ tilde_code_blocks =>
        qr[ ( ( \~{3,} ) .+? \g{-1} ) ]msx,
    ],
    # A run of backticks up to the next identical run.
    [ backticks_code =>
        qr[ ( ( \`+ ) .+? \g{-1} ) ]msx,
    ],
    # $$...$$ without a blank line inside, then inline $...$.
    [ tex_math_dollars =>
        qr{ \$\$ (?: [^\n] | (?<! \n ) \n (?! \n ) )+? \$\$ }msx,
        qr{ \$ (?! \s ) .+? (?<! \s ) \$ (?! \d ) }msx,
    ],
  );

  for my $rule ( @skip_rules ) {
    my ( $switch, @patterns ) = @{$rule};
    push @extractors, @patterns if $opt{$switch};
  }
}

# Build the pattern for ==HIGHLIGHTED== runs and register it as the
# last extractor.  The optional lookarounds are included only when the
# corresponding check is enabled.
{
  my $word_char = '[\pL\pN\p{Mn}]';

  # --check-word-chars: no word character directly before the opening
  # "==" or directly after the closing "==".
  my ( $not_after_word, $not_before_word ) = $opt{check_word_chars}
    ? ( "(?<! $word_char )", "(?! $word_char )" )
    : ( q{}, q{} );

  # --check-whitespace: no whitespace directly after the opening "=="
  # or directly before the closing "==".
  my ( $not_before_space, $not_after_space ) = $opt{check_whitespace}
    ? ( '(?! \s )', '(?<! \s )' )
    : ( q{}, q{} );

  my $highlight = qr{
    $not_after_word
    \=\=
    $not_before_space
    ( .+? )
    $not_after_space
    \=\=
    $not_before_word
  }msx;

  # The single-key hash makes extract_multiple() return the captured
  # text as a blessed scalar reference, which marks it for wrapping.
  push @extractors, +{ highlight => $highlight };
}

# Slurp the whole input: STDIN, or every file named on the command line.
# With $/ undefined, <> in list context yields one string per input
# file, so joining the list keeps all files (a scalar read would stop
# after the first one) and always produces a defined string, even for
# empty input.
my $text = do { local $/; join q{}, <> };

# Split the text into chunks.  Chunks matched by the highlight pattern
# come back as scalar references and get wrapped in span markup; all
# other chunks are plain strings and are passed through unchanged.
my @chunks = map { ref($_) ? $span_start . ${$_} . $span_stop : $_ }
  extract_multiple( $text, \@extractors );

print join "", @chunks;
    
__END__

=encoding UTF-8

=head1 NAME

highlight-eq2span.pl -- Replace Obsidian highlight runs with Pandoc spans

=head1 VERSION

This documentation describes version 0.001 of highlight-eq2span.pl

=head1 SYNOPSIS

    perl highlight-eq2span.pl [OPTIONS] <input.md >output.md

=head1 DESCRIPTION

highlight-eq2span.pl replaces C<==HIGHLIGHTED==> as understood
by Obsidian with Pandoc spans like C<[HIGHLIGHTED]{.hl}>.

This script is a regex-based text filter, with far simpler parsing
capabilities than Pandoc.
However, by default it tries to leave alone any B<==> sequences which
are unlikely to be highlighting markup. There are some command line
options to control this.

=head1 OPTIONS

=over

=item -a, --attributes STR

Use STR as attributes for Pandoc spans.

Default value: C<.hl>

=item -s, --check-whitespace 

Assume that opening C<==> delimiters are not followed by whitespace,
and that closing C<==> delimiters are not preceded by whitespace.

Default value: true

=item -S --no-check-whitespace

Set the -s option just above to false.

=item -w, --check-word-chars 

Assume that opening C<==> delimiters are not preceded by word-chars,
and that closing C<==> delimiters are not followed by word-chars.

Default value: true

=item -W --no-check-word-chars

Set the -w option just above to false.

=item -b, --backslash-escapes

Skip characters preceded by a backslash.
This notably includes C<\=>.

Default value: true

Note that the B<--db> and B<--sb> options below affect this option:
when either is enabled, its math delimiters take precedence over
backslash escapes.

=item -B --no-backslash-escapes

Set the -b option just above to false.

=item -c, --backticks-code 

Skip chunks of text which look like block or inline
backticks-delimited code.

Default value: true

=item -C --no-backticks-code

Set the -c option just above to false.

=item -t, --tilde-code-blocks

Skip chunks of text which look like tilde-delimited code blocks.

Default value: true

=item -T --no-tilde-code-blocks

Set the -t option just above to false.

=item -d, --tex-math-dollars 

Skip chunks of text which look like block or inline $ delimited math.

Default value: true


=item -D --no-tex-math-dollars

Set the -d option just above to false.

=item --db, --tex-math-double-backslash 

Skip chunks of text which look like C<\\(...\\)> or C<\\[...\\]>
delimited math.

Default value: false

=item --DB --no-tex-math-double-backslash

Set the --db option just above to false.

=item --sb, --tex-math-single-backslash 

Skip chunks of text which look like C<\(...\)> or C<\[...\]>
delimited math.

Default value: false

=item --SB --no-tex-math-single-backslash

Set the --sb option just above to false.

=item -n, --none

Disable all switches.

=item -A, -N, --all

Enable all switches.

=item -h --help

Print usage help and exit.

=item -m, --man

Print full documentation and exit.

=back

=head1 LICENSE

This software is copyright (c) 2022 by Benct Philip Jonsson.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

http://dev.perl.org/licenses/

=head1 AUTHOR

Benct Philip Jonsson E<lt>bpjonsson@gmail.comE<gt>

=cut

# Vim: set ft=pod et ts=4 sts=4 sw=4 tw=72 cc=72:


# Vim: set ft=pod et ts=4 sts=4 sw=4 tw=72 cc=72: