From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 27940 invoked by alias); 14 Aug 2015 23:19:31 -0000 Mailing-List: contact zsh-users-help@zsh.org; run by ezmlm Precedence: bulk X-No-Archive: yes List-Id: Zsh Users List List-Post: List-Help: X-Seq: 20409 Received: (qmail 25037 invoked from network); 14 Aug 2015 23:19:27 -0000 X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00,SPF_HELO_PASS autolearn=ham autolearn_force=no version=3.4.0 X-Injected-Via-Gmane: http://gmane.org/ To: zsh-users@zsh.org From: Joep van Delft Subject: Tip of the day: grep2awk zle function Date: Sat, 15 Aug 2015 01:01:57 +0200 Message-ID: <20150815010157.696d904b@xs4all.nl> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit X-Complaints-To: usenet@ger.gmane.org X-Gmane-NNTP-Posting-Host: p5ddc4c32.dip0.t-ipconnect.de X-Newsreader: Claws Mail 3.12.0 (GTK+ 2.24.28; i686-pc-linux-gnu) Hi! This summer's side project has resulted in grep2awk. grep2awk is a zle function that turns the first grep command in the current command line into its awk equivalent, leaving the cursor right where you want it. This operation includes a transformation of Basic Regular Expression to Extended Regular Expression. Possibly also of interest is an adaptation of the zsh test suite, with which zle functions can be tested against the currently installed zsh, outside the scope of the zsh source directory/makefile. Code, installation instructions, and some details can be found at https://github.com/joepvd/grep2awk. Kind regards, Joep # grep2awk: # A zle convenience function to exchange a `grep` on the command line # into a corresponding `awk`-line. # How to use: # Drop this file somewhere in your $fpath, and configure your shell to # make use of it: # # autoload grep2awk # zle -N grep2awk # bindkey "^X^A" grep2awk # # This makes the widget available under Ctrl-X Ctrl-A.
#
# The files t/comptest and t/ztst.zsh are taken (and modified) from the
# zsh project.  This project can be used under the same terms as the zsh
# project.
#
# By Joep van Delft, 2015

emulate -L zsh
setopt extendedglob
autoload -Uz split-shell-arguments

# grep2awk [helper arg]
#
# ZLE widget body.  Without arguments it rewrites the first grep/egrep/fgrep
# command found in the current command line into an awk invocation and
# stores the result in BUFFER/CURSOR.
#
# With arguments it acts as a test hook: the helper named by $1
# (regex2awkprog, fixed2ere or bre2ere) is called with $2, its output is
# echoed, and the helper's exit status is returned.
grep2awk() {
  # Needs to be in its own scope, to not have the variables leak to
  # the main shell process.  Every variable assigned below is declared
  # here, including the loop counters and the awk command.
  local replacement_line REPLY REPLY2 flag cursor awk i l o
  local -a reply
  typeset -A opt

  # Define some functions first:
  #  - regex2awkprog
  #  - fixed2ere
  #  - bre2ere

  # regex2awkprog quoted-regex
  #
  # Print a single-quoted awk program equivalent to the grep regex in $1,
  # honouring the grep options recorded in the associative array `opt`
  # of the calling scope.  Returns 1 on conflicting options.
  regex2awkprog() {
    local re prelude action prog
    re=${(Q)1}  # Remove one level of quoting, so we have what
                # grep would receive.
    (( opt[F] > 0 )) && {
      re="$(fixed2ere $re)"
    }
    # A sanity check:
    # NOTE(review): the second `zle -M` presumably replaces the first
    # message on screen -- verify, and consider merging the two.
    (( opt[E] + opt[F] + opt[G] + opt[P] > 1 )) && {
      zle -M "grep2awk:Options E, F, G and P are mutually exclusive."
      zle -M "Exiting."
      return 1
    }
    (( opt[G] == 1 || opt[E] + opt[F] + opt[P] == 0 )) && {
      # It is Basic Regular Expression
      re="$(bre2ere $re)"
    }
    (( opt[w] > 0 )) && {
      re="\\<($re)\\>"
    }
    (( opt[x] > 0 )) && {
      re="^($re)$"
    }
    # Replace any forward slash with an escaped one:
    re=${re//\//\\/}
    # Wrap in slashes, and replace any single quote with its
    # ASCII code:
    re=/${re//\'/\\47}/
    # Negate the pattern if told so:
    (( opt[v] > 0 )) && {
      re="!$re"
    }
    (( opt[i] + opt[y] > 0 )) && {
      prelude+="BEGIN{IGNORECASE=1}; "
    }
    # The print action.  Add some extra stuff if line count or file name
    # are enabled.
    action="{print ${opt[H]:+FILENAME\":\"}${opt[n]:+NR\":\"}\$0}"
    # c, H, l and L are mutually exclusive.
    (( opt[H] + opt[l] + opt[L] + opt[c] > 1 )) && {
      zle -M "grep2awk:Options c, H, l and L are mutually exclusive."
      zle -M "Exiting."
      return 1
    }
    (( opt[c] > 0 )) && {
      # This is incompatible with `grep -c`, as files where the count is 0
      # are not displayed.  This is saner behavior, IMHO.
      prelude+='BEGINFILE{c=0};ENDFILE{if(c>0)print FILENAME":"c}; '
      action='{c++}'
    }
    (( opt[l] > 0 )) && {
      action="{print FILENAME; nextfile}"
    }
    (( opt[L] > 0 )) && {
      prelude+="BEGINFILE{flag=0};ENDFILE{if (flag==0)print FILENAME}; "
      action="{flag=1; nextfile}"
    }
    prog="${prelude}${re} $action"
    # NOTE(review): $prog is handed to printf as the *format* string, so
    # printf interprets backslash sequences and `%` in the generated
    # program.  The doubled backslashes emitted by bre2ere look like they
    # compensate for this -- confirm before switching to a '%s' format.
    printf -- ${(qq)prog}
  }

  # fixed2ere string
  #
  # Print $1 with the ERE metacharacters backslash-escaped, so a fixed
  # string (grep -F) can be used as a regular expression.
  fixed2ere() {
    local str ere i
    # We already receive a backslash escaped version of the
    # string by virtue of the ``(q)``-flag when this function is called.
    # NOTE(review): the visible caller passes the result of ${(Q)1}, i.e.
    # an un-quoted string -- confirm which one is intended.
    # That means that we need to transform the characters in the
    # first line to the corresponding character sequences in the
    # second line:
    #
    #   +  |  {  }  [  ]  (  )  *  .  ?  \  $  ^
    #   \+ \| \{ \} \[ \] \( \) \* \. \? \\ \$ \^
    str=$1
    ere=""
    for i in {1..$#str}; do
      if [[ $str[$i] =~ '\+|\||\{|\}|\[|\]|\(|\)|\*|\.|\?|\$|\^' ]]; then
        ere+="\\$str[$i]"
      elif [[ $str[$i] == $'\n' ]]; then
        # This works after arrowing up into history, but not
        # directly after writing it.  Is there a way to detect
        # if this is the case?
        ere+='|'
      elif [[ $str[$i] == '\' ]]; then
        ere+="\\\\$str[$i]"
      else
        ere+="$str[$i]"
      fi
    done
    printf -- "%s" "$ere"
  }

  # bre2ere bre
  #
  # Basic Regular Expression to Extended Regular Expression.
  # Walks $1 one character at a time with a small state machine and
  # prints the converted expression.  Returns 1 (printing nothing) when
  # it meets a state it cannot handle.
  #
  # What needs to be done:
  #   Translate BRE: \+,\{,\},\|,\(,\), +, {, }, |, (, )
  #          to ERE: +, {, }, |, (, ),\+,\{,\},\|,\(,\)
  #
  # But only if the backslash is not escaped with a backslash.
  bre2ere() {
    local bre ere i debug
    # Variables tracking states:
    #   s_esc           previous character was an unescaped backslash
    #   s_brack         inside a bracket expression [...]
    #   s_brack_start   at the first position(s) of a bracket expression
    #   s_char_cls      inside a nested [ ... ] within a bracket expression
    #   s_altern        at the start of the regex/alternation/subexpression
    #   s_altern_start  the alternation was opened by this very character
    local s_brack s_brack_start s_esc s_altern s_altern_start s_char_cls
    bre=$1
    ere=""
    s_esc=off
    s_altern=on
    s_altern_start=off
    s_brack=off
    s_brack_start=off
    s_char_cls=off

    # Optional trace: if the zstyle `debug` is set for this context, its
    # value is used as the name of a file that receives one line of state
    # per input character.
    zstyle -s ':grep2awk:bre2ere:' debug debug
    if [[ -n $debug ]]; then
      rm "$debug" 2>/dev/null
      debug() {
        printf "bre[$i]=$bre[$i] s_esc=$s_esc " >> "$debug"
        printf "s_brack=$s_brack s_brack_start=$s_brack_start " >> "$debug"
        printf "s_altern=$s_altern s_alter_now=$s_altern_start\n" >> "$debug"
      }
    fi

    for i in {1..$#bre}; do
      [[ -n $debug ]] && debug

      # Bracket expressions
      # -------------------
      if [[ $bre[$i] == '[' && $s_esc == off && $s_brack == off ]]; then
        ere+="$bre[$i]"
        s_brack=on
        s_brack_start=on
      # Character class accounting within bracket expressions, in order
      # to keep track of the end of a bracket expression.
      elif [[ $bre[$i] == '[' && $s_brack == on ]]; then
        ere+="$bre[$i]"
        s_char_cls=on
      elif [[ $bre[$i] == ']' && $s_char_cls == on ]]; then
        [[ $s_brack == on ]] || return 1
        ere+="$bre[$i]"
        s_char_cls=off
      # Special casing the start of bracket expressions.
      # s_brack_start allows for inclusion of ] as a character as per
      # POSIX.2.
      elif [[ $s_brack == on && $s_brack_start == on ]]; then
        ere+="$bre[$i]"
        # A leading ^ keeps us at the "start" of the bracket expression.
        if [[ $bre[$i] == '^' ]]; then
          s_brack_start=on
        else
          s_brack_start=off
        fi
      elif [[ $bre[$i] == ']' && $s_brack == on ]]; then
        [[ $s_char_cls == off ]] || return 1
        ere+="$bre[$i]"
        s_brack=off
      # Finally, the default bracket expansion action:
      elif [[ $s_brack == on ]]; then
        ere+="$bre[$i]"

      # Alternation and subexpressions
      # ------------------------------
      # Needed for special casing of *, ^ and $.
      elif [[ $bre[$i] =~ '\||\(' && $s_esc == on ]]; then
        s_altern=on
        s_altern_start=on
        ere+="$bre[$i]"
        # The escape sign is dropped now
        s_esc=off
      elif [[ $bre[$i] == '^' && $s_altern == on ]]; then
        [[ $s_esc == off ]] || return 1
        ere+='^'
      elif [[ $bre[$i] == "*" && $s_altern == on ]]; then
        # When * is at the start of an alternation/subexpression,
        # escape it:
        ere+='\\*'
      elif [[ $bre[$i] == '$' ]]; then
        # Dollar is a special character when it is at the end of
        # string, end of alternation, or end of subexpression.
        # Otherwise: it is not special.  As we do not need to know the
        # 'history' of the regex, we can use a lookahead to find out
        # with which thing we are dealing.
        #
        # The behavior of $ is not documented in man 1 grep; it is,
        # however, in `man 7 regex`.  Some tests indicate that GNU grep
        # adheres to the POSIX.2 specification.
        if [[ $#bre -eq $i || ${bre:$i:2} =~ '\\(\||\))' ]]; then
          [[ $s_esc == off ]] && ere+='$' || ere+='\\$'
        else
          [[ $s_esc == off ]] && ere+='\\$' || ere+='$'
        fi

      # Normal BRE to ERE:
      # ------------------
      elif [[ $bre[$i] =~ '\+|\{|\}|\)|\^' && $s_esc == on ]]; then
        ere+="$bre[$i]"
        # The escape sign is dropped now
        s_esc=off
      elif [[ $bre[$i] =~ '\+|\||\{|\}|\(|\)|\^' && $s_esc == off ]]; then
        ere+="\\\\$bre[$i]"
      elif [[ $bre[$i] == '\\' && $s_esc == on ]]; then
        ere+='\\\\\\\\'
        s_esc=off
      elif [[ $bre[$i] == "\\" && $s_esc == off ]]; then
        s_esc=on
      else
        ere+="$bre[$i]"
        s_esc=off
      fi

      # Leave the "start of alternation" state once a character other
      # than ^ has been consumed, unless the alternation was opened by
      # this very character.
      [[ $s_altern == on \
        && $s_altern_start == off \
        && $bre[$i] != "^" \
      ]] && s_altern=off
      s_altern_start=off
    done
    printf -- "%s" "$ere"
  }

  # Some debugging/testing convenience functions.
  # If grep2awk is autoloaded, the regex replacement
  # functions can be called directly:
  #
  #     grep2awk bre2ere 'a[^a-z]\(c$\|d\)'
  #
  # See the test suite for some examples.
  [[ -n $1 ]] && {
    local out rc
    out=$($1 "$2")
    # Capture the helper's status before echo overwrites $?.
    rc=$?
    # ${=out} word-splits like the unquoted $(...) that was echoed before.
    echo ${=out}
    return $rc
  }

  # The awk command can be configured with the zstyle `awk`; the default
  # is `awk --`.
  zstyle -s ':grep2awk:' awk awk
  : ${awk:=awk --}

  # Store line as $reply[@]:
  split-shell-arguments

  # `flag` keeps the program state while looping through the splitted
  # shell words.  Its semantics:
  #   flag==0: No 'grep', 'egrep', 'fgrep' encountered yet.
  #   flag==1: Set when grep command is replaced by awk
  #            command.  Options or regex.
  #   flag==2: Regex (End of options arrived).
  #   flag==3: The rest.
  flag=0
  cursor=0
  for (( i=1; i<=$#reply; i++ )); do
    if (( i%2==1 && flag < 3 )); then
      # Uneven "words" are whitespace.  Only interested in changing the
      # cursor position as long as the regex is not replaced by the awk
      # program yet (flag < 3).
      (( cursor += $#reply[$i] ))
    elif (( flag == 0 )); then
      case $reply[$i] in
        (grep)
          flag=1
          reply[$i]="$awk"
          ;;
        (egrep)
          flag=1
          reply[$i]="$awk"
          typeset "opt[E]"=1
          ;;
        (fgrep)
          flag=1
          reply[$i]="$awk"
          typeset "opt[F]"=1
          ;;
      esac
      (( cursor += $#reply[$i] ))
    elif [[ $flag == 1 && $reply[$i] == -* ]]; then
      # Is it the End of Options-marker?
      if [[ $reply[$i] == "--" ]]; then
        flag=2
      elif [[ $reply[$i] =~ ^--. ]]; then
        zle -M "grep2awk: Long options to grep are not supported"
        return 1
      else
        for ((l=1; l<$#reply[$i]; l++)); do
          # Check those options!
          # Is it an option to grep?  A regular expression of
          # the options that are recognized:
          o=${reply[$i]:$l:1}
          if [[ $o =~ "c|E|e|F|G|H|i|l|L|n|v|w|x|y" ]]; then
            typeset "opt[$o]"=1
          else
            zle -M "grep2awk: Option $o is not supported"
            return 1
          fi
        done
        # A trailing -e means the next word is the regex.
        [[ $o == "e" ]] && flag=2
      fi
      # Delete the options, and the now superfluous whitespace:
      reply[$i]=""
      reply[$i+1]=""
      # And leave the cursor pointer where it was.
    elif (( flag==1 || flag==2 )); then
      # The fall-through for opt_or_reg OR regex:
      # This is the regex.  If the conversion fails, stop here so that
      # BUFFER is left untouched instead of receiving a broken line.
      reply[$i]=$(regex2awkprog $reply[$i]) || return 1
      (( cursor += $#reply[$i] - 2 ))
      flag=3
    fi
    # Append the current version of $reply[$i] to replacement_line:
    replacement_line+=$reply[$i]
  done

  # Now we are done.  Apply the changes to the BUFFER:
  BUFFER="$replacement_line"
  CURSOR=$cursor
}

grep2awk "$@"