9fans - fans of the OS Plan 9 from Bell Labs
 help / color / mirror / Atom feed
From: Lucio De Re <lucio@proxima.alt.za>
To: 9fans@cse.psu.edu
Subject: Re: [9fans] Minor change to pr(1)
Date: Tue, 22 Apr 2003 08:53:43 +0200	[thread overview]
Message-ID: <20030422085343.K23448@cackle.proxima.alt.za> (raw)
In-Reply-To: <29d74e36513be89fc1fdef0aa4c96dfb@cs.cmu.edu>; from David Swasey on Fri, Apr 18, 2003 at 02:07:13PM -0400

[-- Attachment #1: Type: text/plain, Size: 1163 bytes --]

On Fri, Apr 18, 2003 at 02:07:13PM -0400, David Swasey wrote:
>
> Please consider changing pr -i to use a single space, rather than a
> tab, in the case where one space is needed just before a tab stop.
> For example, I would prefer that the output of
>
> 	echo 'donttab here' | pr -ti
>
> contains no tabs.  One way to accomplish this is
>
I use "trim", which I wrote many years ago to remove trailing spaces
from text files and which eventually grew to convert spaces to tabs
and vice versa.  The attached code is for Plan 9 but needs a couple of
BSDisms to compile correctly; this is the mkfile line I use:

	pcc -o trim -D_POSIX_SOURCE -D_BSD_EXTENSION trim.c

No man(1) page yet, but I'm working on it.  A comment of relevance to
the above:

**	A tab is never output if its purpose can be fulfilled by
**	a single space (yes, it seems odd, but it has frustrated
**	me since 1991 - it's now fixed).

Trim tries to be clever about 'C' strings and leave tabs and spaces
unchanged between quotes, but it would require a fancier syntax
scanner to be perfect :-(

++L

PS: the code is in the public domain, in the strict sense of the term.

[-- Attachment #2: trim.c --]
[-- Type: text/plain, Size: 9918 bytes --]

/*
**      @(#) trim.c - space and tab manipulator
**      @(#) $Id: trim.c,v 1.5 2003/04/22 06:46:30 lucio Exp $
*/
/*
**      Removes all trailing spaces unconditionally;
**      translates tabs to equivalent spaces or reduces spaces to
**      the appropriate combination of tabs and spaces.
**      A tab is never output if its purpose can be fulfilled by
**      a single space (yes, it seems odd, but it has frustrated
**      me since 1991 - it's now fixed).
**
**      A couple of questions to be resolved in the new version:
**      (1) How does one trigger the "default" tab settings? This was
**          an empty argument when we did not use getopt().
**      (2) More exhaustive testing is required.
**
** ==================================================================
**
**      $Logfile:$
**      $RCSfile: trim.c,v $
**      $Revision: 1.5 $
**      $Date: 2003/04/22 06:46:30 $
**      $Author: lucio $
**
** ==================================================================
**
**      $Log: trim.c,v $
**      Revision 1.5  2003/04/22 06:46:30  lucio
**      Update for Plan 9
**
**      Revision 1.4  2001/04/12 05:08:09  lucio
**      Brought in line with new "usage" conventions.
**
**      Revision 1.3  2001/02/28 11:21:07  lucio
**      .cvsignore
**        added a few intermediate products
**      Makefile
**        facility to build HTML docs from MAN pages
**      dirseek.1
**        updated to latest version
**      dirseek.c
**        Missing 'H' in permissible options string
**        Allow '-' to represent std{in,out}
**      trim.c
**        A second stab at bringing it up to speed
**
**      Revision 1.2  2001/02/15 11:50:09  lucio
**      Preliminary update - more in line with modern practice.
**      Still some work required.
**
**      Revision 1.1  2001/02/15 11:39:54  lucio
**      New archivery.
**
** ==================================================================
*/
#if	defined(__Plan9__) && defined(__STDC__)
#define _BSD_EXTENSION
#endif

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#if defined(__NetBSD__) || defined(MSDOS) || defined(_POSIX_SOURCE)
#include <unistd.h>
#include <stdlib.h>
#if defined(_BSD_EXTENSION)
#include <bsd.h>
#else
extern char *optarg;
extern int optind;
#endif
#endif

/*
**      Characters the filter treats specially.
*/
#define     TAB     '\t'
#define     NL      '\n'
#define     SP      ' '
#define     SQ      '\''
#define     DQ      '"'

#define     FALSE   0
#define     TRUE    1

/*
**      Sizes the tab-stop table: tabs[] is declared with TABLIM + 2
**      entries.
*/
#define     TABLIM  12

/*
**      Version and copyright strings.  Neither is referenced anywhere
**      else in this file; presumably they exist so that what(1)/ident(1)
**      can find the "@(#)" / "$Id$" markers in the binary - confirm
**      before removing.
*/
static  char *ident = "@(#) $Id: trim.c,v 1.5 2003/04/22 06:46:30 lucio Exp $";
static  char *copyright = "Copyright (C) 1988-1991 Lucio de Re";

/*
**      Help text, one line per entry, terminated by NULL.
**      The first entry is a printf format: use() and help() pass it to
**      fprintf() with the program name as the only argument.  help()
**      writes the remaining entries verbatim with fputs().
*/
static  char *usage[] = {
    "usage: %s [-h|H] [-t|T <arg>]  [-|infile] [outfile]\n",
    "\n",
    "opts: h/H     - this message\n",
    "      t <arg> - compress all spaces to tabs\n",
    "      T <arg> - expand all tabs to spaces\n",
    "\n",
    "       <arg>: none - default to 8,16,24 ...\n",
    "              c - 'C' tabs (4,8,12 ...)\n",
    "              C - COBOL tabs (8,12,16 ...)\n",
    "              <n1>,<n2>,<n3> ...\n",
    "\n",
    "opts override TRIMTABS in environment:\n",
    "         TRIMTABS=[-]<arg>\n",
    "     where <arg> is as above\n",
    "     and leading '-' to compress to tabs,\n",
    "     (default is to expand to spaces)\n",
    "\n",
    "'-' may be used to force read from 'stdin'\n",
    "    when 'outfile' is specified.\n",
    NULL
};

/*
**      use - print the one-line usage synopsis on stderr.
**
**      argv0   program name substituted for the "%s" in the synopsis
**      usage   NULL-terminated table of message lines; only the first
**              entry (a printf format) is printed
**
**      Always returns 0.  The function used to be declared with an
**      implicit "int" and returned nothing; the type is now explicit
**      and the value defined.  An empty table prints nothing.
*/
static int use (char *argv0, char **usage) {
    if (usage == NULL || *usage == NULL)
        return (0);
    fprintf (stderr, *usage, argv0);
    return (0);
}

/*
**      help - print the complete help text on stderr.
**
**      argv0   program name substituted for the "%s" in the first line
**      usage   NULL-terminated table of message lines; the first entry
**              is a printf format, the rest are written verbatim
**
**      Always returns 0.  The function used to be declared with an
**      implicit "int" and returned nothing; the type is now explicit
**      and the value defined.  An empty table prints nothing instead
**      of handing a NULL format to fprintf().
*/
static int help (char *argv0, char **usage) {
    if (usage == NULL || *usage == NULL)
        return (0);
    fprintf (stderr, *usage++, argv0);
    for (; *usage != NULL; usage++)
        fputs (*usage, stderr);
    return (0);
}

#if !defined(NetBSD) && !defined(MSDOS)
/*
**      basename - return the last component of a '/'-separated path.
**
**      The result points into name itself: just past the final '/',
**      or at name when it contains no '/' at all.  A name that ends
**      in '/' yields an empty string.
*/
static char *basename (char *name) {
    char *slash = strrchr (name, '/');

    return ((slash != NULL) ? slash + 1 : name);
}
#endif

/*
**      Tab-stop table shared by set(), next() and main().
**
**      tabs[0] is the origin (main() stores 0 there), tabs[1] up to
**      tabs[tablim - 1] are the explicit stops loaded by set(), and
**      tabs[tablim] is a working slot in which next() builds stops
**      that lie beyond the explicit ones.
*/
static  int tablim = 2;
static  int tabs[TABLIM + 2];

/* Old-style declaration; main() uses it to read TRIMTABS. */
extern char *getenv ();

int set (char *str) {
    switch (*str) {
        case 'c':
            tabs[1] = 4;
            tablim = 2;
            break;
        case 'C':
            tabs[1] = 7;
            tabs[2] = 11;
            tablim = 3;
            break;
        case '\0':
            tabs[1] = 8;
            tablim = 2;
            break;
        default:
            if (isdigit (*str)) {
                char ch;

                tabs[tablim = 1] = 0;
                while ((ch = *str) != '\0') {
                    if (isdigit (ch)) {
                        tabs[tablim] *= 10;
                        tabs[tablim] += ch - '0';
                        str++;
                    } else if (ch == ',') {
                        if (tabs[tablim] > tabs[tablim-1]) {
                            tabs[++tablim] = 0;
                            str++;
                        } else {
                            --tablim;
                            while (*(++str))		/* flush remainder */
                                ;
                        }
                    } else
                        while (*(++str))
                            ;
                }
                if (tabs[tablim] > tabs[tablim-1])
                    tablim++;
            }
            break;
    }
    tabs[tablim] = tabs[tablim - 1] - tabs[tablim - 2];
}

/*
**      next - locate the first tab stop strictly beyond a column.
**
**      tabdex  index returned by the previous call (1 on the first
**              call); used as the starting point of the search
**      index   column (0-based) to search from
**
**      Returns an index into tabs[]; tabs[<result>] is the stop.  When
**      the column lies beyond the explicit stops, the stop is built in
**      the working slot tabs[tablim] by repeating the interval between
**      the last two explicit entries.
**
**      Fixes relative to the original: a tabdex outside 1..tablim
**      restarts the search instead of indexing outside the table, and
**      a missing or non-positive interval (a degenerate table) is
**      replaced by 1, so the extension loop always terminates and
**      never reads before tabs[0].  With a well-formed table the
**      results are unchanged.
*/
int next (int tabdex, int index) {
    int diff;

    if (tabdex < 1 || tabdex > tablim || index < tabs[tabdex])
        tabdex = 1;
    while (tabdex < tablim && index >= tabs[tabdex])
        tabdex++;
    if (tabdex >= tablim) {
        diff = (tabdex >= 2) ? tabs[tabdex - 1] - tabs[tabdex - 2] : 0;
        if (diff <= 0)
            diff = 1;           /* degenerate table: still make progress */
        tabs[tabdex] = tabs[tabdex - 1] + diff;
        while (index >= tabs[tabdex])
            tabs[tabdex] += diff;
    }
    return (tabdex);
}

/*
**      main - filter a text file, trimming trailing blanks and either
**      expanding tabs to spaces or compressing spaces to tabs.
**
**      Tab stops and mode come from TRIMTABS in the environment and
**      are then overridden by -t <arg> (compress) or -T <arg> (expand).
**      Up to two operands name the input and output files; "-" (or a
**      missing operand) means stdin / stdout respectively.
**
**      Exit status: 0 on success or after -h/-H, 1 when a file cannot
**      be opened or an I/O error occurs, 2 on a bad option.
**
**      Fixes relative to the original:
**        - explicit "int" return type and a defined return value;
**        - -T now switches compression off, so it really overrides a
**          TRIMTABS value that starts with '-';
**        - only an operand that is exactly "-" selects stdin/stdout
**          (previously anything merely starting with '-' did);
**        - read and write errors are reported instead of ignored;
**        - the unused local argx is gone.
*/
int main (int argc, char *argv[]) {
    FILE *inf = stdin,
         *outf = stdout;
    char *argv0 = basename (argv[0]);
    int ch,
        sp,                     /* pending blank columns, not yet written */
        index;                  /* current column, pending blanks included */
    int tins = 0;               /* non-zero: write tabs for blank runs */
    int old_tins,               /* tins to restore once a quote closes */
        quote = '\0',           /* quote character we are inside, or '\0' */
        escseq = FALSE;         /* previous character was a backslash */
    int tabdex = 1,             /* search cursors handed back to next() */
        htdex = 1;
    int rerr,
        werr;
    char *opts;

    tabs[0] = 0;
    tabs[1] = 8;
    if ((opts = getenv ("TRIMTABS")) != NULL) {
        if (*opts == '-') {
            tins = 1;
            opts++;
        }
        set (opts);
    }

    while ((ch = getopt (argc, argv, "t:T:hH")) != -1) {
        switch (ch) {
            case 't':           /* tab stops to be inserted */
                tins = 1;
                set (optarg);
                break;
            case 'T':           /* tab stops to be expanded */
                tins = 0;
                set (optarg);
                break;
            case 'h':
            case 'H':
                help (argv0, usage);
                exit (0);
            default:
                use (argv0, usage);
                exit (2);
        }
    }
    argc -= optind;
    argv += optind;
    if (argc) {
        if (strcmp (*argv, "-") != 0) {
            if ((inf = fopen (*argv, "r")) == NULL) {
                 fprintf (stderr, "%s: cannot open %s for input\n", argv0, *argv);
                 exit (1);
            }
        }
        --argc;
        ++argv;
    }
    if (argc) {
        if (strcmp (*argv, "-") != 0) {
            if ((outf = fopen (*argv, "w")) == NULL) {
                 fprintf (stderr, "%s: cannot open %s for output\n", argv0, *argv);
                 exit (1);
            }
        }
        --argc;
        ++argv;
    }
    sp = index = 0;
    old_tins = tins;
    while ((ch = fgetc (inf)) != EOF) {
        switch (ch) {
            case NL:
                /* pending blanks are simply forgotten: that is the trim */
                quote = '\0';
                tins = old_tins;
                fputc (ch, outf);
                sp = index = 0;
                escseq = FALSE;
                break;
            case SP:
                sp++;
                index++;
                escseq = FALSE;
                break;
            case TAB:
                if (tablim > 0) {
                    /* a tab is just a run of blanks up to the next stop */
                    int x = tabs [tabdex = next (tabdex, index)];

                    sp += x - index;
                    index = x;
                    escseq = FALSE;
                    break;
                }
                /* FALLTHROUGH: no tab stops, treat the tab as ordinary */
            default:
                if (tins) {
                    int x = index - sp;     /* column where the blanks began */
                    int c;

                    /* one tab per stop crossed, but a lone column gets a space */
                    while ((c = tabs[htdex = next (htdex, x)]) <= index) {
                        if (c == x + 1) {
                            fputc (SP, outf);
                        } else {
                            fputc (TAB, outf);
                        }
                        x = c;
                    }
                    /* whatever is left short of a stop goes out as spaces */
                    while (x++ < index)
                        fputc (SP, outf);
                    sp = 0;
                } else {
                    while (sp > 0) {
                        fputc (SP, outf);
                        sp--;
                    }
                }
                fputc (ch, outf);
                index++;
                if (escseq)
                    escseq = FALSE;
                else
                    switch (ch) {
                        case '\\':
                            escseq = TRUE;
                            break;
                        case SQ:
                        case DQ:
                            /* no tab insertion between matching quotes */
                            if (quote == ch) {
                                quote = '\0';
                                tins = old_tins;
                            } else if (quote == '\0') {
                                quote = ch;
                                old_tins = tins;
                                tins = 0;
                            }
                            /* FALLTHROUGH */
                        default:
                            escseq = FALSE;
                            break;
                    }
                break;
        }
    }
    rerr = ferror (inf);
    werr = ferror (outf);
    fclose (inf);
    if (fclose (outf) == EOF)   /* the final flush can fail too */
        werr = TRUE;
    if (rerr) {
        fprintf (stderr, "%s: error reading input\n", argv0);
        exit (1);
    }
    if (werr) {
        fprintf (stderr, "%s: error writing output\n", argv0);
        exit (1);
    }
    return (0);
}

      parent reply	other threads:[~2003-04-22  6:53 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-04-18 18:07 David Swasey
2003-04-18 18:15 ` rsc
2003-04-22  6:53 ` Lucio De Re [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20030422085343.K23448@cackle.proxima.alt.za \
    --to=lucio@proxima.alt.za \
    --cc=9fans@cse.psu.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).