From mboxrd@z Thu Jan 1 00:00:00 1970 From: Lucio De Re To: 9fans@cse.psu.edu Subject: Re: [9fans] Minor change to pr(1) Message-ID: <20030422085343.K23448@cackle.proxima.alt.za> References: <29d74e36513be89fc1fdef0aa4c96dfb@cs.cmu.edu> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary=gKMricLos+KVdGMg In-Reply-To: <29d74e36513be89fc1fdef0aa4c96dfb@cs.cmu.edu>; from David Swasey on Fri, Apr 18, 2003 at 02:07:13PM -0400 Date: Tue, 22 Apr 2003 08:53:43 +0200 Topicbox-Message-UUID: 946b7304-eacb-11e9-9e20-41e7f4b1d025 --gKMricLos+KVdGMg Content-Type: text/plain; charset=us-ascii On Fri, Apr 18, 2003 at 02:07:13PM -0400, David Swasey wrote: > > Please consider changing pr -i to use a single space, rather than a > tab, in the case where one space is needed just before a tab stop. > For example, I would prefer that the output of > > echo 'donttab here' | pr -ti > > contains no tabs. One way to accomplish this is > I use "trim" which I wrote many years ago to remove trailing spaces from text files and eventually grew to convert spaces to tabs and viceversa. The attached code is for Plan 9 but needs a couple of BSDisms to compile correctly: this is the mkfile line I use: pcc -o trim -D_POSIX_SOURCE -D_BSD_EXTENSION trim.c NO man(1) page, yet, but I'm working on it. A comment of relevance to the above: ** A tab is never output if its purpose can be fulfilled by ** a single space (yes, it seems odd, but it has frustrated ** me since 1991 - it's now fixed). Trim tries to be clever about 'C' strings and leave tabs and spaces unchanged between quotes, but it would require a fancier syntax scanner to be perfect :-( ++L PS: the code is in the public domain, in the strict sense of the term. --gKMricLos+KVdGMg Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="trim.c" /* ** @(#) trim.c - space and tab manipulator ** @(#) $Id: trim.c,v 1.5 2003/04/22 06:46:30 lucio Exp $ */ /* ** Removes all trailing spaces unconditionally; ** translates tabs to equivalent spaces or reduces spaces to ** the appropriate combination of tabs and spaces. ** A tab is never output if its purpose can be fulfilled by ** a single space (yes, it seems odd, but it has frustrated ** me since 1991 - it's now fixed). ** ** A couple of questions to be resolved in the new version: ** (1) How does one trigger the "default" tab settings? This was ** an empty argument when we did not use getopt(). ** (2) More exhaustive testing is required. ** ** ================================================================== ** ** $Logfile:$ ** $RCSfile: trim.c,v $ ** $Revision: 1.5 $ ** $Date: 2003/04/22 06:46:30 $ ** $Author: lucio $ ** ** ================================================================== ** ** $Log: trim.c,v $ ** Revision 1.5 2003/04/22 06:46:30 lucio ** Update for Plan 9 ** ** Revision 1.4 2001/04/12 05:08:09 lucio ** Brought in line with new "usage" conventions. ** ** Revision 1.3 2001/02/28 11:21:07 lucio ** .cvsignore ** added a few intermediate products ** Makefile ** facility to build HTML docs from MAN pages ** dirseek.1 ** updated to latest version ** dirseek.c ** Missing 'H' in permissible options string ** Allow '-' to represent std{in,out} ** trim.c ** A second stab at bringing it up to speed ** ** Revision 1.2 2001/02/15 11:50:09 lucio ** Preliminary update - more in line with modern practice. ** Still some work required. ** ** Revision 1.1 2001/02/15 11:39:54 lucio ** New archivery. ** ** ================================================================== */ #if defined(__Plan9__) && defined(__STDC__) #define _BSD_EXTENSION #endif #include #include #include #if defined(__NetBSD__) || defined(MSDOS) || defined(_POSIX_SOURCE) #include #include #if defined(_BSD_EXTENSION) #include #else extern char *optarg; extern int optind; #endif #endif #define TAB '\t' #define NL '\n' #define SP ' ' #define SQ '\'' #define DQ '"' #define FALSE 0 #define TRUE 1 #define TABLIM 12 static char *ident = "@(#) $Id: trim.c,v 1.5 2003/04/22 06:46:30 lucio Exp $"; static char *copyright = "Copyright (C) 1988-1991 Lucio de Re"; static char *usage[] = { "usage: %s [-h|H] [-t|T ] [-|infile] [outfile]\n", "\n", "opts: h/H - this message\n", " t - compress all spaces to tabs\n", " T - expand all tabs to spaces\n", "\n", " : none - default to 8,16,24 ...\n", " c - 'C' tabs (4,8,12 ...)\n", " C - COBOL tabs (8,12,16 ...)\n", " ,, ...\n", "\n", "opts override TRIMTABS in environment:\n", " TRIMTABS=[-]\n", " where is as above\n", " and leading '-' to compress to tabs,\n", " (default is to expand to spaces)\n", "\n", "'-' may be used to force read from 'stdin'\n", " when 'outfile' is specified.\n", NULL }; static use (char *argv0, char **usage) { fprintf (stderr, *usage, argv0); } static help (char *argv0, char **usage) { fprintf (stderr, *usage++, argv0); while (*usage) { fputs (*usage++, stderr); } } #if !defined(NetBSD) && !defined(MSDOS) static char *basename (char *name) { char *c = strrchr (name, '/'); if (c) { return (c + 1); } else { return (name); } } #endif static int tablim = 2; static int tabs[TABLIM + 2]; extern char *getenv (); int set (char *str) { switch (*str) { case 'c': tabs[1] = 4; tablim = 2; break; case 'C': tabs[1] = 7; tabs[2] = 11; tablim = 3; break; case '\0': tabs[1] = 8; tablim = 2; break; default: if (isdigit (*str)) { char ch; tabs[tablim = 1] = 0; while ((ch = *str) != '\0') { if (isdigit (ch)) { tabs[tablim] *= 10; tabs[tablim] += ch - '0'; str++; } else if (ch == ',') { if (tabs[tablim] > tabs[tablim-1]) { tabs[++tablim] = 0; str++; } else { --tablim; while (*(++str)) /* flush remainder */ ; } } else while (*(++str)) ; } if (tabs[tablim] > tabs[tablim-1]) tablim++; } break; } tabs[tablim] = tabs[tablim - 1] - tabs[tablim - 2]; } int next (int tabdex, int index) { int diff; if (index < tabs[tabdex]) tabdex = 1; while (tabdex < tablim && index >= tabs[tabdex]) tabdex++; if (tabdex >= tablim) { diff = tabs[tabdex - 1] - tabs[tabdex - 2]; tabs[tabdex] = tabs[tabdex - 1] + diff; while (index >= tabs[tabdex]) tabs[tabdex] += diff; } return (tabdex); } main (int argc, char *argv[]) { FILE *inf = stdin, *outf = stdout; int argx = 1; char *argv0 = basename (argv[0]); int ch, sp, index; int tins = 0; int old_tins, quote = '\0', escseq = FALSE; int tabdex = 1, htdex = 1; char *opts; tabs[0] = 0; tabs[1] = 8; if (opts = getenv ("TRIMTABS")) { if (*opts == '-') { tins = 1; opts++; } set (opts); } while ((ch = getopt (argc, argv, "t:T:hH")) != -1) { switch (ch) { case 't': /* tab stops to be inserted */ tins = 1; case 'T': /* tab stops to be expanded */ set (optarg); break; case 'h': case 'H': help (argv0, usage); exit (0); default: use (argv0, usage); exit (2); } } argc -= optind; argv += optind; if (argc) { if (*argv[0] != '-') { if ((inf = fopen (*argv, "r")) == NULL) { fprintf (stderr, "%s: cannot open %s for input\n", argv0, *argv); exit (1); } } --argc; ++argv; } if (argc) { if (*argv[0] != '-') { if ((outf = fopen (*argv, "w")) == NULL) { fprintf (stderr, "%s: cannot open %s for output\n", argv0, *argv); exit (1); } } --argc; ++argv; } sp = index = 0; old_tins = tins; while ((ch = fgetc (inf)) != EOF) { switch (ch) { case NL: quote = '\0'; tins = old_tins; fputc (ch, outf); sp = index = 0; escseq = FALSE; break; case SP: sp++; index++; escseq = FALSE; break; case TAB: if (tablim > 0) { int x = tabs [tabdex = next (tabdex, index)]; sp += x - index; index = x; escseq = FALSE; break; } default: if (tins) { int x = index - sp; int c; while ((c = tabs[htdex = next (htdex, x)]) <= index) { if (c == x + 1) { fputc (SP, outf); } else { fputc (TAB, outf); } x = c; } while (x++ < index) fputc (SP, outf); sp = 0; } else { while (sp > 0) { fputc (SP, outf); sp--; } } fputc (ch, outf); index++; if (escseq) escseq = FALSE; else switch (ch) { case '\\': escseq = TRUE; break; case SQ: case DQ: if (quote == ch) { quote = '\0'; tins = old_tins; } else if (quote == '\0') { quote = ch; old_tins = tins; tins = 0; } default: escseq = FALSE; break; } break; } } fclose (inf); fclose (outf); } --gKMricLos+KVdGMg--