From: Lucio De Re <lucio@proxima.alt.za>
To: 9fans@cse.psu.edu
Subject: Re: [9fans] Minor change to pr(1)
Date: Tue, 22 Apr 2003 08:53:43 +0200 [thread overview]
Message-ID: <20030422085343.K23448@cackle.proxima.alt.za> (raw)
In-Reply-To: <29d74e36513be89fc1fdef0aa4c96dfb@cs.cmu.edu>; from David Swasey on Fri, Apr 18, 2003 at 02:07:13PM -0400
[-- Attachment #1: Type: text/plain, Size: 1163 bytes --]
On Fri, Apr 18, 2003 at 02:07:13PM -0400, David Swasey wrote:
>
> Please consider changing pr -i to use a single space, rather than a
> tab, in the case where one space is needed just before a tab stop.
> For example, I would prefer that the output of
>
> echo 'donttab here' | pr -ti
>
> contains no tabs. One way to accomplish this is
>
I use "trim", which I wrote many years ago to remove trailing spaces
from text files and which eventually grew to convert spaces to tabs and
vice versa. The attached code is for Plan 9 but needs a couple of
BSDisms to compile correctly; this is the mkfile line I use:
pcc -o trim -D_POSIX_SOURCE -D_BSD_EXTENSION trim.c
No man(1) page yet, but I'm working on it. A comment of relevance to
the above:
** A tab is never output if its purpose can be fulfilled by
** a single space (yes, it seems odd, but it has frustrated
** me since 1991 - it's now fixed).
Trim tries to be clever about 'C' strings and leave tabs and spaces
unchanged between quotes, but it would require a fancier syntax
scanner to be perfect :-(
++L
PS: the code is in the public domain, in the strict sense of the term.
[-- Attachment #2: trim.c --]
[-- Type: text/plain, Size: 9918 bytes --]
/*
** @(#) trim.c - space and tab manipulator
** @(#) $Id: trim.c,v 1.5 2003/04/22 06:46:30 lucio Exp $
*/
/*
** Removes all trailing spaces unconditionally;
** translates tabs to equivalent spaces or reduces spaces to
** the appropriate combination of tabs and spaces.
** A tab is never output if its purpose can be fulfilled by
** a single space (yes, it seems odd, but it has frustrated
** me since 1991 - it's now fixed).
**
** A couple of questions to be resolved in the new version:
** (1) How does one trigger the "default" tab settings? This was
** an empty argument when we did not use getopt().
** (2) More exhaustive testing is required.
**
** ==================================================================
**
** $Logfile:$
** $RCSfile: trim.c,v $
** $Revision: 1.5 $
** $Date: 2003/04/22 06:46:30 $
** $Author: lucio $
**
** ==================================================================
**
** $Log: trim.c,v $
** Revision 1.5 2003/04/22 06:46:30 lucio
** Update for Plan 9
**
** Revision 1.4 2001/04/12 05:08:09 lucio
** Brought in line with new "usage" conventions.
**
** Revision 1.3 2001/02/28 11:21:07 lucio
** .cvsignore
** added a few intermediate products
** Makefile
** facility to build HTML docs from MAN pages
** dirseek.1
** updated to latest version
** dirseek.c
** Missing 'H' in permissible options string
** Allow '-' to represent std{in,out}
** trim.c
** A second stab at bringing it up to speed
**
** Revision 1.2 2001/02/15 11:50:09 lucio
** Preliminary update - more in line with modern practice.
** Still some work required.
**
** Revision 1.1 2001/02/15 11:39:54 lucio
** New archivery.
**
** ==================================================================
*/
#if defined(__Plan9__) && defined(__STDC__)
#define _BSD_EXTENSION
#endif
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#if defined(__NetBSD__) || defined(MSDOS) || defined(_POSIX_SOURCE)
#include <unistd.h>
#include <stdlib.h>
#if defined(_BSD_EXTENSION)
#include <bsd.h>
#else
extern char *optarg;
extern int optind;
#endif
#endif
/*
** Characters the filter gives special treatment to in main():
** tab, newline and space drive the column bookkeeping; single and
** double quotes switch tab insertion off until the matching quote.
*/
#define TAB '\t'
#define NL '\n'
#define SP ' '
#define SQ '\''
#define DQ '"'
/* Boolean values used for the escape-sequence flag. */
#define FALSE 0
#define TRUE 1
/* Sizes the tab stop table, which is declared as tabs[TABLIM + 2]. */
#define TABLIM 12
/*
** Identification and copyright strings embedded in the binary;
** neither is referenced by the code visible in this file.
*/
static char *ident = "@(#) $Id: trim.c,v 1.5 2003/04/22 06:46:30 lucio Exp $";
static char *copyright = "Copyright (C) 1988-1991 Lucio de Re";
/*
** Help text, one output line per entry, terminated by NULL.
** The first entry is used as a printf format and receives the
** program name for its %s (see use() and help()); the remaining
** entries are written verbatim by help().
*/
static char *usage[] = {
"usage: %s [-h|H] [-t|T <arg>] [-|infile] [outfile]\n",
"\n",
"opts: h/H - this message\n",
" t <arg> - compress all spaces to tabs\n",
" T <arg> - expand all tabs to spaces\n",
"\n",
" <arg>: none - default to 8,16,24 ...\n",
" c - 'C' tabs (4,8,12 ...)\n",
" C - COBOL tabs (8,12,16 ...)\n",
" <n1>,<n2>,<n3> ...\n",
"\n",
"opts override TRIMTABS in environment:\n",
" TRIMTABS=[-]<arg>\n",
" where <arg> is as above\n",
" and leading '-' to compress to tabs,\n",
" (default is to expand to spaces)\n",
"\n",
"'-' may be used to force read from 'stdin'\n",
" when 'outfile' is specified.\n",
NULL
};
/*
** Print the one-line synopsis on stderr.
**
** argv0 - program name, substituted for the %s in the synopsis
** usage - NULL-terminated message table; only its first entry is
**         used, as a printf format
**
** Declared void: the original relied on implicit int (removed from
** the language in C99) and never returned a value.
*/
static void use (char *argv0, char **usage) {
    if (usage == NULL || usage[0] == NULL)
        return;         /* nothing to print; avoid a NULL format */
    fprintf (stderr, usage[0], argv0);
}
/*
** Print the complete help text on stderr.
**
** argv0 - program name, substituted for the %s in the first line
** usage - NULL-terminated message table; the first entry is a
**         printf format, every following entry is written verbatim
**
** Declared void: the original relied on implicit int (removed from
** the language in C99) and never returned a value.
*/
static void help (char *argv0, char **usage) {
    if (usage == NULL || *usage == NULL)
        return;         /* empty table; avoid a NULL format */
    fprintf (stderr, *usage++, argv0);
    while (*usage != NULL) {
        fputs (*usage++, stderr);
    }
}
#if !defined(NetBSD) && !defined(MSDOS)
/*
** Return the part of `name' that follows its last '/', or `name'
** itself when it contains no '/'.  The result points into the
** caller's string; nothing is copied or modified.
*/
static char *basename (char *name) {
    char *slash = strrchr (name, '/');

    return (slash != NULL ? slash + 1 : name);
}
#endif
/*
** Tab stop table shared by set(), next() and main().
** tabs[0] is set to 0 by main(); tabs[1] .. tabs[tablim - 1] hold the
** explicit stops; next() stores an extrapolated stop in tabs[tablim]
** once the explicit ones have been passed.
*/
static int tablim = 2;
static int tabs[TABLIM + 2];
/* Old-style declaration; main() uses getenv to read TRIMTABS. */
extern char *getenv ();
int set (char *str) {
switch (*str) {
case 'c':
tabs[1] = 4;
tablim = 2;
break;
case 'C':
tabs[1] = 7;
tabs[2] = 11;
tablim = 3;
break;
case '\0':
tabs[1] = 8;
tablim = 2;
break;
default:
if (isdigit (*str)) {
char ch;
tabs[tablim = 1] = 0;
while ((ch = *str) != '\0') {
if (isdigit (ch)) {
tabs[tablim] *= 10;
tabs[tablim] += ch - '0';
str++;
} else if (ch == ',') {
if (tabs[tablim] > tabs[tablim-1]) {
tabs[++tablim] = 0;
str++;
} else {
--tablim;
while (*(++str)) /* flush remainder */
;
}
} else
while (*(++str))
;
}
if (tabs[tablim] > tabs[tablim-1])
tablim++;
}
break;
}
tabs[tablim] = tabs[tablim - 1] - tabs[tablim - 2];
}
/*
** Find a tab stop beyond column `index' and return its slot in tabs[].
**
** tabdex - slot returned by the previous call, used as the starting
**          point of the search; the search restarts from slot 1 when
**          `index' lies before that stop
** index  - column to search from
**
** While explicit stops remain (slots below tablim) the first one
** greater than `index' is chosen.  Past them, further stops are
** generated by repeating the interval between the last two stops and
** the result is stored in tabs[tabdex] before its slot is returned.
**
** Relies on the table holding at least two increasing entries below
** tablim; a non-positive interval would keep the extrapolation loop
** from terminating.
*/
int next (int tabdex, int index) {
    int step;
    int stop;

    if (index < tabs[tabdex])
        tabdex = 1;
    for (; tabdex < tablim; tabdex++) {
        if (tabs[tabdex] > index)
            break;
    }
    if (tabdex < tablim)
        return (tabdex);

    /* beyond the explicit stops: extend with the last interval */
    step = tabs[tabdex - 1] - tabs[tabdex - 2];
    stop = tabs[tabdex - 1] + step;
    while (stop <= index)
        stop += step;
    tabs[tabdex] = stop;
    return (tabdex);
}
/*
** trim [-h|H] [-t|T <arg>] [-|infile] [outfile]
**
** Copies infile (default stdin) to outfile (default stdout), dropping
** blanks at the end of every line.  Tabs in the input are turned into
** the equivalent number of columns; the pending run of blanks is then
** written either as spaces (-T, the default) or as the shortest mix
** of tabs and spaces (-t).  Between single or double quotes blanks
** are always written as spaces; a backslash protects the following
** character from being taken as a quote.
**
** Exit status: 0 on success, 1 if a file cannot be opened or an I/O
** error is detected, 2 on a bad option.
**
** Changes from the original:
**  - explicit int return type and return value (implicit int is gone
**    since C99, and the exit status used to be unspecified);
**  - -T now clears the "insert tabs" flag, so it really overrides a
**    TRIMTABS value starting with '-', as the usage text promises;
**  - read and write errors are reported instead of being ignored;
**  - the unused local argx is gone.
*/
int main (int argc, char *argv[]) {
    FILE *inf = stdin,
         *outf = stdout;
    char *argv0 = basename (argv[0]);
    int ch,
        sp,                 /* blanks read but not yet written */
        index;              /* current column, pending blanks included */
    int tins = 0;           /* non-zero: write blanks as tabs where possible */
    int old_tins,           /* tins as it was outside the current quotes */
        quote = '\0',       /* open quote character, '\0' if none */
        escseq = FALSE;     /* previous character was an unescaped '\\' */
    int tabdex = 1,         /* next() hint for expanding input tabs */
        htdex = 1;          /* next() hint for generating output tabs */
    char *opts;

    /* default: a stop every 8 columns */
    tabs[0] = 0;
    tabs[1] = 8;

    /* the environment supplies defaults, the command line overrides them */
    if ((opts = getenv ("TRIMTABS")) != NULL) {
        if (*opts == '-') {
            tins = 1;
            opts++;
        }
        set (opts);
    }
    while ((ch = getopt (argc, argv, "t:T:hH")) != -1) {
        switch (ch) {
        case 't':           /* tab stops to be inserted */
            tins = 1;
            set (optarg);
            break;
        case 'T':           /* tab stops to be expanded */
            tins = 0;
            set (optarg);
            break;
        case 'h':
        case 'H':
            help (argv0, usage);
            exit (0);
        default:
            use (argv0, usage);
            exit (2);
        }
    }
    argc -= optind;
    argv += optind;

    /* first operand: input file, anything starting with '-' means stdin */
    if (argc) {
        if (*argv[0] != '-') {
            if ((inf = fopen (*argv, "r")) == NULL) {
                fprintf (stderr, "%s: cannot open %s for input\n", argv0, *argv);
                exit (1);
            }
        }
        --argc;
        ++argv;
    }
    /* second operand: output file, anything starting with '-' means stdout */
    if (argc) {
        if (*argv[0] != '-') {
            if ((outf = fopen (*argv, "w")) == NULL) {
                fprintf (stderr, "%s: cannot open %s for output\n", argv0, *argv);
                exit (1);
            }
        }
        --argc;
        ++argv;
    }

    sp = index = 0;
    old_tins = tins;
    while ((ch = fgetc (inf)) != EOF) {
        switch (ch) {
        case NL:
            /* pending blanks are simply forgotten: that is the trim */
            quote = '\0';
            tins = old_tins;
            fputc (ch, outf);
            sp = index = 0;
            escseq = FALSE;
            break;
        case SP:
            sp++;
            index++;
            escseq = FALSE;
            break;
        case TAB:
            if (tablim > 0) {
                /* account for the tab as blanks up to the next stop */
                int x = tabs [tabdex = next (tabdex, index)];
                sp += x - index;
                index = x;
                escseq = FALSE;
                break;
            }
            /* FALLTHROUGH - no tab stops: copy the tab like any other character */
        default:
            if (tins) {
                int x = index - sp;     /* column where the blanks began */
                int c;

                /* one tab per stop reached, or a space if one column suffices */
                while ((c = tabs[htdex = next (htdex, x)]) <= index) {
                    if (c == x + 1) {
                        fputc (SP, outf);
                    } else {
                        fputc (TAB, outf);
                    }
                    x = c;
                }
                /* spaces for whatever lies beyond the last stop reached */
                while (x++ < index)
                    fputc (SP, outf);
                sp = 0;
            } else {
                while (sp > 0) {
                    fputc (SP, outf);
                    sp--;
                }
            }
            fputc (ch, outf);
            index++;
            if (escseq)
                escseq = FALSE;
            else
                switch (ch) {
                case '\\':
                    escseq = TRUE;
                    break;
                case SQ:
                case DQ:
                    if (quote == ch) {
                        /* closing quote: restore the tab mode */
                        quote = '\0';
                        tins = old_tins;
                    } else if (quote == '\0') {
                        /* opening quote: keep blanks literal inside */
                        quote = ch;
                        old_tins = tins;
                        tins = 0;
                    }
                    /* FALLTHROUGH */
                default:
                    escseq = FALSE;
                    break;
                }
            break;
        }
    }

    if (ferror (inf)) {
        fprintf (stderr, "%s: error reading input\n", argv0);
        exit (1);
    }
    fclose (inf);
    /* fclose flushes, so a full disk may only show up here */
    if (ferror (outf) || fclose (outf) == EOF) {
        fprintf (stderr, "%s: error writing output\n", argv0);
        exit (1);
    }
    return (0);
}
prev parent reply other threads:[~2003-04-22 6:53 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-04-18 18:07 David Swasey
2003-04-18 18:15 ` rsc
2003-04-22 6:53 ` Lucio De Re [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20030422085343.K23448@cackle.proxima.alt.za \
--to=lucio@proxima.alt.za \
--cc=9fans@cse.psu.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).