|
|
DataMuseum.dkPresents historical artifacts from the history of: Commodore CBM-900 |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about Commodore CBM-900 Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - download
Length: 9056 (0x2360)
Types: TextFile
Notes: UNIX file
Names: »tr.c«
└─⟦f27320a65⟧ Bits:30001972 Commodore 900 hard disk image with partial source code
└─⟦f4b8d8c84⟧ UNIX Filesystem
└─⟦this⟧ »cmd/tr.c«
/*
* Tr translates characters from the standard input to the standard output.
* It usage is as follows:
* tr [-c] [-d] [-s] [string1] [string2]
* In the two strings, in addition to normal characters, one can include
* `x-y' all characters between `x' and `y'
* \r ascii carriage return
* \n ascii line feed
* \b ascii back space
* \t ascii horizontal tab
* \f ascii form feed
* \d, \dd or \ddd
* character with ascii code d (or dd or ddd) in octal
* \x `x' for any `x' not listed above
* In the absence of any options, tr simply converts any character appearing
* in string1 to the character in the same position in string2. If string2
* is shorter then string1, then it is extended by replicating the last
* character.
* The `-c' option simply compliments string1 with respect to the character
* set. The resulting set of characters is used in increasing order.
* The `-d' option causes characters in string1 to be deleted rather than
* being translated.
* The `-s' option causes characters in string2 (or string1 if there is no
* string2 and the -d option is not specified) to be compressed on output.
* This means that multiple occurances of the same character are compressed
* to one occurance.
*/
#include <stdio.h>
#include <mdata.h>
#define bool char /* boolean type */
#define not ! /* logical negation operator */
#define and && /* logical conjunction */
#define or || /* logical disjunction */
#define TRUE (0 == 0)
#define FALSE (not TRUE)
#define EOS '\0' /* end-of-string char */
#define CSIZE (1 << NBCHAR) /* character set size */
#define MAXDIG ((NBCHAR+2) / 3) /* max digits in character constant */
/*
* List is used to hold a character list from which characters are begin
* extrated. It allows expansion of character ranges, backslash-protected
* characters and character ranges.
*/
typedef struct list {
unsigned char *l_next, /* next character from string */
*l_start; /* start of string */
bool l_inr; /* iff we are in a-b range */
int l_rnext, /* next to give if inrange */
l_rhi; /* maximum to give if inrange */
} list;
bool cflag, /* compliment string 1 */
dflag, /* delete chars in string 1 */
sflag, /* remove duplicates in string 2 */
delete[CSIZE], /* set of chars to delete on input */
squeeze[CSIZE]; /* set of chars to squeeze on output */
unsigned char map[CSIZE]; /* map to apply to characters */
int nextchar(); /* get next char of list */
void die(), /* print error message and exit */
usage(), /* print usage message and exit */
scan(), /* actually do copy */
startlist(), /* start list from string */
makeset(), /* make set of chars in list */
maketrans(); /* make transformation table */
unsigned char *cstr(); /* compliment character list */
char *alloc(), /* unfailable malloc */
*ralloc(); /* unfailable realloc */
int
main(argc, argv)
int argc;
register char *argv[];
{
register unsigned char *str1,
*str2;
for (str1=*++argv; str1 != NULL && *str1 == '-'; str1=*++argv)
while (*++str1 != EOS)
switch (*str1) {
case 'c':
cflag = TRUE;
break;
case 'd':
dflag = TRUE;
break;
case 's':
sflag = TRUE;
break;
default:
usage();
}
if (str1 == NULL)
usage();
if (cflag)
str1 = cstr(str1);
str2 = *++argv;
if (str2 != NULL && *++argv != NULL)
usage();
if (dflag)
if (sflag) { /* -d and -s */
if (str2 == NULL)
usage();
makeset(str1, delete);
makeset(str2, squeeze);
maketrans(str1, str2, map);
} else { /* -d and no -s */
if (str2 != NULL)
usage();
makeset(str1, delete);
maketrans("", "", map);
}
else
if (sflag)
if (str2 == NULL) { /* -s, no -d and one string */
maketrans("", "", map);
makeset(str1, squeeze);
} else { /* -s, no -d and two strings */
maketrans(str1, str2, map);
makeset(str2, squeeze);
}
else { /* no -s, no -d */
if (str2 == NULL)
usage();
maketrans(str1, str2, map);
}
scan();
return (0);
}
/*
* Die simply sends an error message to stderr and exits.
*/
void
die(str)
char *str;
{
fprintf(stderr, "%r\n", &str);
exit(1);
}
/*
* Usage gives a usage error message and exits.
*/
void
usage()
{
die("usage: tr [-cds] [from_list [to_list]]");
}
/*
* Scan does the acutal copying. It deletes any input characters in
* the set `delete'. It then transforms input characters to output
* characters useing the mapping `map'. Finally, it changes multiple
* occurances of output characters in the set `squeeze' to single
* occurances.
*/
void
scan()
{
register int ch,
lastch;
lastch = EOF;
while ((ch=getchar()) != EOF) {
if (delete[ch])
continue;
ch = map[ch];
if (ch == lastch && squeeze[ch])
continue;
putchar(ch);
lastch = ch;
}
}
/*
* Makeset sets the array of bools `set' (indexed by chars) such that
* the i'th entry is TRUE iff character i appears in the character
* list `str'.
*/
void
makeset(str, set)
unsigned char *str;
register bool *set;
{
register bool *rp;
register int n;
list cl;
for (rp=set, n=CSIZE; --n >= 0;)
*rp++ = FALSE;
startlist(&cl, str);
while ((n=nextchar(&cl)) != EOF)
set[n] = TRUE;
return;
}
/*
* Maketrans sets the array of chars `map' (indexed by chars) to
* the mapping which converts all characters in the character list
* `str1' to the corresponding character in the character list `str2'.
* If `str2' is short, the last character is duplicated.
*/
void
maketrans(str1, str2, map)
unsigned char *str1,
*str2,
*map;
{
register unsigned char *rp;
register int n,
m;
list l1,
l2;
bool extra;
startlist(&l1, str1);
startlist(&l2, str2);
for (rp=map, n=0; n < CSIZE; ++n)
*rp++ = n;
rp = map;
n = nextchar(&l2);
if (n == EOF) {
if (nextchar(&l1) != EOF)
die("Second string empty");
return;
}
for (extra=FALSE; (m=nextchar(&l1)) != EOF;) {
rp[m] = n;
if (not extra) {
m = nextchar(&l2);
if (m != EOF)
n = m;
else
extra = TRUE;
}
}
if (not extra && nextchar(&l2) != EOF)
die("Extra characters in second string");
return;
}
/*
* Cstr returns a string which is the compliment of the string `str'.
*/
unsigned char *
cstr(str)
unsigned char *str;
{
register unsigned char *rp;
register bool *sp;
register int n;
bool set[CSIZE];
unsigned char *res;
makeset(str, set);
res = rp = (unsigned char *)alloc(3 + 2 + CSIZE + 1);
sp = set;
if (not *sp++) { /* handle EOS specially */
*rp++ = '\\';
*rp++ = '0';
*rp++ = '0';
*rp++ = '0';
}
for (n=0; ++n < CSIZE;)
if (not *sp++) {
if (n == '\\' || n == '-')
*rp++ = '\\';
*rp++ = n;
}
*rp++ = EOS;
return ((unsigned char *)ralloc(res, rp - res));
}
/*
* Startlist sets the list pointed to by `lp' to the string `str'.
*/
void
startlist(lp, str)
register list *lp;
unsigned char *str;
{
lp->l_next = lp->l_start = str;
lp->l_inr = FALSE;
}
/*
* Nextchar returns the next character from the list pointed to by
* `lp'. This includes backslash protection and character ranges.
* When there are no more characters, it returns EOF.
*/
int
nextchar(lp)
register list *lp;
{
register int res;
int getprot();
if (lp->l_inr) {
res = lp->l_rnext;
lp->l_inr = ++lp->l_rnext <= lp->l_rhi;
return (res);
}
res = *lp->l_next++;
if (res == EOS)
return (EOF);
if (res == '\\')
res = getprot(lp);
if (*lp->l_next != '-')
return (res);
++lp->l_next;
lp->l_rnext = res;
res = *lp->l_next++;
if (res == EOS)
die("Unexpected end of character list in `%s'", lp->l_start);
if (res == '\\')
res = getprot(lp);
if (lp->l_rnext > res)
die("Bad character range in `%s'", lp->l_start);
lp->l_rhi = res;
res = lp->l_rnext;
lp->l_inr = ++lp->l_rnext <= lp->l_rhi;
return (res);
}
/*
* Getprot is used to get a backslash protected character from the
* character list pointed to by `lp'.
*/
int
getprot(lp)
register list *lp;
{
register unsigned char ch;
register unsigned n;
int m;
ch = *lp->l_next++;
switch (ch) {
case EOS:
die("Unexpected end of character list in `%s'", lp->l_start);
case 'r':
return ('\r');
case 'b':
return ('\b');
case 't':
return ('\t');
case 'n':
return ('\n');
case 'f':
return ('\f');
default:
break;
}
if ('0' > ch || ch > '7')
return (ch);
n = ch - '0';
ch = *lp->l_next;
for (m=MAXDIG; --m > 0 && '0' <= ch && ch <= '7'; ch=*++lp->l_next)
n = n*8 + ch-'0';
if (n >= CSIZE)
die("Illegal character constant in `%s'", lp->l_start);
return (n);
}
/*
* Alloc is simply an interface to malloc which does not return if
* there is no space.
*/
char *
alloc(len)
unsigned len;
{
register char *res;
extern char *malloc();
res = malloc(len);
if (res == NULL)
die("Out of space");
return (res);
}
/*
* Ralloc is simply an interface to realloc which does not return if
* there is no space.
*/
char *
ralloc(cp, len)
char *cp;
unsigned len;
{
register char *res;
extern char *realloc();
res = realloc(cp, len);
if (res == NULL)
die("Out of space");
return (res);
}