|
DataMuseum.dk presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T a
Length: 13592 (0x3518) Types: TextFile Names: »autopun.c«
└─⟦b20c6495f⟧ Bits:30007238 EUUGD18: Wien-båndet, efterår 1987 └─⟦this⟧ »EUUGD18/General/Autopun/autopun.c«
/*
 * autopun - Phrase reparser.
 *
 * Usage:
 *      autopun [-e English_dict] [-p Phone_dict]
 * or
 *      autopun [-e English_dict] -c -p Phone_dict
 *
 * Given an English phrase (as stdin), autopun prints (to stdout) a table
 * that can be used to create phonemically-similar phrases.  E.g.,
 * "Happy Birthday" can be recast as "Hub pip Earth tee".
 *
 * Only one phrase (the first line of stdin) is processed per run.
 */
#include <stdio.h>
#include "phoneme.h"

#define TRUE 1
#define FALSE 0

char *cmd;              /* name of this program (basename of argv[0]) */
int phread = FALSE;     /* "read the phone dict": -p given without -c */
int phcreate = FALSE;   /* "create the phone dict": -c given */
char *engldict = "/usr/dict/words"; /* English dictionary file (-e) */
char *phonedict;        /* phonemic dictionary file (-p); null if not given */
char *indictname;       /* name of the dict being read */
FILE *indict;           /* the open dict being read */
FILE *outdict;          /* the open dict being written */
                        /* ...name == phonedict */

/*
 * struct amatch - representation of the phonemic match of a word with
 *      some part of the phrase.
 *
 * Matches that start at the same phrase position are chained through
 * `next'; recmatch() keeps each chain ordered by descending nextpos.
 */
struct amatch {
    struct amatch *next;    /* next word matching at this position */
    char *text;             /* English text of the matching word */
    short nextpos;          /* position of phoneme after this word; */
                            /* >= targlen means the word ends the phrase */
};

/*
 * struct posinfo - information about one phonemic position in the phrase.
 */
struct posinfo {
    struct amatch *wlist;   /* list of words that match at this point */
    unsigned num_parents;   /* # of words leading directly to this point */
                            /* (computed by prune()) */
};

/*
 * posinfo[] - indexed by phoneme position in the phrase.
 */
#define MAX_PHONES 300      /* Max # of phonemes in a phrase */
struct posinfo posinfo[MAX_PHONES];

short target[MAX_PHONES];   /* phonemes of the phrase to reparse */
                            /* (P_end-terminated, already mapped by phmap[]) */
int targlen;                /* # of phonemes in target[] (less P_end) */

/*
 * phmap[] - phoneme map.  Maps a set of similar-sounding phonemes into one.
 *      Used in preprocessing strings before comparison.
*/ short phmap[P_NUM] = { P_end, /* P_end */ P_IY, /* P_IY */ P_IY, /* P_IH */ P_EY, /* P_EY */ P_EY, /* P_EH */ P_AE, /* P_AE */ P_AE, /* P_AA */ P_AE, /* P_AO */ P_AE, /* P_OW */ P_AE, /* P_UH */ P_UW, /* P_UW */ P_UW, /* P_ER */ P_AE, /* P_AX */ P_AE, /* P_AH */ P_AY, /* P_AY */ P_AE, /* P_AW */ P_OY, /* P_OY */ P_p, /* P_p */ P_p, /* P_b */ P_t, /* P_t */ P_t, /* P_d */ P_k, /* P_k */ P_g, /* P_g */ P_f, /* P_f */ P_f, /* P_v */ P_TH, /* P_TH */ P_f, /* P_DH */ P_s, /* P_s */ P_s, /* P_z */ P_s, /* P_SH */ P_s, /* P_ZH */ P_HH, /* P_HH */ P_m, /* P_m */ P_n, /* P_n */ P_n, /* P_NG */ P_l, /* P_l */ P_l, /* P_w */ P_y, /* P_y */ P_r, /* P_r */ P_CH, /* P_CH */ P_CH, /* P_j */ P_WH, /* P_WH */ P_end /* P_PAS */ }; main(argc, argv) int argc; char **argv; { char *cp; char line[300]; char *strrchr(); /* * get the basename of the command name, * for use in error messages. */ if ((cmd = strrchr(argv[0], '/'))) { ++cmd; } else { cmd = argv[0]; } while (++argv, --argc > 0) { cp = *argv; if (*cp == '-' && *(cp + 1) != '\0') { switch(*++cp) { case 'c': /* "create the phonemic dictionary */ /* (requires -p flag) */ phcreate = TRUE; break; case 'p': /* "filename of phonemic dictionary" */ if (++argv, --argc <= 0 || (**argv == '-' && *(*argv + 1) != '\0')) { bomb("missing -p filename"); } phonedict = *argv; break; case 'e': /* "filename of English dictionary" */ if (++argv, --argc <= 0) { bomb("missing -e filename"); } engldict = *argv; break; default: bomb("unknown switch `%c'", *cp); } } else { bomb("extra filename `%s'", *argv); } } if (phcreate && !phonedict) { bomb("missing -p flag"); } phread = (phonedict && !phcreate); /* * open up the necessary files: * Input is either an English or phonemic dictionary; * Output is a phonemic dictionary (if requested). 
*/ if (phread) { indictname = phonedict; } else { indictname = engldict; } if (!(indict = fopen(indictname, "r"))) { fprintf(stderr, "%s: can't open \"%s\" -- ", cmd, indictname); perror(""); exit(1); } if (phcreate) { if (strcmp(indictname, phonedict) == 0) { bomb("can't overwrite `%s'", phonedict); } if (!(outdict = fopen(phonedict, "w"))) { fprintf(stderr, "%s: can't create \"%s\" -- ", cmd, phonedict); perror(""); exit(1); } } /* * Grab a phrase and process it. */ if (isatty(fileno(stdin))) { fputs("Enter English text: ", stderr); fflush(stderr); } if (fgets(line, 300, stdin)) { line[strlen(line) - 1] = '\0'; /* removes the terminating \n */ reparse(line); } /* * Close up shop, making sure that any I/O errors are reported */ if (ferror(indict)) { fprintf(stderr, "%s: problem reading \"%s\" -- ", cmd, indictname); perror(""); exit(1); } (void) fclose(indict); if (phcreate) { if (fclose(outdict) != 0) { fprintf(stderr, "%s: problem writing \"%s\" -- ", cmd, phonedict); perror(""); exit(1); } } exit(0); } /* * reparse() - given a line of English text, * Find and print the info necessary to reparse that line's phonemes * into other English phrases. */ reparse(text) char *text; { char dictword[MAX_PHONES]; /* an English word from the dictionary */ char *textcopy; /* dynamic copy of the text */ short phrase[MAX_PHONES]; /* the mapped, phonemic version of the phrase */ short testword[MAX_PHONES]; /* ditto for a word from the dictionary */ int twordlen; /* # of phonemes in testword[] (less P_end) */ register short *sp, *dp; /* temp source and dest phoneme pointers */ int idx; /* index where a match started */ short *xlate_line(); short *mapphrase(); char *strsave(); /* * Translate the input phrase and copy it to a safe place. 
*/ sp = xlate_line(text); (void) mapphrase(sp); dp = target; while (*sp != P_end) { *dp++ = *sp++; } *dp = P_end; targlen = dp - &target[0]; /* * For each word in the dictionary, * Convert that word into phonemic codes; * Write the converted codes to the phonemic dictionary (if necessary); * Record where that word would fit into the input phrase. */ while (fgets(dictword, MAX_PHONES, indict)) { dictword[strlen(dictword) - 1] = '\0'; if (phread) { twordlen = encphones(dictword, testword); } else { sp = xlate_line(dictword); twordlen = mapphrase(sp) - sp; if (twordlen == 0) { continue; /* (loop leap) */ } dp = testword; while (*sp != P_end) { *dp++ = *sp++; } *dp = P_end; if (phcreate) { writephones(dictword, testword); } } /* * Search for and record matches until * one can't possibly exist (too few phonemes left). */ sp = target; dp = &target[targlen]; textcopy = (char *) 0; while (dp - sp >= twordlen && (idx = wordidx(sp, testword)) != -1) { sp += idx + 1; if (!textcopy) { textcopy = strsave(dictword); } recmatch((sp - 1) - target, twordlen, textcopy); } } prune(); saymatches(text); } /* * prune() - prune away useless matches: * remove potential matches that lead to unmatchable parts of the phrase; * remove unreachable match lists. */ prune() { int pos; struct amatch *prevm; struct amatch *curm; /* * note and remove all the matches that lead to an unmatchable point. */ for (pos = 0; pos < MAX_PHONES; ++pos) { posinfo[pos].num_parents = 0; } for (pos = MAX_PHONES - 1; pos >= 0; --pos) { prevm = (struct amatch *) 0; curm = posinfo[pos].wlist; while (curm) { /* * If this word leads us to the end, everything is o.k. * If this word leads us to a matchable point, * note that we can reach that point. * Otherwise, this word is a dead-end -- remove it. 
*/ if (curm->nextpos >= targlen) { prevm = curm; } else if (posinfo[curm->nextpos].wlist) { ++posinfo[curm->nextpos].num_parents; prevm = curm; } else { if (!prevm) { posinfo[pos].wlist = curm->next; } else { prevm->next = curm->next; } /* (we should free curm here if we are reclaiming space) */ } curm = curm->next; } } /* * Find and remove each unreachable point in the phrase * (except the first one). * This traversal cascades forward. */ for (pos = 1; pos < MAX_PHONES; ++pos) { if (posinfo[pos].num_parents > 0) continue; for (curm = posinfo[pos].wlist; curm; curm = curm->next) { if (curm->nextpos >= targlen) continue; --posinfo[curm->nextpos].num_parents; } posinfo[pos].wlist = (struct amatch *) 0; /* (if we were reclaiming space, here's where we'd do it */ } } /* * saymatches() - print the phrase match information table. */ saymatches(text) char *text; /* the original text */ { int pos; int curcol; int addcols; struct amatch *curm; printf("%s\n", text); for (pos = 0; pos < MAX_PHONES; ++pos) { if (!posinfo[pos].wlist) continue; printf("%02d:\n", pos); curcol = 0; for (curm = posinfo[pos].wlist; curm; curm = curm->next) { addcols = 1 + strlen(curm->text) + 1 + 2; if (curcol + addcols >= 70) { printf("\n"); curcol = 0; } if (curcol == 0) { printf(" "); curcol += 1; } printf(" %s:", curm->text); if (curm->nextpos >= targlen) { printf("$ "); } else { printf("%02d", curm->nextpos); } curcol += addcols; } printf("\n"); } } /* * mapphrase() - given a phonetic word or phrase, map it in place via phmap[], * returning a pointer to the new end. */ short * mapphrase(pp) short *pp; /* a P_end-terminated word/phrase to map */ { short *dp; /* points to where to put the next phoneme */ dp = pp; while (*pp != P_end) { *dp = phmap[*pp]; if (*dp != P_end) ++dp; ++pp; } *dp = P_end; return(dp); } /* * wordidx() - given a phrase and a comparison word, * return the index in the phrase where the word was found (-1 if not found). 
* * Wordidx() assumes both the phrase and the word have been mapped by phmap[]. */ int wordidx(phrase, word) short *phrase; /* a P_end-terminated list of phonemes */ short *word; /* ditto */ { short *start; /* the starting phoneme being compared */ register short *pp; /* the current phrase phoneme being compared */ register short *wp; /* the current word phoneme being compared */ for (start = phrase; *start != P_end; ++start) { wp = word; pp = start; while (*wp != P_end) { if (*pp != *wp) break; ++pp, ++wp; } if (*wp == P_end) { return(start - phrase); } } return(-1); } /* * recmatch() - record a match. */ recmatch(pos, phlen, text) int pos; /* position of the match within the phrase */ int phlen; /* # of phonemes matched */ char *text; /* text that matched */ { struct amatch *prevm; struct amatch *newm; struct amatch *nextm; struct amatch *matchalloc(); newm = matchalloc(); newm->text = text; newm->nextpos = pos + phlen; prevm = (struct amatch *) 0; for (nextm = posinfo[pos].wlist; nextm && newm->nextpos < nextm->nextpos; prevm = nextm, nextm = nextm->next) { /* (empty body) */ } if (!prevm) { newm->next = posinfo[pos].wlist; posinfo[pos].wlist = newm; } else { newm->next = prevm->next; prevm->next = newm; } } /* * writephones() - write a list of phonemes to a file. */ writephones(ep, sp) char *ep; /* english text */ short *sp; /* corresponding phonemes */ { fputs(ep, outdict); putc(' ', outdict); while (*sp != P_end) { putc((int) *sp + (int) '!', outdict); ++sp; } putc('\n', outdict); if (ferror(outdict)) { fprintf(stderr, "Error: problem writing \"%s\" -- ", phonedict); perror(""); exit(1); } } /* * encphones() - encode ascii from a phoneme file into phonetic codes. 
*/ int /* returns # of phonemes in word[] */ encphones(text, word) register char *text; /* encoded phonemes (less the newline) */ short *word; /* where to put the phonemes */ { register short *sp; char *strchr(); /* * separate the English text from its encoded form */ text = strchr(text, ' '); *text++ = '\0'; sp = word; while (*text) { *sp = (short) (*text - '!'); ++text, ++sp; } *sp = P_end; return(sp - word); } /* * matchalloc() - allocate a new match element. */ struct amatch * matchalloc() { #define MAX_MATCHES 1000 /* max # of matches in a phrase */ static struct amatch matchpool[MAX_MATCHES]; static struct amatch *nextpool = matchpool; if (nextpool >= &matchpool[MAX_MATCHES]) { fprintf(stderr, "Error: too many matches (over %d)\n", MAX_MATCHES); exit(1); } return (nextpool++); } /* * strsave() - copy the given string to a malloc'ed area, * returning the resultant pointer. */ char * strsave(s) char *s; { char *ret; char *malloc(); if (!(ret = malloc(strlen(s) + 1))) { fprintf(stderr, "Error: out of memory saving \"%s\"\n", s); exit(1); } (void) strcpy(ret, s); return(ret); } /* VARARGS 1 */ bomb(str, a1, a2, a3) char *str; int a1, a2, a3; { fprintf(stderr, "%s: ", cmd); fprintf(stderr, str, a1, a2, a3); fprintf(stderr, "\n"); fprintf(stderr, "Usage:\n %s [-c] [-e English_dict] [-p Phone_dict]\n", cmd); exit(1); }