|
DataMuseum.dk presents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T p
Length: 8873 (0x22a9) Types: TextFile Names: »phoneme.c«
└─⟦b20c6495f⟧ Bits:30007238 EUUGD18: Wien-båndet, efterår 1987 └─⟦this⟧ »EUUGD18/General/Autopun/phoneme.c«
#include <stdio.h> #include <ctype.h> #define DATA #include "phoneme.h" #undef DATA /* * xlate_line() - given an English-text phrase or word, * Translate that thing into a phoneme-list. * * xlate_line() returns a pointer to the *static* resultant phoneme-list. */ short * xlate_line(text) char *text; /* English text to convert */ { static short phonelist[MAXPHONES]; char word[1 + MAXPHONES + 1]; /* the current parsed word */ short *nxtphones; char *dp; short *xlate_word(); nxtphones = phonelist; word[0] = ' '; while (*text) { while (*text && !(isalpha(*text) || *text == '\'')) { ++text; } dp = &word[1]; if (!*text) break; /* Loop Exit */ while (isalpha(*text) || *text == '\'') { if (islower(*text)) { *dp = toupper(*text); } else { *dp = *text; } ++dp, ++text; } *dp++ = ' '; *dp = '\0'; nxtphones = xlate_word(word, nxtphones); } *nxtphones = P_end; return(phonelist); } /* * xlate_word() - translate the given English word into a phoneme stream. * The word has the following form: * ' [-A-Z']* ' * That is, it begins and ends with a space and it contains only * upper-case letters, apostrophes, and hyphens. */ short * /* where to put any following phonemes (or a terminator) */ xlate_word(word, phonedst) char *word; short *phonedst; /* where to put the new phonemes */ { char *apply1rule(); short *newphones; /* array of phonemes to append */ ++word; /* Skip the initial blank */ while (*word) { word = apply1rule(word, &newphones); while (*newphones != P_end) { *phonedst++ = *newphones++; } } return(phonedst); } /* * apply1rule() - apply the appropriate translation rule to the start of * the word provided, setting a pointer to the resultant phonemes and * returning a pointer to the unconverted part of the word. 
*/ char * /* returns the place to convert next */ apply1rule(word, newphp) char *word; /* the part of the word to start in */ short **newphp; /* where to put a pointer to the set of translated phonemes */ { int rtype; /* rule type to use */ struct rule *rule; /* the current rule being tested */ register char *cp; /* temp pointer for matching */ register char *rem; /* points to the first char past the match */ static short firstcall = TRUE; /* "first call to this routine" */ static short nosym = P_end; /* an empty phoneme list */ if (firstcall) { firstcall = FALSE; ruleinit(); } if (isupper(*word)) { rtype = (int) (*word - 'A') + RIDX_A; } else { rtype = RIDX_PUNC; } for (rule = Rules[rtype]; rule->match; ++rule) { for (cp = rule->match, rem = word; *cp; cp++, rem++) { if (*cp != *rem) break; } if (*cp) continue; /* failed to match */ if (!leftmatch(rule->prefix, rule->preflen, word - 1)) continue; if (!rightmatch(rule->suffix, rule->sufflen, rem)) continue; *newphp = rule->outsyms; return(rem); } fprintf(stderr, "Error: Can't find rule for: '%c' in \"%s\"\n", *word, word); *newphp = &nosym; return(word + 1); /* Skip the annoyance */ } int leftmatch(pattern, patlen, context) char *pattern; /* first char of pattern to match in text */ short patlen; /* strlen(pattern) */ char *context; /* last char of text to be matched */ { char *pat; char *text; int count; if (!*pattern) return(TRUE); /* null string matches any context */ /* point to last character in pattern string */ count = patlen; pat = pattern + (count - 1); text = context; for (; count > 0; pat--, count--) { switch (*pat) { case '\'': case ' ': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': if (*pat != *text) return(FALSE); text--; continue; case '#': /* One or more vowels */ if (!isvowel(*text)) 
return(FALSE); text--; while (isvowel(*text)) text--; break; case ':': /* Zero or more consonants */ while (isconsonant(*text)) text--; break; case '^': /* One consonant */ if (!isconsonant(*text)) return(FALSE); text--; break; case '.': /* B, D, V, G, J, L, M, N, R, W, Z */ #ifdef NOTDEF if (*text != 'B' && *text != 'D' && *text != 'V' && *text != 'G' && *text != 'J' && *text != 'L' && *text != 'M' && *text != 'N' && *text != 'R' && *text != 'W' && *text != 'Z') { return FALSE; } text--; #endif switch (*text) { case 'B': case 'D': case 'V': case 'G': case 'J': case 'L': case 'M': case 'N': case 'R': case 'W': case 'Z': text--; break; default: return(FALSE); } break; case '+': /* E, I or Y (front vowel) */ #ifdef NOTDEF if (*text != 'E' && *text != 'I' && *text != 'Y') return FALSE; text--; #endif switch(*text) { case 'E': case 'I': case 'Y': text--; break; default: return(FALSE); } break; default: fprintf(stderr, "Bad char in left rule: '%c'\n", *pat); return(FALSE); } } return(TRUE); } int rightmatch(pattern, patlen, context) char *pattern; /* first char of pattern to match in text */ short patlen; /* strlen(pattern) [ignored] */ char *context; /* last char of text to be matched */ { char *pat; char *text; if (!*pattern) return(TRUE); /* null string matches any context */ pat = pattern; text = context; for (pat = pattern; *pat != '\0'; pat++) { switch (*pat) { case '\'': case ' ': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': if (*pat != *text) return(FALSE); text++; continue; case '#': /* One or more vowels */ if (!isvowel(*text)) return FALSE; text++; while (isvowel(*text)) text++; break; case ':': /* Zero or more consonants */ while (isconsonant(*text)) text++; break; case '^': /* One consonant */ if (!isconsonant(*text)) return FALSE; text++; break; 
case '.': /* B, D, V, G, J, L, M, N, R, W, Z */ #ifdef NOTDEF if (*text != 'B' && *text != 'D' && *text != 'V' && *text != 'G' && *text != 'J' && *text != 'L' && *text != 'M' && *text != 'N' && *text != 'R' && *text != 'W' && *text != 'Z') { return(FALSE); } text++; #endif switch (*text) { case 'B': case 'D': case 'V': case 'G': case 'J': case 'L': case 'M': case 'N': case 'R': case 'W': case 'Z': text++; break; default: return(FALSE); } break; case '+': /* E, I or Y (front vowel) */ #ifdef NOTDEF if (*text != 'E' && *text != 'I' && *text != 'Y') return(FALSE); text++; #endif switch(*text) { case 'E': case 'I': case 'Y': text++; break; default: return(FALSE); } break; case '%': /* ER, E, ES, ED, ING, ELY (a suffix) */ if (*text == 'E') { text++; if (*text == 'L') { text++; if (*text == 'Y') { text++; break; } else { text--; /* Don't gobble L */ break; } } else if (*text == 'R' || *text == 'S' || *text == 'D') { text++; } break; } else if (*text == 'I') { text++; if (*text == 'N') { text++; if (*text == 'G') { text++; break; } } return(FALSE); } return(FALSE); default: fprintf(stderr, "Bad char in right rule:'%c'\n", *pat); return(FALSE); } } return(TRUE); } /* * ruleinit() - initialize the remaining fields of the phoneme rule table. */ ruleinit() { struct rule *rule; int rtype; for (rtype = 0; rtype < RULECNT; ++rtype) { for (rule = Rules[rtype]; rule->match; ++rule) { rule->preflen = strlen(rule->prefix); rule->sufflen = strlen(rule->suffix); } } } int isvowel(chr) char chr; { #ifdef NOTDEF return(chr == 'A' || chr == 'E' || chr == 'I' || chr == 'O' || chr == 'U'); #endif switch (chr) { case 'A': case 'E': case 'I': case 'O': case 'U': return(TRUE); } return(FALSE); } int isconsonant(chr) char chr; { return(isupper(chr) && !isvowel(chr)); } /* * outphonemes() - output the given P_end-terminated array of phonemes. 
*/ outphonemes(php) short *php; /* phoneme pointer */ { while (*php != P_end) { outstring(phochars[*php]); ++php; } } outstring(string) char *string; { while (*string != '\0') fputc(*string++, stderr); }