⟦80bff4faf⟧

TextFile

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#define DATA
#include "phoneme.h"
#undef DATA

/*
 * xlate_line() - translate a line of English text into a phoneme list.
 *
 * The text is cut into words, a word being a maximal run of letters and
 * apostrophes; every other character is a separator and is skipped.
 * Each word is upper-cased, wrapped in one leading and one trailing
 * blank and handed to xlate_word(), which appends its phonemes.
 *
 * xlate_line() returns a pointer to the *static* resultant phoneme-list,
 * terminated by P_end.  The list is overwritten by the next call.
 *
 * A word with more letters than the word buffer can hold is truncated:
 * the surplus letters are consumed from the input but not translated.
 *
 * NOTE(review): the number of phonemes written into phonelist[] is not
 * bounded here; xlate_word() takes no limit, so a sufficiently long line
 * can presumably still overrun the list.
 */

short *
xlate_line(text)
char *text;     /* English text to convert */
{
    static short phonelist[MAXPHONES];
    char word[1 + MAXPHONES + 1];   /* the current parsed word */
    char *dp;                       /* next free slot in word[] */
    char *dplimit;                  /* first slot a letter may not use */
    short *nxtphones;               /* where the next phonemes go */
    unsigned char ch;               /* current letter, safe for <ctype.h> */
    short *xlate_word();

    /* Always keep two slots free for the trailing blank and the NUL. */
    dplimit = word + sizeof(word) - 2;

    nxtphones = phonelist;
    word[0] = ' ';
    while (*text) {
        /* Skip anything that cannot be part of a word. */
        while (*text && !(isalpha((unsigned char) *text) || *text == '\'')) {
            ++text;
        }
        if (!*text) break;                      /* Loop Exit */

        /* Copy the word, folding lower case to upper case. */
        dp = &word[1];
        while (isalpha((unsigned char) *text) || *text == '\'') {
            if (dp < dplimit) {
                ch = (unsigned char) *text;
                *dp++ = islower(ch) ? (char) toupper(ch) : *text;
            }
            ++text;
        }
        *dp++ = ' ';
        *dp = '\0';
        nxtphones = xlate_word(word, nxtphones);
    }
    *nxtphones = P_end;
    return(phonelist);
}

/*
 * xlate_word() - translate the given English word into a phoneme stream.
 * The word has the following form:
 *	' [-A-Z']* '
 * That is, it begins and ends with a space and it contains only
 * upper-case letters, apostrophes, and hyphens.
 *
 * The leading blank is skipped; everything after it, including the
 * trailing blank, is fed to apply1rule() one rule at a time until the
 * terminating NUL is reached.  The phonemes of each rule are appended at
 * phonedst.  No P_end terminator is stored; that is left to the caller.
 */

short *         /* where to put any following phonemes (or a terminator) */
xlate_word(word, phonedst)
char *word;
short *phonedst;        /* where to put the new phonemes */
{
    char *apply1rule();
    char *rest;         /* the part of the word not yet converted */
    short *src;         /* phonemes produced by the latest rule */

    for (rest = word + 1; *rest != '\0'; ) {
        rest = apply1rule(rest, &src);
        for (; *src != P_end; ++src) {
            *phonedst = *src;
            ++phonedst;
        }
    }
    return(phonedst);
}

/*
 * apply1rule() - apply the appropriate translation rule to the start of
 * the word provided, setting a pointer to the resultant phonemes and
 * returning a pointer to the unconverted part of the word.
 *
 * The rule list is chosen by the first character: one list per letter
 * 'A'..'Z', and the RIDX_PUNC list for everything else.  A rule applies
 * when its match string is found at `word', its prefix pattern matches
 * the text ending at word[-1] and its suffix pattern matches the text
 * following the match.  The first rule that applies wins.
 *
 * The caller must make sure word[-1] is readable; it is handed to
 * leftmatch() as the left context.
 *
 * If no rule applies, a message is written to stderr, *newphp is set to
 * an empty phoneme list and one character is skipped.
 */

char *          /* returns the place to convert next */
apply1rule(word, newphp)
char *word;     /* the part of the word to start in */
short **newphp; /* where to put a pointer to the set of translated phonemes */
{
    int rtype;                  /* rule type to use */
    struct rule *rule;          /* the current rule being tested */
    register char *cp;          /* temp pointer for matching */
    register char *rem;         /* points to the first char past the match */
    static short firstcall = TRUE;      /* "first call to this routine" */

    static short nosym = P_end; /* an empty phoneme list */

    /* The derived fields of the rule table are filled in lazily. */
    if (firstcall) {
        firstcall = FALSE;
        ruleinit();
    }

    /*
     * Select the rule list with an explicit range test instead of
     * isupper(): isupper() is undefined for negative char values and is
     * locale dependent, so it could let a non-A-Z character through and
     * index Rules[] out of range.  Like the index arithmetic itself,
     * this assumes 'A'..'Z' are contiguous.
     */
    if (*word >= 'A' && *word <= 'Z') {
        rtype = (int) (*word - 'A') + RIDX_A;
    } else {
        rtype = RIDX_PUNC;
    }

    /* Each rule list ends with an entry whose match field is null. */
    for (rule = Rules[rtype]; rule->match; ++rule) {
        for (cp = rule->match, rem = word; *cp; cp++, rem++) {
            if (*cp != *rem) break;
        }
        if (*cp) continue; /* failed to match */

        if (!leftmatch(rule->prefix, rule->preflen, word - 1)) continue;
        if (!rightmatch(rule->suffix, rule->sufflen, rem)) continue;

        *newphp = rule->outsyms;
        return(rem);
    }

    fprintf(stderr, "Error: Can't find rule for: '%c' in \"%s\"\n",
     *word, word);
    *newphp = &nosym;
    return(word + 1);   /* Skip the annoyance */
}


/*
 * leftmatch() - test whether the text to the left of a rule's match
 * satisfies the rule's prefix pattern.
 *
 * Pattern and text are both walked from right to left, starting at the
 * last pattern character and at `context'.  Pattern characters:
 *	' A-Z	the same character must appear in the text
 *	#	one or more vowels
 *	:	zero or more consonants
 *	^	exactly one consonant
 *	.	one of B, D, V, G, J, L, M, N, R, W, Z
 *	+	one of E, I, Y (front vowel)
 * Any other pattern character is reported on stderr and fails the match.
 *
 * Returns TRUE when the whole pattern matches (an empty pattern always
 * does), FALSE otherwise.  The literal test assumes 'A'..'Z' are
 * contiguous character codes.
 */
int
leftmatch(pattern, patlen, context)
char *pattern;  /* first char of pattern to match in text */
short patlen;   /* strlen(pattern) */
char *context;  /* last char of text to be matched */
{
    char *pp;   /* current pattern character */
    char *tp;   /* current text character */
    int left;   /* pattern characters still to process */
    char pc;    /* *pp */
    char tc;    /* *tp */

    if (*pattern == '\0') return(TRUE); /* null string matches any context */

    tp = context;
    pp = pattern + (patlen - 1);        /* last character of the pattern */

    for (left = patlen; left > 0; --left, --pp) {
        pc = *pp;

        /* Literal characters must be mirrored exactly in the text. */
        if (pc == '\'' || pc == ' ' || (pc >= 'A' && pc <= 'Z')) {
            if (*tp != pc) return(FALSE);
            --tp;
            continue;
        }

        switch (pc) {
        case '#':       /* One or more vowels */
            if (!isvowel(*tp)) return(FALSE);
            do {
                --tp;
            } while (isvowel(*tp));
            break;

        case ':':       /* Zero or more consonants */
            while (isconsonant(*tp)) --tp;
            break;

        case '^':       /* One consonant */
            if (!isconsonant(*tp)) return(FALSE);
            --tp;
            break;

        case '.':       /* B, D, V, G, J, L, M, N, R, W, Z */
            tc = *tp;
            if (tc != 'B' && tc != 'D' && tc != 'V' && tc != 'G'
              && tc != 'J' && tc != 'L' && tc != 'M' && tc != 'N'
              && tc != 'R' && tc != 'W' && tc != 'Z') {
                return(FALSE);
            }
            --tp;
            break;

        case '+':       /* E, I or Y (front vowel) */
            tc = *tp;
            if (tc != 'E' && tc != 'I' && tc != 'Y') return(FALSE);
            --tp;
            break;

        default:
            fprintf(stderr, "Bad char in left rule: '%c'\n", pc);
            return(FALSE);
        }
    }
    return(TRUE);
}


/*
 * rightmatch() - test whether the text to the right of a rule's match
 * satisfies the rule's suffix pattern.
 *
 * Pattern and text are both walked from left to right, the text starting
 * at `context'.  Pattern characters:
 *	' A-Z	the same character must appear in the text
 *	#	one or more vowels
 *	:	zero or more consonants
 *	^	exactly one consonant
 *	.	one of B, D, V, G, J, L, M, N, R, W, Z
 *	+	one of E, I, Y (front vowel)
 *	%	a suffix: ER, E, ES, ED, ING or ELY
 * Any other pattern character is reported on stderr and fails the match.
 *
 * Returns TRUE when the whole pattern matches (an empty pattern always
 * does), FALSE otherwise.  The literal test assumes 'A'..'Z' are
 * contiguous character codes.
 */
int
rightmatch(pattern, patlen, context)
char *pattern;  /* first char of pattern to match in text */
short patlen;   /* strlen(pattern) [ignored] */
char *context;  /* first char of text to be matched */
{
    char *pp;   /* current pattern character */
    char *tp;   /* current text character */
    char pc;    /* *pp */
    char tc;    /* *tp */

    if (*pattern == '\0') return(TRUE); /* null string matches any context */

    tp = context;

    for (pp = pattern; *pp != '\0'; ++pp) {
        pc = *pp;

        /* Literal characters must appear as they are in the text. */
        if (pc == '\'' || pc == ' ' || (pc >= 'A' && pc <= 'Z')) {
            if (*tp != pc) return(FALSE);
            ++tp;
            continue;
        }

        switch (pc) {
        case '#':       /* One or more vowels */
            if (!isvowel(*tp)) return(FALSE);
            do {
                ++tp;
            } while (isvowel(*tp));
            break;

        case ':':       /* Zero or more consonants */
            while (isconsonant(*tp)) ++tp;
            break;

        case '^':       /* One consonant */
            if (!isconsonant(*tp)) return(FALSE);
            ++tp;
            break;

        case '.':       /* B, D, V, G, J, L, M, N, R, W, Z */
            tc = *tp;
            if (tc != 'B' && tc != 'D' && tc != 'V' && tc != 'G'
              && tc != 'J' && tc != 'L' && tc != 'M' && tc != 'N'
              && tc != 'R' && tc != 'W' && tc != 'Z') {
                return(FALSE);
            }
            ++tp;
            break;

        case '+':       /* E, I or Y (front vowel) */
            tc = *tp;
            if (tc != 'E' && tc != 'I' && tc != 'Y') return(FALSE);
            ++tp;
            break;

        case '%':       /* ER, E, ES, ED, ING, ELY (a suffix) */
            if (tp[0] == 'E') {
                if (tp[1] == 'L' && tp[2] == 'Y') {
                    tp += 3;    /* ELY */
                } else if (tp[1] == 'R' || tp[1] == 'S' || tp[1] == 'D') {
                    tp += 2;    /* ER, ES, ED */
                } else {
                    tp += 1;    /* bare E; a following L is not gobbled */
                }
            } else if (tp[0] == 'I' && tp[1] == 'N' && tp[2] == 'G') {
                tp += 3;        /* ING */
            } else {
                return(FALSE);
            }
            break;

        default:
            fprintf(stderr, "Bad char in right rule:'%c'\n", pc);
            return(FALSE);
        }
    }
    return(TRUE);
}

/*
 * ruleinit() - initialize the remaining fields of the phoneme rule table.
 *
 * For every rule in each of the RULECNT rule lists, the lengths of the
 * prefix and suffix patterns are computed once and stored in the rule.
 * Each list ends with an entry whose match field is null.
 *
 * The return type is spelled out as int (it used to be implicit) so that
 * the definition stays compatible with callers that use the function
 * before it is declared.  The value is always 0 and carries no meaning.
 * strlen() requires <string.h>.
 */

int
ruleinit()
{
    struct rule *rule;
    int rtype;

    for (rtype = 0; rtype < RULECNT; ++rtype) {
        for (rule = Rules[rtype]; rule->match; ++rule) {
            rule->preflen = strlen(rule->prefix);
            rule->sufflen = strlen(rule->suffix);
        }
    }
    return(0);
}

/*
 * isvowel() - TRUE if chr is one of the upper-case vowels A, E, I, O, U,
 * FALSE for any other character (lower-case vowels and Y included).
 */
int
isvowel(chr)
char chr;
{
    if (chr == 'A' || chr == 'E' || chr == 'I' || chr == 'O' || chr == 'U') {
        return(TRUE);
    }
    return(FALSE);
}

/*
 * isconsonant() - non-zero if chr is an upper-case letter that is not a
 * vowel according to isvowel(), zero otherwise.
 *
 * The argument is converted to unsigned char before it reaches
 * isupper(); passing a negative plain char to a <ctype.h> function is
 * undefined behavior.
 */
int
isconsonant(chr)
char chr;
{
    return(isupper((unsigned char) chr) && !isvowel(chr));
}

/*
 * outphonemes() - output the given P_end-terminated array of phonemes.
 *
 * Each phoneme is used as an index into phochars[] and the string found
 * there is written with outstring().
 *
 * The return type is spelled out as int (it used to be implicit); the
 * value is always 0 and carries no meaning.
 *
 * NOTE(review): phoneme values are not range-checked before indexing
 * phochars[]; presumably every value in the list is a valid index.
 */

int
outphonemes(php)
short *php;             /* phoneme pointer */
{
    for (; *php != P_end; ++php) {
        outstring(phochars[*php]);
    }
    return(0);
}

/*
 * outstring() - write the given NUL-terminated string to stderr,
 * without adding a newline.  A null pointer is silently ignored.
 *
 * The return type is spelled out as int (it used to be implicit) so that
 * the definition stays compatible with callers that use the function
 * before it is declared.  The value is always 0 and carries no meaning.
 */
int
outstring(string)
char *string;
{
    if (string != NULL) {
        fputs(string, stderr);
    }
    return(0);
}
DataMuseum.dk

DKUUG/EUUG Conference tapes

⟦80bff4faf⟧ TextFile

Derivation

TextFile