DataMuseum.dk

Presents historical artifacts from the history of:

DKUUG/EUUG Conference tapes

This is an automatic "excavation" of a thematic subset of
artifacts from Datamuseum.dk's BitArchive.

See our Wiki for more about DKUUG/EUUG Conference tapes

Excavated with: AutoArchaeologist - Free & Open Source Software.


top - metrics - download
Index: T t

⟦d8eb2dc41⟧ TextFile

    Length: 7877 (0x1ec5)
    Types: TextFile
    Names: »tibdex.c«

Derivation

└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12
    └─⟦c319c2751⟧ »unix3.0/TeX3.0.tar.Z« 
        └─⟦036c765ac⟧ 
            └─⟦this⟧ »TeX3.0/TeXcontrib/tib/tibdex.c« 
└─⟦060c9c824⟧ Bits:30007080 DKUUG TeX 2/12/89
    └─⟦this⟧ »./tex82/TeXcontrib/tib/tibdex.c« 
└─⟦52210d11f⟧ Bits:30007239 EUUGD2: TeX 3 1992-12
    └─⟦63303ae94⟧ »unix3.14/TeX3.14.tar.Z« 
        └─⟦c58930e5c⟧ 
            └─⟦this⟧ »TeX3.14/TeXcontrib/tib/tibdex.c« 

TextFile

/*
        Tibdex - makes an inverted index for tib
                 a slight modification of Invert of bib

    input:  records of lines, separated by blank lines
    output: key:file1 start/length ... start/length:file2 start/length ...

Note to non-unix users:
    This program uses a unix command "sort" which sorts records in a file 
    "tmp_file" with the following sort key: 
          ASCII sort on the first field (fields are separated by spaces),
          ASCII sort on the second field,
          numeric sort on the third field,
          numeric sort on the fourth field,
    removing duplicates, and then writes to the same file. The use of "sort"
    is isolated by the "#define SORT_IT" statement just below.  If "sort" 
    is unavailable, replace the sort in the definition of "SORT_IT" by some 
    equivalent sort.  If the format of "SORT_IT" is unacceptable, modify 
    appropriately the one place further below where "SORT_IT" is used.
    It may not be necessary to remove duplicates.
     
*/

/* SORT_IT: sort tmp_file in place, dropping duplicate lines.
   Keys: field 1 and field 2 as text, field 3 and field 4 as numbers.
   The POSIX "-k" key syntax is used because the historical "+pos -pos"
   form is no longer accepted by current sort implementations; "-o" is
   placed before the operand so strict option parsers accept it.
   Expands to two complete statements (trailing ';' included), so it is
   written on a line of its own with no semicolon after it. */
#define SORT_IT \
  sprintf(sortcmd,"sort -u -k1,1 -k2,2 -k3,3n -k4n -o %s %s",tmp_file,tmp_file);\
    system(sortcmd);


#include "stdio.h"
#include "tib.h"
/* isnull(x): nonzero when the string x is empty.  The terminator is
   compared against the character '\0', not the pointer constant NULL. */
#define isnull(x)  (*(x) == '\0')
/* makelow(c): lower-case form of an ASCII upper-case letter; any other
   value is returned unchanged.  The argument is evaluated more than once,
   so it must not have side effects. */
#define makelow(c) ('A'<=(c) && (c)<='Z' ? (c)-'A'+'a' : (c))

char    headerline[240] = " ";/* header line -- list of files            */
int     max_kcnt = 100;     /*  max number of keys per record           */
int     max_klen =   6;     /*  max length of keys                      */
char    *ignore =           /*  string of line starts to ignore         */
            "CNOPVcnopv\\%"; /* letters, a backslash and a percent sign;
                                "\%" is not a valid C escape, so the '%'
                                is written without a backslash          */
char    *common = COMFILE;  /*  name of file of common words            */
char    *INDEX = INDXFILE;  /*  name of output file                     */
char    tmp_file[120];       /*  name of temporary file                  */
char    dirsp2[]=DIRSEP;    /*  directory separator character           */
char    optch2[]=OPTCH;     /*  option character on call                */
int	silent = 0;	    /*  0 => statistics printed			*/
			    /*  1 => no statistics printed		*/

/* helpers defined outside this file that return long rather than int */
long int nextrecord(), recsize(), nextline();
char sortcmd[MAXSTR];       /*  command line built by SORT_IT           */

/* command-line arguments not yet consumed; set up by main() and
   advanced by both main() and flags() */
int     argc;
char    **argv;

main(argcount,arglist)
int argcount;
char **arglist;
{   char            filename[MAXSTR], *pcom, *getenv(), *ptemp;
    FILE            *input, *output;
    long int        start,length;
    char            word[MAXSTR];
    int             kcnt;
    char            tag_line[MAXSTR];
    char            argchk[3];

    long int	    records = 0;  /*  number of records read           */
    long int	    keys    = 0;  /*  number of keys read (occurences) */
    long int	    distinct;     /*  number of distinct keys          */
    long int	    shorten();
   int i, strcmp();

   /* header */
   for (i = 1; i < argcount; i++) {
      strcpy(argchk,OPTCH);
      strcat(argchk,"z");
      if (strcmp(arglist[i],argchk) == 0)
         silent = true;
   }
   if (silent == false)
      fprintf (stderr, "Tibdex -- version %s, released %s.\n", VERSION, RDATE);

    /* get file names from environment */
    pcom = getenv("COMFILE");
    if (pcom != NULL)
       strcpy(common,pcom);
    ptemp = getenv("TMPDIR");
    if (ptemp != NULL) {
       strcpy(tmp_file,ptemp);
       strcat(tmp_file,dirsp2);
       strcat(tmp_file,"tibdXXXXXX");
    }
    else
       strcpy(tmp_file,INVTEMPFILE);

    argc= argcount-1;
    argv= arglist+1;
    mktemp(tmp_file);
    output= fopen(tmp_file,"w");
    if (output == NULL) {
       fprintf(stderr, "tibdex: can't open temporary output file\n");
       exit(1);
    }

    for ( flags() ; argc>0 ; argc--, argv++ ,flags() )
    {   /* open input file              */
         strcpy(filename, *argv);
         input = fopen(filename, "r");
         if (input == NULL) {
            strcat(filename, ".ref");
            input=fopen(filename, "r");
            if (input == NULL) {
               fprintf(stderr, "can't open %s or %s\n", *argv, filename);
               exit(1);
               }
            }
            strcat(headerline, " ");
            strcat(headerline, filename);
            start=      0L;
            length=     0L;

        for(;;) /* each record  */
        {   /* find start of next record (exit if none)     */
                start= nextrecord(input,start+length);
                if (start==EOF)   break;
            records++;
	    kcnt= 0;
            length= recsize(input,start);
            sprintf(tag_line, " %s %D %D\n", filename, start, length);

            while (ftell(input) < start+length && kcnt < max_kcnt)
            {   getword(input,word,ignore);
                makekey(word,max_klen,common);
                if (!isnull(word))
                {   fputs(word,output); fputs(tag_line,output);
                    kcnt++; keys++;
                }
            }
        }
        fclose(input);
    }
    fclose(output);

    SORT_IT

    distinct = shorten(tmp_file,INDEX);
    if( silent == 0 )
	fprintf(stderr,
	    "%D documents   %D distinct keys  %D key occurrences\n",
	    records, distinct, keys);
}



/*  Flag    Meaning                             Default
    -ki     Keys per record                     100
    -li     max Length of keys                  6
    -%str   ignore lines that begin with %x     CNOPVcnopv\%
            where x is in str
            str is a seq of chars
    -cfile  file contains Common words          COMFILE
            do not use common words as keys
    -pfile  name of output file                 INDXFILE
    -z	    do not print statistics		statistics printed

    The operand of -k, -l, -c and -p may follow the flag letter directly
    or be given as the next argument.
*/

/*  missing_operand(flag): report that "flag" was the last argument and
    has no operand, then terminate with status 1.  Declared to return
    char * only so it can be used as an arm of the "operand" expression.
*/
static char *missing_operand(flag)
char *flag;
{   fprintf(stderr, "tibdex: flag '%s' requires an operand\n", flag);
    exit(1);
    return flag;        /* not reached */
}

/*  operand: the operand of the flag at *argv -- the text after the flag
    letter if there is any, otherwise the next argument (argv and argc
    are advanced past it).  If there is no next argument the program
    stops with a message instead of handing a null pointer to the caller.
*/
#define    operand     ((*argv)[2]!='\0' ? *argv+2 \
                        : argc>1 ? (argv++,argc--,*argv) \
                        : missing_operand(*argv))

/*  flags(): consume every leading argument that starts with the option
    character, updating the corresponding global setting.  Stops at the
    first argument that is not a flag, leaving argc/argv positioned on it.
    Unknown flags are reported on stderr and skipped.  Always returns 0.
*/
int flags()
{   for (; argc>0 && (*argv)[0]==optch2[0];  argc--,argv++)
    {   switch ((*argv)[1])
        {   case 'k':   max_kcnt= atoi(operand);
                        break;
            case 'l':   max_klen= atoi(operand);
                        break;
            case 'c':   common=  operand;
                        break;
            case '%':   ignore=  *argv+2;   /* operand must be attached */
                        break;
            case 'p':   INDEX=  operand;
                        break;
            case 'z':   silent= 1;
                        break;
            default:    fprintf(stderr, "unknown flag '%s'\n", *argv);
                        break;
        }
    }
    return 0;
}


/*  shorten(inf,outf): file "inf" consists of lines of the form:
        key file start length
    sorted by key and file.  replace lines with the same key
    with one line of the form:
        key :file1 start/length ... start/length:file2 start/length ...
    and write the result, preceded by the header line, to file "outf".
    "inf" is removed afterwards.
    Lines of "inf" that do not hold all four fields are skipped.
    returns number of key lines written (0 if nothing was written,
    including the case where a file could not be opened)
*/
long shorten(inf,outf)
char *inf, *outf;
{   FILE *in, *out;
    char line[MAXSTR];
    char key[MAXSTR],  newkey[MAXSTR],
         file[MAXSTR], newfile[MAXSTR];
    long int start, length;
    long int lines = 0;
    int  started = 0;       /* set once the first key has been written */

    strcpy(file,"");
    strcpy(key,"");

    /* open the input first so that an existing index is not truncated
       when the sorted temporary file turns out to be unreadable */
    in= fopen(inf, "r");
    out= (in != NULL) ? fopen(outf, "w") : NULL;
    if (in==NULL || out==NULL)
    {   fprintf(stderr, "tibdex: error in opening file for compression\n");
        if (in != NULL)
            fclose(in);
        return(0);
    }

    fputs(headerline,out);
    fprintf(out, "\n");

    /* The first line is processed even if reading it reached end of
       file; later lines only while end of file has not been seen.
       "line" starts out empty so that an empty input yields no fields. */
    line[0] = '\0';
    getline(in,line);
    do
    {   if (sscanf(line,"%s%s%ld%ld", newkey, newfile, &start, &length) == 4)
        {   if (!started || strcmp(key,newkey)!=0)
            {   /* new key: start a new output line */
                strcpy(key, newkey);
                strcpy(file, newfile);
                fprintf(out, "%s%s :%s %ld/%ld",
                        started ? "\n" : "", key, file, start, length);
                lines++;
                started = 1;
            }
            else if (strcmp(file,newfile)!=0)
            {   /* same key, new file */
                strcpy(file,newfile);
                fprintf(out, ":%s %ld/%ld", file, start, length);
            }
            else
                /* same key, same file: another record */
                fprintf(out, " %ld/%ld", start, length);
        }
        getline(in, line);
    } while (!feof(in));

    if (started)
        fprintf(out, "\n");

    fclose(in);
    if (fclose(out) == EOF)
        fprintf(stderr, "tibdex: error writing %s\n", outf);
    unlink(inf);
    return (lines);
}