/*	CDICT:	Compress Dictionary utility program for
		MicroSPELL 2.0

		(C)opyright May 1987,1992 by Daniel Lawrence
		All Rights Reserved
*/

#include	<stdio.h>
#include	"dopt.h"
#include	"dsfx.h"

/* globals */

char mdfile[NSTRING];		/* main dictionary text file name (input, copied from argv[1]) */
char mcfile[NSTRING];		/* compressed dictionary file name (output, copied from argv[2]) */
FILE *mdptr = NULL;		/* main dictionary file pointer; NULL whenever that file is not open */
FILE *mcptr = NULL;		/* compressed dictionary file pointer */
int sflen[NSUFFIX];		/* length of each suffix in sfx[], filled in by main() */
long letter_offset[ALPHASIZE];	/* offset in the compressed file where the words for each
				   first letter begin; written at the front of that file */
unsigned char lcase[128];	/* lower case table (quick!): maps 'A'-'Z' to 'a'-'z'
				   and every other code below 128 to itself */

/*	main:	compress the text dictionary named by the first argument
		into the compressed dictionary named by the second.

	The output starts with a table of ALPHASIZE long file offsets, one
	per first letter, followed by the compressed words.  The table is
	written once as a placeholder and rewritten once the scan is done.

	Exits with EXBADOPT on a bad command line and EXMDICT when a file
	can not be opened or written; returns 0 on success.	*/

main(argc, argv)

int argc;	/* # of command line arguments */
char **argv;	/* text of command line arguments */

{
	register char *word;		/* current word */
	register int suffix;		/* suffix index */
	register int first;		/* lower cased first char of word */
	register int cur_first_letter;	/* current first letter scanned */
	register int index;		/* loop index */
	int failed;			/* did writing the output fail? */
	long total_words;		/* total number of words in this dictionary */
	char lastword[NSTRING];		/* previous word in dictionary */
	char tempword[NSTRING];		/* temporary word in dictionary */
	char *nxtmword();

	printf("CDICT Dictionary Compression Utility for MicroSPELL v%s\n",
		VERSION);

	if (argc != 3) {
		help();
		exit(EXBADOPT);
	}

	/* the file name buffers are fixed size...refuse names that
	   would overflow them */
	if ((int)strlen(argv[1]) >= NSTRING ||
	    (int)strlen(argv[2]) >= NSTRING) {
		printf("%%File name too long\n");
		exit(EXBADOPT);
	}
	strcpy(mdfile, argv[1]);
	strcpy(mcfile, argv[2]);

	if (mopen() != TRUE) {
		printf("%%Can not open text dictionary file\n");
		exit(EXMDICT);
	}

	/* init the lower case table */
	for (index = 0; index < 128; index ++)
		if ('A' <= index && index <= 'Z')
			lcase[index] = index - 'A' + 'a';
		else
			lcase[index] = index;

	/* open the output compressed dictionary file */
	mcptr = fopen(mcfile, "wb");
	if (mcptr == NULL) {
		printf("%%Can not open compressed dictionary output file\n");
		exit(EXMDICT);
	}

	/* position past character table (placeholder, rewritten below) */
	fwrite((char *)&(letter_offset[0]), sizeof(long), ALPHASIZE, mcptr);

	/* prepare the suffix length table */
	for (suffix = 0; suffix < NSUFFIX; suffix++)
		sflen[suffix] = strlen(sfx[suffix]);

	printf("[Compressing %s => %s]\n", mdfile, mcfile);
	lastword[0] = 0;	/* null last word */

	/* scan the dictionary, compressing */
	cur_first_letter = 0;
	total_words = 0L;
	while ((word = nxtmword()) != NULL) {

		/* a blank line holds no word to compress */
		if (word[0] == 0)
			continue;

		/* lower case the lead char; lcase only covers codes
		   0..127, so anything above is taken as is */
		first = (unsigned char)word[0];
		if (first < 128)
			first = lcase[first];

		if (first != cur_first_letter) {
			cur_first_letter = first;

			/* only record offsets that fit the table; a word
			   starting with a non-letter has no slot */
			index = first - 'a';
			if (index >= 0 && index < ALPHASIZE)
				letter_offset[index] = ftell(mcptr);
		}

		/* cmpsword() modifies word, so keep an intact copy to
		   serve as the previous word next time round */
		strcpy(tempword, word);
		cmpsword(lastword, word);
		total_words++;
		strcpy(lastword, tempword);
	}

	/* write out letter offset table to front of file
	   (0L: the offset must be a long even without a prototype) */
	failed = (fseek(mcptr, 0L, 0) != 0);
	fwrite((char *)&(letter_offset[0]), sizeof(long), ALPHASIZE, mcptr);

	/* close things up, noticing any write failure on the way */
	mclose();
	if (ferror(mcptr))
		failed = 1;
	if (fclose(mcptr) != 0)
		failed = 1;
	if (failed) {
		printf("%%Error writing compressed dictionary output file\n");
		exit(EXMDICT);
	}

	printf("[%ld words in dictionary compressed]\n", total_words);
	return(0);
}

help()		/* print the command line usage summary */

{
	/* heading, set off by blank lines */
	fputs("\nUsage:\n\n", stdout);

	/* the one and only calling form */
	fputs("	CDICT <text dictionary> <compressed dictionary>\n", stdout);
}

mopen()		/* open the main dictionary; TRUE on success, else FALSE */

{
	/* drop any stream left over from an earlier open */
	if (mdptr != NULL)
		fclose(mdptr);

	/* attach to the text dictionary named in mdfile */
	mdptr = fopen(mdfile, "r");

	return((mdptr != NULL) ? TRUE : FALSE);
}

mclose()	/* shut the main dictionary file, if it is open */

{
	/* nothing to do unless a stream is attached; either way
	   mdptr ends up NULL */
	if (mdptr != NULL) {
		fclose(mdptr);
		mdptr = NULL;
	}
}

/*	nxtmword:	get the next word from the main dictionary

	Reads one line from mdptr and returns it with the line terminator
	and any trailing spaces removed.  The result lives in a static
	buffer that the next call overwrites.  A blank line yields an
	empty string.  Returns NULL when the file is not open or no lines
	are left; on end of file the file is closed and mdptr set to NULL. */

char *nxtmword()

{
	static char word[NSTRING];	/* word to return */
	register int len;		/* current length of the word */

	/* is it already closed? */
	if (mdptr == NULL)
		return(NULL);

	/* get the next word */
	if (fgets(word, NSTRING - 1, mdptr) == NULL) {
		/* no more left!!!! close out */
		fclose(mdptr);
		mdptr = NULL;
		return(NULL);
	}

	/* all's well, dump the return and any trailing spaces.  Only
	   real terminators and spaces are removed, so a final line
	   lacking a newline keeps its last letter, and the scan never
	   steps in front of the buffer on an empty line */
	len = strlen(word);
	while (len > 0 && (word[len - 1] == '\n' ||
			   word[len - 1] == '\r' ||
			   word[len - 1] == ' '))
		word[--len] = 0;

	return(word);
}

/*	cmpsword:	compress the given word and write it to mcptr

	The word is written as three parts:
		- a lead byte, 'A' plus the number of leading characters
		  shared with lastword, with 128 added when the word's
		  first letter was upper case
		- the characters left after that shared prefix, with any
		  recognized suffix from sfx[] cut off the end
		- a suffix code byte: the index of the suffix removed, or
		  NSUFFIX when none matched (or'd with 128 if ASCII is set)

	word is modified in place (suffix truncated, first character
	lower cased); lastword is only read.	*/

cmpsword(lastword, word)

char *lastword;		/* previous dictionary word */
char *word;		/* current dictionary word */

{
	register int index;	/* index into current word */
	register int same;	/* # of same characters */
	register int suffix;	/* suffix code */
	register int wlen;	/* length of current word */
	register int orig_first;	/* original first character */
	register int lead;	/* lead byte to write */

	/* scan for common suffixes */
	wlen = strlen(word);
	for (suffix = 0; suffix < NSUFFIX; suffix++) {
		if (wlen < sflen[suffix])
			continue;
		if (strcmp(&word[wlen - sflen[suffix]], sfx[suffix]) == 0) {
			word[wlen - sflen[suffix]] = 0;	/* trunc it */
			break;
		}
	}

	/* If there is no suffix...suffix ends up as NSUFFIX */

	/* save the capitalization of the original lead char.  lcase has
	   only 128 entries and char may be signed, so look the character
	   up as an unsigned value and leave codes past the table alone */
	orig_first = word[0];
	if ((unsigned char)word[0] < 128)
		word[0] = lcase[(unsigned char)word[0]];

	/* scan for like beginning characters */
	index = 0;
	while (lastword[index] && lastword[index] == word[index])
		index++;

	same = index;
#if	ASCII
	suffix |= 128;
#endif

	/* the high bit of the lead byte flags a first letter that had
	   to be lower cased */
	lead = 'A' + same;
	if (orig_first != word[0])
		lead += 128;

	fprintf(mcptr, "%c%s%c", lead, &word[index], suffix);
}

#if	CMS
#undef	fopen
/*	The IBM 30xx likes to tell us when file opens
	fail...it's too chatty....I like to handle these myself	*/

FILE *cmsopen(file, mode)

char *file;	/* name of file to open */
char *mode;	/* mode to open it in */

{
	FILE *fp;	/* stream handed back to the caller (NULL on failure) */

	/* presumably silences the runtime's own open-failure messages;
	   confirm against the CMS library's quiet() */
	quiet(1);

	fp = fopen(file, mode);
	return(fp);
}
#endif

