/* *********************************************************************
			newdic.cpp
			creates a new dictionary output for including in other programs
			Input is a sorted list of lower case words, one line per word.
			Output is a data define suitable for including in program.
******************************************************************** */
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <time.h>
// File-scope state shared by main(), out_byte() and flush().

// out_byte() calls flush() once this many values are waiting in line_buf,
// so each line of the generated data table holds at most this many entries.
const int MAX_PER_LINE = 15; // maximum data entries in a line
// IPF: word list opened by main() from args[1].
// OTF: generated output file opened by main() from args[2]; flush() writes to it.
FILE  *IPF, *OTF;
// Values queued by out_byte() and written out by flush().
int line_buf [MAX_PER_LINE];
int item_cnt = 0; // number of items stored in line_buf.
// 1 until flush() has written the very first value of the table; that value
// is the only one written without a leading comma.
int first_flag = 1;
// Incremented by every out_byte() call; main() stores it in az_idx and resets
// it to 0 whenever the first two letters of the word change.
long int byte_count = 0;
// one_let[c - 'a'] is set to 1 by main() when the one-letter word c is read.
char one_let [26] = {0};
// az_idx[c1*26 + c2] receives the byte_count reached for words whose first
// two letters are c1, c2 (each counted from 'a' = 0).
static short unsigned int az_idx [26*26] = {0}; // index array.
// Queue one value for the data table (defined below).
void out_byte (int);
// Write the queued values to OTF and empty the queue (defined below).
void flush (void);
int main (int num_arg, char * args[]){
	const int MAX_CHR = 32;
	char in_wrd [MAX_CHR+2];
	time_t Curent_time = time(NULL);
	char in_wrd_x [MAX_CHR+2];
	char pv_wrd [MAX_CHR+2] = "";
	char time_ascii [30];
	int c1, c2, idx;
	int cur_idx = -1;
	int j, num_same;
	struct tm *loctime;
	int wrd_sz;
	// first parameter is input dictionary name
	if (num_arg != 3) {
		printf ("Needs two file names\n");
		return 1;
	}
	if ((IPF= fopen (args[1],"rt")) == NULL) {
		printf("Unable to open input file %s \n",args[1]);
		return 1;
	}
	if((OTF = fopen(args[2],"wt")) == NULL){
		printf("Unable to open output file %s \n",args[2]);
		return 1;
	}
	fprintf(OTF, "/* New format Dictionary produced by NEWDIC.CPP */\n");
	strcpy (time_ascii,ctime(&Curent_time));
	// remove CR/LF
	time_ascii[24] = 0;
	fprintf(OTF,"/* Produced on %s */\n",time_ascii);
   fprintf(OTF,"/* Source file: %s */\n",args[1]);
	fprintf(OTF, " const unsigned char dic_data_buf [] = { \n   ");
	// loop round reading dictionary.
	long int di;
	char chas;
	int first_byte;
	int i;
	for (di=1;;di++,strcpy (pv_wrd, in_wrd_x)) {
		if (fgets (in_wrd, MAX_CHR+1,IPF) == NULL) break;
		// remove CR/LF
		int s_len = strlen(in_wrd);
		if (s_len != 0) in_wrd[s_len-1] = 0;
		if (strcmp(in_wrd,pv_wrd) < 0){
			printf("Words out of sequence in input dictionary\n");
         printf("Word number = %di, Value = %s \n",di,in_wrd);
         return 1;
      }
      // check word is valid.
      wrd_sz = strlen(in_wrd);
      for (i = 0;i < wrd_sz; i ++){
         chas = in_wrd[i];
         if (!isalpha(chas)) {
            printf("Non-alphabetic letter in word %s \n",in_wrd);
            return 1;
         }
         if (!islower(chas)) {
            printf("Not lower case letter in word %s\n",in_wrd);
            return 1;
         }
         in_wrd_x[i] = chas;
      }
      in_wrd_x [wrd_sz] = 0; // terminate word
      if (wrd_sz < 2) {
         if (wrd_sz == 1){
            one_let [in_wrd_x[0] - 'a'] = 1;
         }
         continue;
      }
      // update index count.
      c1 = in_wrd_x [0] - 'a';
      c2 = in_wrd_x [1] - 'a';
      idx = c1*26 + c2;
      int num_same = 0;
      if (idx != cur_idx) { // new first two letters
         if (cur_idx != -1) az_idx [cur_idx] = byte_count;
         byte_count = 0;
      }
      else { // not a new index
         int ij = 2;
         for (;ij < wrd_sz;ij++){
            if (in_wrd_x [ij] ==  pv_wrd[ij]) {
               num_same ++;
            }
            else{
               break;
            }
         }
         if (num_same > 7) num_same = 7; // maximum
      }
      // output word.
      // create first byte
      first_byte =  (wrd_sz - 2)*8 + num_same;
      out_byte (first_byte);
      for (j = 2+num_same ; j < wrd_sz; j++){
         out_byte (in_wrd_x [j]);
      }
      cur_idx = idx;
   }
   if (cur_idx != -1) az_idx [cur_idx] = byte_count;
   if (item_cnt != 0) flush();
   // close off data define.
   fprintf(OTF, "  }; \n");
   fprintf(OTF,
      "    const unsigned short int dic_data_idx [26*26+1] = {0\n   ");
   int jj;
   long int accum = 0; // accumulated counts.
   for (jj = 0; jj < 26*26; jj++){
      accum += az_idx [jj];
      if (accum > 65535) {
         printf("Word buffer too big > 65535-redesign!\n");
         return 1;
     }
      fprintf(OTF,",%5li",accum);
      if ((jj+1)%10 == 0) {
         fprintf (OTF,"\n   ");
      }
   }
   fprintf(OTF, " \n }; \n");
   // print out single letter table.
   fprintf(OTF,"  const char dic_one_let [26] = \n   {%i",one_let[0]);
   for (i=1;i<26;i++){
		fprintf(OTF,",%i",one_let[i]);
   }
	fprintf (OTF, "};\n");

   fclose (OTF);
   return 0;
}
/* *********************************************************************
  out_byte - queues one value for the data table.

  inpt is the value to store.  Every call adds one to byte_count.  The
  program is ended with exit(1) when byte_count goes above 65535 or when
  inpt is 256 or more.  If the line buffer already holds MAX_PER_LINE
  values it is flushed before the new value is stored.
******************************************************************** */
void out_byte (int inpt)
{
   byte_count += 1;
   if (byte_count > 65535) { // too big for short unsigned.
      printf("Dictionary table is too big\n");
      exit(1);
   }
   else if (inpt >= 256) {
      printf ("System error 256\n");
      exit (1);
   }

   // Make room first, then append at the next free slot.
   if (item_cnt == MAX_PER_LINE) {
      flush();
   }
   line_buf [item_cnt] = inpt;
   item_cnt += 1;
}
/* *******************************************************************
   flush() - writes the queued values to OTF as one line of the data
             table and empties the queue.

   Values in the range 'a'..'z' are written as character constants,
   all others as decimal numbers.  Each value is preceded by a comma,
   except the very first value of the whole table.
******************************************************************** */
void flush (void)
{
   int pos = 0;

   // first_flag can only still be set on the first call, so the value
   // written without a comma is always the first one in the queue.
   if (first_flag && item_cnt > 0) {
      fprintf (OTF, "%i",line_buf[0]);
      first_flag = 0;
      pos = 1;
   }
   while (pos < item_cnt) {
      const int entry = line_buf [pos++];
      const int is_letter = (entry >= 'a' && entry <= 'z');
      if (is_letter)
         fprintf (OTF,",'%c'",entry);
      else
         fprintf(OTF, ", %i",entry);
   }
   // End the line and indent the next one.
   fprintf(OTF, "\n   ");
   item_cnt = 0;
}

