/* LZW.C */
/* Lempel Ziv Welch */
/*
Decompreesion: 	read first input code   code -> OLDcode
		with CODE = code(K)	K -> output
					K -> FINchar

Next Code: 	read next input code    code - > INcode
		if no new code EXIT.
		if CODE not defined (special case)
			FINchar -> output
			OLDcode -> CODE
			code(OLDcode, FINchar -> INcode

Next Symbol:	if CODE = code(wK)	K -> stack
					code(w) -> code
					repeat Next Symbol
		if CODE = code(K)
			K -> output
			K -> FINchar
			do while stack not empty
				stack top -> output
				POP stack
			code(OLDcode, K) -> string table
			INcode -> OLDcode
			repeat Next Code
*/

/* *symtbl : symbol table for codes
             w    pre-code
             K    character
             bitsize (9-13 bits)
*/

/* Program Part */

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>

	int 	collision = 0;
	long 	ngetc = 1L;
#define dbputl() printf("[%1d][%d:%d][%d]\n",ngetc,bitsize,tblcnt,collision);

#define allocate(x,n,t) 	x = (t *) calloc(n,sizeof(t))
#define MaxBitSize 13 			/* for string table <= 15 */
#define MaxTblIdx 0x1FFF 		/* <- MaxBitSize */
#define HashShift MaxBitSize - 8
#define MaxTryOut 30 			/* collision try out */

#define ErrREAD  1
#define ErrWRITE 2
#define ErrMEMO  3
#define ErrNOLZ  4

#define Hint short
typedef struct { 	unsigned Hint w;
			unsigned char K;
			} Hchr; 	/* Prefix + char */

typedef struct Hidxst {		Hint 	idx;
				struct Hidxst *next;
	} Hidx;

unsigned Hint bitsize = 9;
unsigned Hint tblmax  = 0x1FF;
unsigned Hint tblcnt  = 256;
unsigned Hint tblwrd  = 0;
Hint tblbit = 16;
unsigned Hint stkmax  = 2048;
Hchr	*symtbl;
Hidx	*symidx;
FILE	*f1, *f2;

errexit(s1,s2)
char	*s1,*s2;
{
	printf(s1,s2);	exit(-1);
}             /* errexit */

unsigned Hint hash(s)
Hchr	s;
{
	return (((unsigned Hint)s.w)^(s.K << HashShift));
}                          /* hash */

Hint lookup(s,h)
Hchr s; Hint h;
{
  Hidx	*np;
  register Hint i;

  for (np=&symidx[h]; np; np=np->next)    {
	if( ((i=np->idx)==-1) || ((s.w==symtbl[i].w)&&(s.K==symtbl[i].K)) )
		return(i);
  }                               /*  for */
  return(-1);
}                                    /*  lookup */

install(s,h,idx)
Hchr	s; unsigned Hint h,idx;
{
	Hidx	*np,*mp;
	int	i,j,offset;

	mp=&symidx[h];
	if (mp->idx != -1) { /* collision */
		i = 0; j = h;
		offset=h ? MaxTblIdx - h : 1;
		do {
			j -= offset;
			if (j<0)   j += MaxTblIdx;
			np = &symidx[j];
		} while ((++i <= MaxTryOut) && (np->idx != -1));
		if (np->idx != -1)  {
	collision++;
			allocate(np,1,Hidx);	if (!np) return(ErrMEMO);
		} /* if */
		memcpy(np,mp,sizeof(Hidx));
		mp->next = np;
	} /* if */
	mp->idx = idx;
	memcpy(&symtbl[idx],&s,sizeof(s));
	return(0);
}                          /* install */

outkey(w)
unsigned Hint w;
{
	tblbit -= bitsize;
	if (tblbit>0) tblwrd |= (unsigned) w << tblbit;
	else if (tblbit<0) 	{
		tblwrd |= (unsigned) w >> (-tblbit);
		putc(tblwrd >> 8,f2); putc(tblwrd,f2);
		tblbit += 16; tblwrd = (unsigned) w << tblbit;
	      } else	{
			tblwrd |= w;
			putc(tblwrd >> 8,f2); putc(tblwrd,f2);
			tblbit = 16; tblwrd = 0;
		} /* else */
	if (w==tblmax) { /* expand table */
		tblmax = (tblmax << 1) +1;
		bitsize++;
	} /* if */
}       /* outkey */

freeidx()
{
  Hidx *np,*mp;
  Hint i;

  for (i=0; i<=MaxTblIdx; i++) symidx[i].idx = -1;
  for (i=0; i<=MaxTblIdx; i++) {
	np = symidx[i].next;
	while (np) {
		mp = np; np = np->next;
		if (mp->idx == -1) break;
		else free(mp);
	}         /* while */
	symidx[i].next = NULL;
  }                          /* for */
}                   /* freeidx */

clrtbl()
{
  dbputl();
  while (bitsize <= MaxBitSize) outkey(tblmax);
  memset(symtbl,0,(MaxTblIdx+1)*sizeof(Hchr));
  freeidx();
  bitsize = 9; tblmax = 0x1FF; tblcnt = 256;
  collision = 0;
}                                /* clrtbl */

int LZWcomp(filein,fileout)
char *filein,*fileout;
{
  Hchr hkey;
  unsigned Hint h;
  Hint i;

  allocate(symtbl,MaxTblIdx+1,Hchr);
  allocate(symidx,MaxTblIdx+1,Hidx);
  if (!symtbl || !symidx)
	return(ErrMEMO);
  for (i=0; i<=MaxTblIdx; i++)
	symidx[i].idx = -1;
  if ((f1=fopen(filein,"r"))==NULL)
	return(ErrREAD);
  if ((f2=fopen(fileout,"w"))==NULL)
	return(ErrWRITE);
  setvbuf(f1,NULL,_IOFBF,4096);
  setvbuf(f2,NULL,_IOFBF,4096);
  putc('l',f2); putc('z',f2); /* header */
  hkey.w = getc(f1); /* 1st char */
  while (1) {
    hkey.K = getc(f1); if (feof(f1)) break;
    ngetc++;
    h = hash(hkey);
    if ((i=lookup(hkey,h)) != -1) /* hkey already exists */
		hkey.w = i;
    else {
		if (hkey.w >= tblmax) outkey(tblmax); /* send 111...1 */
		outkey(hkey.w);
		if (tblcnt > MaxTblIdx) clrtbl(); /* table full */
		else if (install(hkey,h,tblcnt++)) return(ErrMEMO);
		hkey.w = hkey.K;
    } /* else */
  } /* while */
  if (hkey.w >= tblmax) outkey(tblmax); /* send 111...1 */
  outkey(hkey.w); /* last code */
  if (tblbit < 8) {
	putc(tblwrd >> 8,f2); putc(tblwrd,f2);
  } /* if */
  else if (tblbit < 16)  putc(tblwrd >> 8,f2);
  fclose(f1); fclose(f2);
 free(symtbl); freeidx(); free(symidx);
  return (0);
} /* LZWcomp */

/* -------------------- decompress section ------------------------- */
Hint getwd(fp,nbit) /* return 'XXXX' or '00XX' */
FILE *fp; Hint *nbit;
{
  register unsigned int w;
  unsigned int v;

  w = getc(fp);
  if ( feof(fp)) *nbit = 0;
  else {
		v = getc(fp);
		if ( feof(fp)) *nbit = 8;
		else {
				w = ((unsigned) w <<8) | v;
				*nbit = 16;
			} /* else */
  } /* else */
  return(w);
}             /*  getwd */

unsigned Hint getcode()
{
	unsigned Hint code;
	Hint nbit;

	tblbit -= bitsize;
	if (tblbit > 0)
		code = ((unsigned) tblwrd >> tblbit);
	else if (tblbit < 0) {
		code = ((unsigned) tblwrd << (-tblbit));
		tblwrd = getwd(f1,&nbit); tblbit += nbit;
		if (tblbit < 0) return(-1);
		code |= ((unsigned) tblwrd >> tblbit);
	     } /* if */
	     else {
			code = tblwrd;
			tblwrd = getwd(f1,&tblbit);
	     } /* else */
	return(code & tblmax);
}                            	/* getcode */

int	LZWdecomp(filein,fileout)
char	*filein,*fileout;
{
	unsigned Hint code,oldcode,incode;
	unsigned char finchar,*stk;
	int sp;

	sp = 0; allocate(stk,stkmax,unsigned char);
	if ( (f1=fopen(filein,"r"))==NULL)
		return(ErrREAD);
	setvbuf(f1,NULL,_IOFBF,4096);
	if ((char)getc(f1)!='l')
		return(ErrNOLZ);
	if ((char)getc(f1)!='z')
		return(ErrNOLZ);
	if ( (f2=fopen(fileout,"w"))==NULL)
		return(ErrWRITE);
	setvbuf(f2,NULL,_IOFBF,4096);
	allocate(symtbl,MaxTblIdx+1,Hchr);
	tblwrd = getwd(f1,&tblbit);	/* 1st code */
	tblbit -= bitsize;
	if (tblbit>0) {
		finchar = oldcode = (unsigned) tblwrd >> tblbit;
		putc(oldcode,f2);
	}       /* if */
	while (1)	{
		code=getcode();
		if ((Hint)code+1==0) break;
		while (code==tblmax) { /* expand table */
			if (++bitsize > MaxBitSize) {
				bitsize=9; tblmax=0x1FF; tblcnt=256;
				finchar = oldcode = getcode(); /* 1st code */
				putc(oldcode,f2);
			} /* if */
			else	tblmax = ( tblmax << 1 ) +1;
			code = getcode();
		}                 /* while */
		incode = code;
		if (code >= tblcnt) { 	/* special case */
			stk[sp++] = finchar;
			code = oldcode;
		}                     /* if code */
		while (code>255)   { /* next symbol */
			if (sp >= stkmax)	{
				stkmax += 1024;	/* expand stack */
				stk = (unsigned char *)realloc(stk,stkmax);
			}             /* if sp */
			stk[sp++] = symtbl[code].K;
			code = symtbl[code].w;
		}                     /* while code */
		putc(code,f2);
		finchar = code;
		while (sp>0) putc(stk[--sp],f2);	/* clear stack */
		symtbl[tblcnt].w = oldcode; symtbl[tblcnt++].K = finchar;
		oldcode = incode;
	} /* while */
	fclose(f1); fclose(f2);
	return(0);
} /* LZWdecomp */

main(argc,argv)
int argc; char **argv;
{
	int retval;

	_fmode=O_BINARY;
	if (argc < 4) errexit("Usage: lzw [-c][-d] filein fileout\n",NULL);
	if (strncmp(argv[1],"-c",2)==NULL)
		retval=LZWcomp(argv[2],argv[3]);
	else	if (strncmp(argv[1],"-d",2)==NULL)
			retval=LZWdecomp(argv[2],argv[3]);
	else	errexit("lzw: option must be '-c' or '-d'.\n",NULL);
	dbputl();
	switch(retval)	{
	  case	ErrREAD:  printf("lzw: error read %s.\n",argv[2]);   break;
	  case	ErrWRITE: printf("lzw: error write %s.\n",argv[3]);  break;
	  case	ErrMEMO:  printf("lzw: memory no enough.\n"); 	     break;
	  case	ErrNOLZ:  printf("lzw: %s is not LZWed.\n",argv[2]); break;
	}               /* switch */
	return(retval);
}                                   /* main */

