4 long inlength,outlength;
8 unsigned huffbits[256];
9 unsigned long huffstring[256];
11 huffnode nodearray[256]; // 256 nodes is worst case
13 void CountBytes (unsigned char huge *start, long length);
14 void Huffmanize (void);
15 void OptimizeNodes (huffnode *table);
16 long HuffCompress (unsigned char huge *source, long length,
17 unsigned char huge *dest);
18 void HuffExpand (unsigned char huge *source, unsigned char huge *dest,
19 long length,huffnode *hufftable);
20 void RLEWExpand (unsigned huge *source, unsigned huge *dest,long length,
22 long RLEWCompress (unsigned huge *source, long length, unsigned huge *dest,
26 =============================================================================
30 =============================================================================
35 ======================
39 = Adds the bytes in the pointed to area to the counts array
40 = If this is the first segment, make sure counts is zerod
42 ======================
45 void CountBytes (unsigned char huge *start, long length)
54 =======================
58 = Returns the byte with the lowest counts value
60 =======================
66 long low = 0x7fffffff;
78 /*========================================================================*/
85 = A recursive function that follows all leaves of nodearray and fills in
86 = coding tables huffbits and huffstring.
91 void TraceNode (int nodenum,int numbits,unsigned long bitstring)
95 bit0 = nodearray[nodenum].bit0;
96 bit1 = nodearray[nodenum].bit1;
102 huffbits[bit0]=numbits;
103 huffstring[bit0]=bitstring; // just added a zero in front
104 if (huffbits[bit0]>24 && counts[bit0])
106 puts("Error: Huffman bit string went over 32 bits!");
111 TraceNode (bit0-256,numbits,bitstring);
115 huffbits[bit1]=numbits;
116 huffstring[bit1]=bitstring+ (1ul<<(numbits-1)); // add a one in front
117 if (huffbits[bit1]>24 && counts[bit1])
119 puts("Error: Huffman bit string went over 32 bits!");
124 TraceNode (bit1-256,numbits,bitstring+(1ul<<(numbits-1)));
128 =======================
132 = Takes the counts array and builds a huffman tree at
133 = nodearray, then builds a codeing table.
135 =======================
138 void Huffmanize (void)
142 // codes are either bytes if <256 or nodearray numbers+256 if >=256
144 unsigned value[256],code0,code1;
146 // probablilities are the number of times the code is hit or $ffffffff if
147 // it is allready part of a higher node
149 unsigned long prob[256],low,workprob;
151 int i,worknode,bitlength;
152 unsigned long bitstring;
156 // all possible leaves start out as bytes
165 // start selecting the lowest probable leaves for the ends of the tree
169 while (1) // break out of when all codes have been used
172 // find the two lowest probability codes
187 if (prob[i]<low && i != code0)
195 if (value[code0]<256)
196 Quit("Wierdo huffman error: last code wasn't a node!");
197 if (value[code0]-256 != 254)
198 Quit("Wierdo huffman error: headnode wasn't 254!");
203 // make code0 into a pointer to work
204 // remove code1 (make 0xffffffff prob)
206 nodearray[worknode].bit0 = value[code0];
207 nodearray[worknode].bit1 = value[code1];
209 value[code0] = 256 + worknode;
210 prob[code0] += prob[code1];
211 prob[code1] = 0xffffffff;
216 // done with tree, now build table recursively
223 /*========================================================================*/
231 = Goes through a huffman table and changes the 256-511 node numbers to the
232 = actular address of the node. Must be called before HuffExpand
237 void OptimizeNodes (huffnode *table)
246 if (node->bit0 >= 256)
247 node->bit0 = (unsigned)(table+(node->bit0-256));
248 if (node->bit1 >= 256)
249 node->bit1 = (unsigned)(table+(node->bit1-256));
254 /*========================================================================*/
258 ======================
262 = The file must be counted with CountBytes and then Huffmanized first
264 ======================
267 long HuffCompress (unsigned char huge *source, long length,
268 unsigned char huge *dest)
271 unsigned long string;
275 outlength = biton = 0;
277 *(long huge *)dest=0; // so bits can be or'd on
282 bits = huffbits[byte];
283 string = huffstring[byte] << biton;
284 *(long huge *)(dest+1)=0; // so bits can be or'd on
285 *(long huge *)dest |= string;
286 biton += bits; // advance this many bits
288 biton&=7; // stay under 8 shifts
292 return (outlength+7)/8;
297 /*========================================================================*/
300 ======================
304 ======================
307 void HuffExpand (unsigned char huge *source, unsigned char huge *dest,
308 long length,huffnode *hufftable)
311 unsigned bit,byte,node,code;
312 unsigned sourceseg,sourceoff,destseg,destoff,endoff;
313 huffnode *nodeon,*headptr;
315 headptr = hufftable+254; // head node is allways node 254
317 source++; // normalize
322 sourceseg = FP_SEG(source);
323 sourceoff = FP_OFF(source);
324 destseg = FP_SEG(dest);
325 destoff = FP_OFF(dest);
326 endoff = destoff+length;
331 // ss:bx node pointer
337 //--------------------------
338 // expand less than 64k of data
339 //--------------------------
343 asm mov si,[sourceoff]
346 asm mov ds,[sourceseg]
349 asm mov ch,[si] // load first byte
354 asm test ch,cl // bit set?
356 asm mov dx,[ss:bx] // take bit0 path from node
357 asm shl cl,1 // advance to next bit position
359 asm jnc sourceupshort
362 asm mov dx,[ss:bx+2] // take bit1 path
363 asm shl cl,1 // advance to next bit position
364 asm jnc sourceupshort
367 asm mov ch,[si] // load next byte
369 asm mov cl,1 // back to first bit
372 asm or dh,dh // if dx<256 its a byte, else move node
373 asm jz storebyteshort
374 asm mov bx,dx // next node = (huffnode *)code
379 asm inc di // write a decopmpressed byte out
380 asm mov bx,[headptr] // back to the head node for next bit
382 asm cmp di,ax // done?
388 //--------------------------
389 // expand more than 64k of data
390 //--------------------------
397 asm mov si,[sourceoff]
400 asm mov ds,[sourceseg]
402 asm lodsb // load first byte
405 asm test al,cl // bit set?
407 asm mov dx,[ss:bx] // take bit0 path from node
410 asm mov dx,[ss:bx+2] // take bit1 path
413 asm shl cl,1 // advance to next bit position
416 asm cmp si,0x10 // normalize ds:si
423 asm mov cl,1 // back to first bit
426 asm or dh,dh // if dx<256 its a byte, else move node
428 asm mov bx,dx // next node = (huffnode *)code
433 asm inc di // write a decopmpressed byte out
434 asm mov bx,[headptr] // back to the head node for next bit
436 asm cmp di,0x10 // normalize es:di
444 asm sub [WORD PTR ss:length],1
446 asm dec [WORD PTR ss:length+2]
447 asm jns expand // when length = ffff ffff, done
457 //----------------- replaced by John C. ------------------------------------
460 unsigned bit,byte,node,code;
461 unsigned sourceseg,sourceoff,destseg,destoff,endseg,endoff;
462 huffnode *nodeon,*headptr;
464 nodeon = headptr = hufftable+254; // head node is allways node 254
490 nodeon = (huffnode *)code;
496 source++; // normalize
501 sourceseg = FP_SEG(source);
502 sourceoff = FP_OFF(source);
503 destseg = FP_SEG(dest);
504 destoff = FP_OFF(dest);
509 // cl = bit in source (1,2,4,8,...)
514 // ss:bx node pointer
525 asm lodsb // load first byte
528 asm test al,cl // bit set?
530 asm mov dx,ss:bx // take bit0 path from node
533 asm mov dx,ss:bx+2 // take bit1 path
536 asm shl cl,1 // advance to next bit position
539 asm cmp si,0x10 // normalize ds:si
546 asm mov cl,1 // back to first bit
549 asm or dh,dh // if dx<256 its a byte, else move node
551 asm mov bx,dx // next node = (huffnode *)code
556 asm inc di // write a decopmpressed byte out
557 asm mov bx,headptr // back to the head node for next bit
559 asm cmp di,0x10 // normalize es:di
567 asm sub WORD PTR ss:length,1
569 asm dec WORD PTR ss:length+2
570 asm jns expand // when length = ffff ffff, done
579 //---------------------------------------------------------------------------
581 /*========================================================================*/
584 ======================
588 ======================
591 long RLEWCompress (unsigned huge *source, long length, unsigned huge *dest,
595 unsigned value,count,i;
596 unsigned huge *start,huge *end;
600 end = source + (length+1)/2;
609 while (*source == value && source<end)
614 if (count>3 || value == rlewtag)
617 // send a tag / count / value string
626 // send word without compressing
628 for (i=1;i<=count;i++)
632 } while (source<end);
634 complength = 2*(dest-start);
641 ======================
645 ======================
648 void RLEWExpand (unsigned huge *source, unsigned huge *dest,long length,
651 unsigned value,count,i;
653 unsigned sourceseg,sourceoff,destseg,destoff,endseg,endoff;
663 if (value != rlewtag)
675 for (i=1;i<=count;i++)
681 end = dest + (length)/2;
682 sourceseg = FP_SEG(source);
683 sourceoff = FP_OFF(source);
684 destseg = FP_SEG(dest);
685 destoff = FP_OFF(dest);
686 endseg = FP_SEG(end);
687 endoff = FP_OFF(end);
693 // cx = repeat counts
696 // NOTE: A repeat count that produces 0xfff0 bytes can blow this!
714 asm mov cx,ax // repeat count
715 asm lodsw // repeat value
720 asm cmp si,0x10 // normalize ds:si
732 asm cmp di,0x10 // normalize es:di
758 ======================
762 ======================
765 long RLEBCompress (unsigned char huge *source, long length,
766 unsigned char huge *dest, unsigned char rlebtag)
769 unsigned char value,count;
771 unsigned char huge *start,huge *end;
784 while (*source == value && source<end && count<255)
789 if (count>3 || value == rlebtag)
792 // send a tag / count / value string
801 // send byte without compressing
803 for (i=1;i<=count;i++)
807 } while (source<end);
809 complength = dest-start;
816 ======================
820 ======================
823 void RLEBExpand (unsigned char huge *source, unsigned char huge *dest,
824 long length, unsigned char rlebtag)
826 unsigned char value,count;
828 unsigned sourceseg,sourceoff,destseg,destoff,endseg,endoff;
829 unsigned char huge *end;
839 if (value != rlebtag)
851 for (i=1;i<=count;i++)
858 end = dest + (length);
859 sourceseg = FP_SEG(source);
860 sourceoff = FP_OFF(source);
861 destseg = FP_SEG(dest);
862 destoff = FP_OFF(dest);
863 endseg = FP_SEG(end);
864 endoff = FP_OFF(end);
870 // cl = repeat counts
873 // NOTE: A repeat count that produces 0xfff0 bytes can blow this!
876 asm mov bx,WORD PTR rlebtag
892 asm mov cl,al // repeat count
893 asm lodsb // repeat value
898 asm cmp si,0x10 // normalize ds:si
910 asm cmp di,0x10 // normalize es:di
937 ////////////////////////////////////////////////////////////////////////
938 ////////////////////////////////////////////////////////////////////////
939 ////////////////////////////////////////////////////////////////////////
940 ////////////////////////////////////////////////////////////////////////
941 ////////////////////////////////////////////////////////////////////////
942 ////////////////////////////////////////////////////////////////////////
943 ////////////////////////////////////////////////////////////////////////
952 = Compress a string of words
960 long CarmackCompress (unsigned far *source,long length,
964 unsigned far *instart, far *inptr, far *inscan,
965 far *stopscan, far *outptr;
966 unsigned far *bestscan, beststring;
967 unsigned dist,maxstring,string,outlength;
968 unsigned nearrepeats,farrepeats;
973 // this compression method produces a stream of words
974 // If the words high byte is NEARTAG or FARTAG, the low byte is a word
975 // copy count from the a position specified by either the next byte
976 // or the next word, respectively. A copy count of 0 means to insert the
977 // next byte as the low byte of the tag into the output
984 instart = inptr = source;
988 length = (length+1)/2;
990 nearrepeats = farrepeats = dups = 0;
1001 // scan from start for patterns that match current data
1004 for (inscan = instart; inscan<inptr; inscan++)
1009 maxstring = inptr-inscan;
1010 if (maxstring > length)
1012 if (maxstring > 255)
1016 while (*(inscan+string) == *(inptr+string) && string<maxstring)
1019 if (string >= beststring)
1021 beststring = string;
1026 if (beststring > 1 && inptr-bestscan <= 255)
1029 *outptr++ = beststring + (NEARTAG*256);
1030 *((unsigned char far *)outptr)++ = inptr-bestscan;
1033 inptr += beststring;
1034 length -= beststring;
1036 else if (beststring > 2)
1039 *outptr++ = beststring + (FARTAG*256);
1040 *outptr++ = bestscan-instart;
1043 inptr += beststring;
1044 length -= beststring;
1046 else // no compression
1049 if (chhigh == NEARTAG || chhigh == FARTAG)
1051 // special case of encountering a
1052 // tag word in the data stream
1053 // send a length of 0, and follow it with the real low byte
1054 *outptr++ = ch & 0xff00;
1055 *((unsigned char far *)outptr)++ = ch&0xff;
1071 static char cc[2]="-";
1085 Quit ("Length < 0!");
1088 printf ("%u near patterns\n",nearrepeats);
1089 printf ("%u far patterns\n",farrepeats);
1090 printf ("%u new words\n",newwords);
1091 printf ("%u dups\n\n",dups);
1104 = Compress a string of words
1109 #define NEARTAG 0xa7
1112 long CarmackCompress (unsigned far *source,long length,
1115 unsigned wch,chhigh;
1117 unsigned far *instart, far *inptr, far *inscan,
1118 far *stopscan, far *outptr;
1119 unsigned far *bestscan, beststring;
1120 unsigned dist,maxstring,string,outlength;
1121 unsigned nearrepeats,farrepeats;
1122 unsigned dups,count;
1126 // this compression method produces a stream of words
1127 // If the words high byte is NEARTAG or FARTAG, the low byte is a word
1128 // copy count from the a position specified by either the next byte
1129 // or the next word, respectively. A copy count of 0 means to insert the
1130 // next byte as the low byte of the tag into the output
1137 instart = inptr = bestscan = source;
1141 length = (length+1)/2;
1143 nearrepeats = farrepeats = dups = 0;
1152 inptrx = FP_OFF(inptr)+2; // convienient for cmpsw
1155 // scan from start for patterns that match current data
1162 // cx: repeat counts
1163 // dx: holding spot for inscan repeat count
1167 asm xor bx,bx // beststring = 0
1169 asm les di,[instart] // start looking for an identical string
1170 // at the start of uncompressed text
1172 asm mov ds,ax // both DS and ES point to uncompressed data
1174 asm mov cx,WORD PTR [inptr]
1176 asm shr cx,1 // cx = words between inscan and inptr
1179 asm mov ax,[wch] // current uncompressed word
1180 asm repne scasw // search from DI for up to CX words for a
1182 asm jcxz done // no more to search
1184 asm mov dx,cx // save off remaining repeat count
1185 // the number in CX is the remaining words
1186 // to search for a long pattern
1187 asm mov si,[inptrx] // SI is the current source data
1188 // DI is the match in previously compressed
1190 asm mov ax,di // save off DI so we can back up after scan
1191 asm repe cmpsw // decrement CX for each additional match
1193 asm dec cx // the search ended because of CX, not bad match
1195 asm mov di,ax // back to original spot
1197 asm sub ax,cx // AX is the total number of matching words
1198 asm cmp ax,bx // more chars than beststring?
1201 asm mov WORD PTR [bestscan],di // bestscan is start of the best match+2
1204 asm mov cx,dx // restore the remaining count
1210 asm mov [beststring],bx // save out the register variable
1212 asm sub WORD PTR [bestscan],2 // scasw incremented past start
1215 if (beststring>length)
1216 beststring = length;
1218 if (beststring > 1 && inptr-bestscan <= 255)
1221 *outptr++ = beststring + (NEARTAG*256);
1222 *((unsigned char far *)outptr)++ = inptr-bestscan;
1225 inptr += beststring;
1226 length -= beststring;
1228 else if (beststring > 2)
1231 *outptr++ = beststring + (FARTAG*256);
1232 *outptr++ = bestscan-instart;
1235 inptr += beststring;
1236 length -= beststring;
1238 else // no compression
1241 if (chhigh == NEARTAG || chhigh == FARTAG)
1243 // special case of encountering a
1244 // tag word in the data stream
1245 // send a length of 0, and follow it with the real low byte
1246 *outptr++ = wch & 0xff00;
1247 *((unsigned char far *)outptr)++ = wch&0xff;
1263 static char cc[2]="-";
1278 printf ("\r%u near patterns\n",nearrepeats);
1279 printf ("%u far patterns\n",farrepeats);
1280 printf ("%u new words\n",newwords);
1281 printf ("%u dups\n\n",dups);