16/ted5/JHUFF.C

   1 #include "ted5.h"
   2 #pragma hdrstop
   3
   4 long inlength,outlength;
   5
   6 long counts[256];
   7
   8 unsigned huffbits[256];
   9 unsigned long huffstring[256];
  10
  11 huffnode nodearray[256];        // 256 nodes is worst case
  12
  13 void CountBytes (unsigned char huge *start, long length);
  14 void Huffmanize (void);
  15 void OptimizeNodes (huffnode *table);
  16 long HuffCompress (unsigned char huge *source, long length,
  17   unsigned char huge *dest);
  18 void HuffExpand (unsigned char huge *source, unsigned char huge *dest,
  19   long length,huffnode *hufftable);
  20 void RLEWExpand (unsigned huge *source, unsigned huge *dest,long length,
  21   unsigned rlewtag);
  22 long RLEWCompress (unsigned huge *source, long length, unsigned huge *dest,
  23   unsigned rlewtag);
  24
  25 /*
  26 =============================================================================
  27
  28                            COMPRESSION SUBS
  29
  30 =============================================================================
  31 */
  32
  33
  34 /*
  35 ======================
  36 =
  37 = CountBytes
  38 =
  39 = Adds the bytes in the pointed to area to the counts array
  40 = If this is the first segment, make sure counts is zerod
  41 =
  42 ======================
  43 */
  44
  45 void CountBytes (unsigned char huge *start, long length)
  46 {
  47   long i;
  48
  49   while (length--)
  50     counts[*start++]++;
  51 }
  52
  53 /*
  54 =======================
  55 =
  56 = FindLeast
  57 =
  58 = Returns the byte with the lowest counts value
  59 =
  60 =======================
  61 */
  62
  63 int FindLeast (void)
  64 {
  65   int i,least;
  66   long low = 0x7fffffff;
  67
  68   for (i=0;i<256;i++)
  69     if (counts[i]<low)
  70     {
  71       low = counts[i];
  72       least = i;
  73     }
  74
  75   return least;
  76 }
  77
  78 /*========================================================================*/
  79
  80 /*
  81 ==================
  82 =
  83 = TraceNode
  84 =
  85 = A recursive function that follows all leaves of nodearray and fills in
  86 = coding tables huffbits and huffstring.
  87 =
  88 ==================
  89 */
  90
  91 void TraceNode (int nodenum,int numbits,unsigned long bitstring)
  92 {
  93   unsigned bit0,bit1;
  94
  95   bit0 = nodearray[nodenum].bit0;
  96   bit1 = nodearray[nodenum].bit1;
  97
  98   numbits++;
  99
 100   if (bit0 <256)
 101   {
 102     huffbits[bit0]=numbits;
 103     huffstring[bit0]=bitstring;         // just added a zero in front
 104     if (huffbits[bit0]>24 && counts[bit0])
 105     {
 106       puts("Error: Huffman bit string went over 32 bits!");
 107       exit(1);
 108     }
 109   }
 110   else
 111     TraceNode (bit0-256,numbits,bitstring);
 112
 113   if (bit1 <256)
 114   {
 115     huffbits[bit1]=numbits;
 116     huffstring[bit1]=bitstring+ (1ul<<(numbits-1));     // add a one in front
 117     if (huffbits[bit1]>24 && counts[bit1])
 118     {
 119       puts("Error: Huffman bit string went over 32 bits!");
 120       exit(1);
 121     }
 122   }
 123   else
 124     TraceNode (bit1-256,numbits,bitstring+(1ul<<(numbits-1)));
 125 }
 126
 127 /*
 128 =======================
 129 =
 130 = Huffmanize
 131 =
 132 = Takes the counts array and builds a huffman tree at
 133 = nodearray, then builds a codeing table.
 134 =
 135 =======================
 136 */
 137
 138 void Huffmanize (void)
 139 {
 140
 141 //
 142 // codes are either bytes if <256 or nodearray numbers+256 if >=256
 143 //
 144   unsigned value[256],code0,code1;
 145 //
 146 // probablilities are the number of times the code is hit or $ffffffff if
 147 // it is allready part of a higher node
 148 //
 149   unsigned long prob[256],low,workprob;
 150
 151   int i,worknode,bitlength;
 152   unsigned long bitstring;
 153
 154
 155 //
 156 // all possible leaves start out as bytes
 157 //
 158   for (i=0;i<256;i++)
 159   {
 160     value[i]=i;
 161     prob[i]=counts[i];
 162   }
 163
 164 //
 165 // start selecting the lowest probable leaves for the ends of the tree
 166 //
 167
 168   worknode = 0;
 169   while (1)     // break out of when all codes have been used
 170   {
 171     //
 172     // find the two lowest probability codes
 173     //
 174
 175     code0=0xffff;
 176     low = 0x7ffffffff;
 177     for (i=0;i<256;i++)
 178       if (prob[i]<low)
 179       {
 180         code0 = i;
 181         low = prob[i];
 182       }
 183
 184     code1=0xffff;
 185     low = 0x7fffffff;
 186     for (i=0;i<256;i++)
 187       if (prob[i]<low && i != code0)
 188       {
 189         code1 = i;
 190         low = prob[i];
 191       }
 192
 193     if (code1 == 0xffff)
 194     {
 195       if (value[code0]<256)
 196         Quit("Wierdo huffman error: last code wasn't a node!");
 197       if (value[code0]-256 != 254)
 198         Quit("Wierdo huffman error: headnode wasn't 254!");
 199       break;
 200     }
 201
 202     //
 203     // make code0 into a pointer to work
 204     // remove code1 (make 0xffffffff prob)
 205     //
 206     nodearray[worknode].bit0 = value[code0];
 207     nodearray[worknode].bit1 = value[code1];
 208
 209     value[code0] = 256 + worknode;
 210     prob[code0] += prob[code1];
 211     prob[code1] = 0xffffffff;
 212     worknode++;
 213   }
 214
 215 //
 216 // done with tree, now build table recursively
 217 //
 218
 219   TraceNode (254,0,0);
 220
 221 }
 222
 223 /*========================================================================*/
 224
 225
 226 /*
 227 ===============
 228 =
 229 = OptimizeNodes
 230 =
 231 = Goes through a huffman table and changes the 256-511 node numbers to the
 232 = actular address of the node.  Must be called before HuffExpand
 233 =
 234 ===============
 235 */
 236
 237 void OptimizeNodes (huffnode *table)
 238 {
 239   huffnode *node;
 240   int i;
 241
 242   node = table;
 243
 244   for (i=0;i<255;i++)
 245   {
 246     if (node->bit0 >= 256)
 247       node->bit0 = (unsigned)(table+(node->bit0-256));
 248     if (node->bit1 >= 256)
 249       node->bit1 = (unsigned)(table+(node->bit1-256));
 250     node++;
 251   }
 252 }
 253
 254 /*========================================================================*/
 255
 256 #if 0
 257 /*
 258 ======================
 259 =
 260 = HuffCompress
 261 =
 262 = The file must be counted with CountBytes and then Huffmanized first
 263 =
 264 ======================
 265 */
 266
 267 long HuffCompress (unsigned char huge *source, long length,
 268   unsigned char huge *dest)
 269 {
 270   long outlength;
 271   unsigned long string;
 272   unsigned biton,bits;
 273   unsigned char byte;
 274
 275   outlength = biton = 0;
 276
 277   *(long huge *)dest=0;         // so bits can be or'd on
 278
 279   while (length--)
 280   {
 281     byte = *source++;
 282     bits = huffbits[byte];
 283     string = huffstring[byte] << biton;
 284     *(long huge *)(dest+1)=0;   // so bits can be or'd on
 285     *(long huge *)dest |= string;
 286     biton += bits;              // advance this many bits
 287     dest+= biton/8;
 288     biton&=7;                   // stay under 8 shifts
 289     outlength+=bits;
 290   }
 291
 292   return (outlength+7)/8;
 293 }
 294 #endif
 295
 296
 297 /*========================================================================*/
 298
 299 /*
 300 ======================
 301 =
 302 = HuffExpand
 303 =
 304 ======================
 305 */
 306
 307 void HuffExpand (unsigned char huge *source, unsigned char huge *dest,
 308   long length,huffnode *hufftable)
 309
 310 {
 311   unsigned bit,byte,node,code;
 312   unsigned sourceseg,sourceoff,destseg,destoff,endoff;
 313   huffnode *nodeon,*headptr;
 314
 315   headptr = hufftable+254;      // head node is allways node 254
 316
 317   source++;     // normalize
 318   source--;
 319   dest++;
 320   dest--;
 321
 322   sourceseg = FP_SEG(source);
 323   sourceoff = FP_OFF(source);
 324   destseg = FP_SEG(dest);
 325   destoff = FP_OFF(dest);
 326   endoff = destoff+length;
 327
 328 //
 329 // ds:si source
 330 // es:di dest
 331 // ss:bx node pointer
 332 //
 333
 334         if (length <0xfff0)
 335         {
 336
 337 //--------------------------
 338 // expand less than 64k of data
 339 //--------------------------
 340
 341 asm mov bx,[headptr]
 342
 343 asm     mov     si,[sourceoff]
 344 asm     mov     di,[destoff]
 345 asm     mov     es,[destseg]
 346 asm     mov     ds,[sourceseg]
 347 asm     mov     ax,[endoff]
 348
 349 asm     mov     ch,[si]                         // load first byte
 350 asm     inc     si
 351 asm     mov     cl,1
 352
 353 expandshort:
 354 asm     test    ch,cl                   // bit set?
 355 asm     jnz     bit1short
 356 asm     mov     dx,[ss:bx]                      // take bit0 path from node
 357 asm     shl     cl,1                            // advance to next bit position
 358 asm     jc      newbyteshort
 359 asm     jnc     sourceupshort
 360
 361 bit1short:
 362 asm     mov     dx,[ss:bx+2]            // take bit1 path
 363 asm     shl     cl,1                            // advance to next bit position
 364 asm     jnc     sourceupshort
 365
 366 newbyteshort:
 367 asm     mov     ch,[si]                         // load next byte
 368 asm     inc     si
 369 asm     mov     cl,1                            // back to first bit
 370
 371 sourceupshort:
 372 asm     or      dh,dh                           // if dx<256 its a byte, else move node
 373 asm     jz      storebyteshort
 374 asm     mov     bx,dx                           // next node = (huffnode *)code
 375 asm     jmp     expandshort
 376
 377 storebyteshort:
 378 asm     mov     [es:di],dl
 379 asm     inc     di                                      // write a decopmpressed byte out
 380 asm     mov     bx,[headptr]            // back to the head node for next bit
 381
 382 asm     cmp     di,ax                           // done?
 383 asm     jne     expandshort
 384         }
 385         else
 386         {
 387
 388 //--------------------------
 389 // expand more than 64k of data
 390 //--------------------------
 391
 392   length--;
 393
 394 asm mov bx,[headptr]
 395 asm     mov     cl,1
 396
 397 asm     mov     si,[sourceoff]
 398 asm     mov     di,[destoff]
 399 asm     mov     es,[destseg]
 400 asm     mov     ds,[sourceseg]
 401
 402 asm     lodsb                   // load first byte
 403
 404 expand:
 405 asm     test    al,cl           // bit set?
 406 asm     jnz     bit1
 407 asm     mov     dx,[ss:bx]      // take bit0 path from node
 408 asm     jmp     gotcode
 409 bit1:
 410 asm     mov     dx,[ss:bx+2]    // take bit1 path
 411
 412 gotcode:
 413 asm     shl     cl,1            // advance to next bit position
 414 asm     jnc     sourceup
 415 asm     lodsb
 416 asm     cmp     si,0x10         // normalize ds:si
 417 asm     jb      sinorm
 418 asm     mov     cx,ds
 419 asm     inc     cx
 420 asm     mov     ds,cx
 421 asm     xor     si,si
 422 sinorm:
 423 asm     mov     cl,1            // back to first bit
 424
 425 sourceup:
 426 asm     or      dh,dh           // if dx<256 its a byte, else move node
 427 asm     jz      storebyte
 428 asm     mov     bx,dx           // next node = (huffnode *)code
 429 asm     jmp     expand
 430
 431 storebyte:
 432 asm     mov     [es:di],dl
 433 asm     inc     di              // write a decopmpressed byte out
 434 asm     mov     bx,[headptr]    // back to the head node for next bit
 435
 436 asm     cmp     di,0x10         // normalize es:di
 437 asm     jb      dinorm
 438 asm     mov     dx,es
 439 asm     inc     dx
 440 asm     mov     es,dx
 441 asm     xor     di,di
 442 dinorm:
 443
 444 asm     sub     [WORD PTR ss:length],1
 445 asm     jnc     expand
 446 asm     dec     [WORD PTR ss:length+2]
 447 asm     jns     expand          // when length = ffff ffff, done
 448
 449         }
 450
 451 asm     mov     ax,ss
 452 asm     mov     ds,ax
 453
 454 }
 455
 456
 457 //----------------- replaced by John C. ------------------------------------
 458 #if 0
 459 {
 460   unsigned bit,byte,node,code;
 461   unsigned sourceseg,sourceoff,destseg,destoff,endseg,endoff;
 462   huffnode *nodeon,*headptr;
 463
 464   nodeon = headptr = hufftable+254;     // head node is allways node 254
 465
 466   bit = 1;
 467   byte = *source++;
 468
 469   while (length)
 470   {
 471     if (byte&bit)
 472       code = nodeon->bit1;
 473     else
 474       code = nodeon->bit0;
 475
 476     bit<<=1;
 477     if (bit==256)
 478     {
 479       bit=1;
 480       byte = *source++;
 481     }
 482
 483     if (code<256)
 484     {
 485       *dest++=code;
 486       nodeon=headptr;
 487       length--;
 488     }
 489     else
 490       nodeon = (huffnode *)code;
 491   }
 492
 493
 494 #if 0
 495
 496   source++;     // normalize
 497   source--;
 498   dest++;
 499   dest--;
 500
 501   sourceseg = FP_SEG(source);
 502   sourceoff = FP_OFF(source);
 503   destseg = FP_SEG(dest);
 504   destoff = FP_OFF(dest);
 505
 506   length--;
 507 //
 508 // al = source byte
 509 // cl = bit in source (1,2,4,8,...)
 510 // dx = code
 511 //
 512 // ds:si source
 513 // es:di dest
 514 // ss:bx node pointer
 515 //
 516
 517 asm     mov     bx,headptr
 518 asm     mov     cl,1
 519
 520 asm     mov     si,sourceoff
 521 asm     mov     di,destoff
 522 asm     mov     es,destseg
 523 asm     mov     ds,sourceseg
 524
 525 asm     lodsb                   // load first byte
 526
 527 expand:
 528 asm     test    al,cl           // bit set?
 529 asm     jnz     bit1
 530 asm     mov     dx,ss:bx        // take bit0 path from node
 531 asm     jmp     gotcode
 532 bit1:
 533 asm     mov     dx,ss:bx+2      // take bit1 path
 534
 535 gotcode:
 536 asm     shl     cl,1            // advance to next bit position
 537 asm     jnc     sourceup
 538 asm     lodsb
 539 asm     cmp     si,0x10         // normalize ds:si
 540 asm     jb      sinorm
 541 asm     mov     cx,ds
 542 asm     inc     cx
 543 asm     mov     ds,cx
 544 asm     xor     si,si
 545 sinorm:
 546 asm     mov     cl,1            // back to first bit
 547
 548 sourceup:
 549 asm     or      dh,dh           // if dx<256 its a byte, else move node
 550 asm     jz      storebyte
 551 asm     mov     bx,dx           // next node = (huffnode *)code
 552 asm     jmp     expand
 553
 554 storebyte:
 555 asm     mov     [es:di],dl
 556 asm     inc     di              // write a decopmpressed byte out
 557 asm     mov     bx,headptr      // back to the head node for next bit
 558
 559 asm     cmp     di,0x10         // normalize es:di
 560 asm     jb      dinorm
 561 asm     mov     dx,es
 562 asm     inc     dx
 563 asm     mov     es,dx
 564 asm     xor     di,di
 565 dinorm:
 566
 567 asm     sub     WORD PTR ss:length,1
 568 asm     jnc     expand
 569 asm     dec     WORD PTR ss:length+2
 570 asm     jns     expand          // when length = ffff ffff, done
 571
 572 asm     mov     ax,ss
 573 asm     mov     ds,ax
 574
 575 #endif
 576
 577 }
 578 #endif
 579 //---------------------------------------------------------------------------
 580
 581 /*========================================================================*/
 582
 583 /*
 584 ======================
 585 =
 586 = RLEWcompress
 587 =
 588 ======================
 589 */
 590
 591 long RLEWCompress (unsigned huge *source, long length, unsigned huge *dest,
 592   unsigned rlewtag)
 593 {
 594   long complength;
 595   unsigned value,count,i;
 596   unsigned huge *start,huge *end;
 597
 598   start = dest;
 599
 600   end = source + (length+1)/2;
 601
 602 //
 603 // compress it
 604 //
 605   do
 606   {
 607     count = 1;
 608     value = *source++;
 609     while (*source == value && source<end)
 610     {
 611       count++;
 612       source++;
 613     }
 614     if (count>3 || value == rlewtag)
 615     {
 616     //
 617     // send a tag / count / value string
 618     //
 619       *dest++ = rlewtag;
 620       *dest++ = count;
 621       *dest++ = value;
 622     }
 623     else
 624     {
 625     //
 626     // send word without compressing
 627     //
 628       for (i=1;i<=count;i++)
 629         *dest++ = value;
 630     }
 631
 632   } while (source<end);
 633
 634   complength = 2*(dest-start);
 635   return complength;
 636 }
 637
 638
 639
 640 /*
 641 ======================
 642 =
 643 = RLEWexpand
 644 =
 645 ======================
 646 */
 647
 648 void RLEWExpand (unsigned huge *source, unsigned huge *dest,long length,
 649   unsigned rlewtag)
 650 {
 651   unsigned value,count,i;
 652   unsigned huge *end;
 653   unsigned sourceseg,sourceoff,destseg,destoff,endseg,endoff;
 654
 655
 656 //
 657 // expand it
 658 //
 659 #if 0
 660   do
 661   {
 662     value = *source++;
 663     if (value != rlewtag)
 664     //
 665     // uncompressed
 666     //
 667       *dest++=value;
 668     else
 669     {
 670     //
 671     // compressed string
 672     //
 673       count = *source++;
 674       value = *source++;
 675       for (i=1;i<=count;i++)
 676         *dest++ = value;
 677     }
 678   } while (dest<end);
 679 #endif
 680
 681   end = dest + (length)/2;
 682   sourceseg = FP_SEG(source);
 683   sourceoff = FP_OFF(source);
 684   destseg = FP_SEG(dest);
 685   destoff = FP_OFF(dest);
 686   endseg = FP_SEG(end);
 687   endoff = FP_OFF(end);
 688
 689
 690 //
 691 // ax = source value
 692 // bx = tag value
 693 // cx = repeat counts
 694 // dx = scratch
 695 //
 696 // NOTE: A repeat count that produces 0xfff0 bytes can blow this!
 697 //
 698
 699 asm     mov     bx,rlewtag
 700 asm     mov     si,sourceoff
 701 asm     mov     di,destoff
 702 asm     mov     es,destseg
 703 asm     mov     ds,sourceseg
 704
 705 expand:
 706 asm     lodsw
 707 asm     cmp     ax,bx
 708 asm     je      repeat
 709 asm     stosw
 710 asm     jmp     next
 711
 712 repeat:
 713 asm     lodsw
 714 asm     mov     cx,ax           // repeat count
 715 asm     lodsw                   // repeat value
 716 asm     rep stosw
 717
 718 next:
 719
 720 asm     cmp     si,0x10         // normalize ds:si
 721 asm     jb      sinorm
 722 asm     mov     ax,si
 723 asm     shr     ax,1
 724 asm     shr     ax,1
 725 asm     shr     ax,1
 726 asm     shr     ax,1
 727 asm     mov     dx,ds
 728 asm     add     dx,ax
 729 asm     mov     ds,dx
 730 asm     and     si,0xf
 731 sinorm:
 732 asm     cmp     di,0x10         // normalize es:di
 733 asm     jb      dinorm
 734 asm     mov     ax,di
 735 asm     shr     ax,1
 736 asm     shr     ax,1
 737 asm     shr     ax,1
 738 asm     shr     ax,1
 739 asm     mov     dx,es
 740 asm     add     dx,ax
 741 asm     mov     es,dx
 742 asm     and     di,0xf
 743 dinorm:
 744
 745 asm     cmp     di,ss:endoff
 746 asm     jne     expand
 747 asm     mov     ax,es
 748 asm     cmp     ax,ss:endseg
 749 asm     jb      expand
 750
 751 asm     mov     ax,ss
 752 asm     mov     ds,ax
 753
 754 }
 755
 756
 757 /*
 758 ======================
 759 =
 760 = RLEBcompress
 761 =
 762 ======================
 763 */
 764
 765 long RLEBCompress (unsigned char huge *source, long length,
 766   unsigned char huge *dest, unsigned char rlebtag)
 767 {
 768   long complength;
 769   unsigned char value,count;
 770   unsigned i;
 771   unsigned char huge *start,huge *end;
 772
 773   start = dest;
 774
 775   end = source+length;
 776
 777 //
 778 // compress it
 779 //
 780   do
 781   {
 782     count = 1;
 783     value = *source++;
 784     while (*source == value && source<end && count<255)
 785     {
 786       count++;
 787       source++;
 788     }
 789     if (count>3 || value == rlebtag)
 790     {
 791     //
 792     // send a tag / count / value string
 793     //
 794       *dest++ = rlebtag;
 795       *dest++ = count;
 796       *dest++ = value;
 797     }
 798     else
 799     {
 800     //
 801     // send byte without compressing
 802     //
 803       for (i=1;i<=count;i++)
 804         *dest++ = value;
 805     }
 806
 807   } while (source<end);
 808
 809   complength = dest-start;
 810   return complength;
 811 }
 812
 813
 814
 815 /*
 816 ======================
 817 =
 818 = RLEBExpand
 819 =
 820 ======================
 821 */
 822
 823 void RLEBExpand (unsigned char huge *source, unsigned char huge *dest,
 824   long length, unsigned char rlebtag)
 825 {
 826   unsigned char value,count;
 827   unsigned i;
 828   unsigned sourceseg,sourceoff,destseg,destoff,endseg,endoff;
 829   unsigned char huge *end;
 830
 831
 832 //
 833 // expand it
 834 //
 835 #if 0
 836   do
 837   {
 838     value = *source++;
 839     if (value != rlebtag)
 840     //
 841     // uncompressed
 842     //
 843       *dest++=value;
 844     else
 845     {
 846     //
 847     // compressed string
 848     //
 849       count = *source++;
 850       value = *source++;
 851       for (i=1;i<=count;i++)
 852         *dest++ = value;
 853     }
 854   } while (dest<end);
 855 #endif
 856
 857
 858   end = dest + (length);
 859   sourceseg = FP_SEG(source);
 860   sourceoff = FP_OFF(source);
 861   destseg = FP_SEG(dest);
 862   destoff = FP_OFF(dest);
 863   endseg = FP_SEG(end);
 864   endoff = FP_OFF(end);
 865
 866
 867 //
 868 // al = source value
 869 // bl = tag value
 870 // cl = repeat counts
 871 // dx = scratch
 872 //
 873 // NOTE: A repeat count that produces 0xfff0 bytes can blow this!
 874 //
 875
 876 asm     mov     bx,WORD PTR rlebtag
 877 asm     mov     si,sourceoff
 878 asm     mov     di,destoff
 879 asm     mov     es,destseg
 880 asm     mov     ds,sourceseg
 881 asm     xor     ch,ch
 882
 883 expand:
 884 asm     lodsb
 885 asm     cmp     al,bl
 886 asm     je      repeat
 887 asm     stosb
 888 asm     jmp     next
 889
 890 repeat:
 891 asm     lodsb
 892 asm     mov     cl,al           // repeat count
 893 asm     lodsb                   // repeat value
 894 asm     rep stosb
 895
 896 next:
 897
 898 asm     cmp     si,0x10         // normalize ds:si
 899 asm     jb      sinorm
 900 asm     mov     ax,si
 901 asm     shr     ax,1
 902 asm     shr     ax,1
 903 asm     shr     ax,1
 904 asm     shr     ax,1
 905 asm     mov     dx,ds
 906 asm     add     dx,ax
 907 asm     mov     ds,dx
 908 asm     and     si,0xf
 909 sinorm:
 910 asm     cmp     di,0x10         // normalize es:di
 911 asm     jb      dinorm
 912 asm     mov     ax,di
 913 asm     shr     ax,1
 914 asm     shr     ax,1
 915 asm     shr     ax,1
 916 asm     shr     ax,1
 917 asm     mov     dx,es
 918 asm     add     dx,ax
 919 asm     mov     es,dx
 920 asm     and     di,0xf
 921 dinorm:
 922
 923 asm     cmp     di,ss:endoff
 924 asm     jne     expand
 925 asm     mov     ax,es
 926 asm     cmp     ax,ss:endseg
 927 asm     jb      expand
 928
 929 asm     mov     ax,ss
 930 asm     mov     ds,ax
 931
 932
 933 }
 934
 935
 936
 937 ////////////////////////////////////////////////////////////////////////
 938 ////////////////////////////////////////////////////////////////////////
 939 ////////////////////////////////////////////////////////////////////////
 940 ////////////////////////////////////////////////////////////////////////
 941 ////////////////////////////////////////////////////////////////////////
 942 ////////////////////////////////////////////////////////////////////////
 943 ////////////////////////////////////////////////////////////////////////
 944
 945
 946 #if 1
 947 /*
 948 ==================
 949 =
 950 = CarmackCompress
 951 =
 952 = Compress a string of words
 953 =
 954 ==================
 955 */
 956
 957 #define NEARTAG 0xa7
 958 #define FARTAG  0xa8
 959
 960 long CarmackCompress (unsigned far *source,long length,
 961         unsigned far *dest)
 962 {
 963         unsigned        ch,chhigh;
 964         unsigned        far *instart, far *inptr, far *inscan,
 965                                 far *stopscan, far *outptr;
 966         unsigned        far *bestscan, beststring;
 967         unsigned        dist,maxstring,string,outlength;
 968         unsigned        nearrepeats,farrepeats;
 969         unsigned        dups,count;
 970         unsigned        newwords;
 971
 972 //
 973 // this compression method produces a stream of words
 974 // If the words high byte is NEARTAG or FARTAG, the low byte is a word
 975 // copy count from the a position specified by either the next byte
 976 // or the next word, respectively.  A copy count of 0 means to insert the
 977 // next byte as the low byte of the tag into the output
 978 //
 979
 980
 981 //
 982 // set up
 983 //
 984         instart = inptr = source;
 985         outptr = dest;
 986
 987         outlength = 0;
 988         length = (length+1)/2;
 989
 990         nearrepeats = farrepeats = dups = 0;
 991         count = 10;
 992         newwords = 0;
 993 //
 994 // compress
 995 //
 996         do
 997         {
 998                 ch = *inptr;
 999
1000 //
1001 // scan from start for patterns that match current data
1002 //
1003                 beststring = 0;
1004                 for (inscan = instart; inscan<inptr; inscan++)
1005                 {
1006                         if (*inscan != ch)
1007                                 continue;
1008
1009                         maxstring = inptr-inscan;
1010                         if (maxstring > length)
1011                                 maxstring = length;
1012                         if (maxstring > 255)
1013                                 maxstring = 255;
1014
1015                         string = 1;
1016                         while (*(inscan+string) == *(inptr+string) && string<maxstring)
1017                                 string++;
1018
1019                         if (string >= beststring)
1020                         {
1021                                 beststring = string;
1022                                 bestscan = inscan;
1023                         }
1024                 }
1025
1026                 if (beststring > 1 && inptr-bestscan <= 255)
1027                 {
1028                 // near string
1029                         *outptr++ = beststring + (NEARTAG*256);
1030                         *((unsigned char far *)outptr)++ = inptr-bestscan;
1031                         outlength += 3;
1032                         nearrepeats++;
1033                         inptr += beststring;
1034                         length -= beststring;
1035                 }
1036                 else if (beststring > 2)
1037                 {
1038                 // far string
1039                         *outptr++ = beststring + (FARTAG*256);
1040                         *outptr++ = bestscan-instart;
1041                         outlength += 4;
1042                         farrepeats++;
1043                         inptr += beststring;
1044                         length -= beststring;
1045                 }
1046                 else                                                    // no compression
1047                 {
1048                         chhigh = ch>>8;
1049                         if (chhigh == NEARTAG || chhigh == FARTAG)
1050                         {
1051                         // special case of encountering a
1052                         // tag word in the data stream
1053                         // send a length of 0, and follow it with the real low byte
1054                                 *outptr++ = ch & 0xff00;
1055                                 *((unsigned char far *)outptr)++ = ch&0xff;
1056                                 outlength += 3;
1057                                 dups++;
1058                         }
1059                         else
1060                         {
1061                                 *outptr++ = ch;
1062                                 outlength += 2;
1063                         }
1064                         inptr++;
1065                         length--;
1066                         newwords++;
1067                 }
1068
1069                 if (!--count)
1070                 {
1071                         static char cc[2]="-";
1072                         cc[0]^='+'^'-';
1073                         print(cc);
1074                         sx--;
1075
1076                         count = 10;
1077
1078                  if (keydown[1])
1079                  {
1080                   while(keydown[1]);
1081                   return 0;
1082                  }
1083                 }
1084                 if (length<0)
1085                         Quit ("Length < 0!");
1086         } while (length);
1087         #if 0
1088         printf ("%u near patterns\n",nearrepeats);
1089         printf ("%u far patterns\n",farrepeats);
1090         printf ("%u new words\n",newwords);
1091         printf ("%u dups\n\n",dups);
1092         #endif
1093         return outlength;
1094 }
1095 #else
1096
1097
1098
1099 /*
1100 ==================
1101 =
1102 = CarmackCompress
1103 =
1104 = Compress a string of words
1105 =
1106 ==================
1107 */
1108
1109 #define NEARTAG 0xa7
1110 #define FARTAG  0xa8
1111
1112 long CarmackCompress (unsigned far *source,long length,
1113         unsigned far *dest)
1114 {
1115         unsigned        wch,chhigh;
1116         unsigned        inptrx;
1117         unsigned        far *instart, far *inptr, far *inscan,
1118                                 far *stopscan, far *outptr;
1119         unsigned        far *bestscan, beststring;
1120         unsigned        dist,maxstring,string,outlength;
1121         unsigned        nearrepeats,farrepeats;
1122         unsigned        dups,count;
1123         unsigned        newwords;
1124
1125 //
1126 // this compression method produces a stream of words
1127 // If the words high byte is NEARTAG or FARTAG, the low byte is a word
1128 // copy count from the a position specified by either the next byte
1129 // or the next word, respectively.  A copy count of 0 means to insert the
1130 // next byte as the low byte of the tag into the output
1131 //
1132
1133
1134 //
1135 // set up
1136 //
1137         instart = inptr = bestscan = source;
1138         outptr = dest;
1139
1140         outlength = 0;
1141         length = (length+1)/2;
1142
1143         nearrepeats = farrepeats = dups = 0;
1144         count = 10;
1145         newwords = 0;
1146 //
1147 // compress
1148 //
1149         do
1150         {
1151                 wch = *inptr;
1152                 inptrx = FP_OFF(inptr)+2;       // convienient for cmpsw
1153
1154 //
1155 // scan from start for patterns that match current data
1156 //
1157 //================
1158
1159 //
1160 // ax:
1161 // bx: beststring
1162 // cx: repeat counts
1163 // dx: holding spot for inscan repeat count
1164 // si: inptr
1165 // di: inscan
1166 //
1167 asm     xor     bx,bx                                   // beststring = 0
1168
1169 asm     les     di,[instart]                    // start looking for an identical string
1170                                                                 // at the start of uncompressed text
1171 asm     mov     ax,es
1172 asm     mov     ds,ax                                   // both DS and ES point to uncompressed data
1173
1174 asm     mov     cx,WORD PTR [inptr]
1175 asm     sub     cx,di
1176 asm     shr     cx,1                                    // cx = words between inscan and inptr
1177
1178 search:
1179 asm     mov     ax,[wch]                                // current uncompressed word
1180 asm     repne   scasw                           // search from DI for up to CX words for a
1181                                                                 // first char match
1182 asm     jcxz    done                            // no more to search
1183
1184 asm     mov     dx,cx                                   // save off remaining repeat count
1185                                                                 // the number in CX is the remaining words
1186                                                                 // to search for a long pattern
1187 asm     mov     si,[inptrx]                             // SI is the current source data
1188                                                                 // DI is the match in previously compressed
1189                                                                 // data
1190 asm     mov     ax,di                                   // save off DI so we can back up after scan
1191 asm     repe    cmpsw                           // decrement CX for each additional match
1192 asm     jne     ended
1193 asm     dec     cx                                              // the search ended because of CX, not bad match
1194 ended:
1195 asm     mov     di,ax                                   // back to original spot
1196 asm     mov     ax,dx
1197 asm     sub     ax,cx                                   // AX is the total number of matching words
1198 asm     cmp     ax,bx                                   // more chars than beststring?
1199 asm     jb      next
1200 asm     mov     bx,ax
1201 asm     mov     WORD PTR [bestscan],di  // bestscan is start of the best match+2
1202
1203 next:
1204 asm     mov     cx,dx                                   // restore the remaining count
1205 asm     jmp     search
1206
1207 done:
1208 asm     mov     ax,ss
1209 asm     mov     ds,ax
1210 asm     mov     [beststring],bx                 // save out the register variable
1211 asm     dec     bx
1212 asm     sub     WORD PTR [bestscan],2   // scasw incremented past start
1213 //================
1214
1215                 if (beststring>length)
1216                         beststring = length;
1217
1218                 if (beststring > 1 && inptr-bestscan <= 255)
1219                 {
1220                 // near string
1221                         *outptr++ = beststring + (NEARTAG*256);
1222                         *((unsigned char far *)outptr)++ = inptr-bestscan;
1223                         outlength += 3;
1224                         nearrepeats++;
1225                         inptr += beststring;
1226                         length -= beststring;
1227                 }
1228                 else if (beststring > 2)
1229                 {
1230                 // far string
1231                         *outptr++ = beststring + (FARTAG*256);
1232                         *outptr++ = bestscan-instart;
1233                         outlength += 4;
1234                         farrepeats++;
1235                         inptr += beststring;
1236                         length -= beststring;
1237                 }
1238                 else                                                    // no compression
1239                 {
1240                         chhigh = wch>>8;
1241                         if (chhigh == NEARTAG || chhigh == FARTAG)
1242                         {
1243                         // special case of encountering a
1244                         // tag word in the data stream
1245                         // send a length of 0, and follow it with the real low byte
1246                                 *outptr++ = wch & 0xff00;
1247                                 *((unsigned char far *)outptr)++ = wch&0xff;
1248                                 outlength += 3;
1249                                 dups++;
1250                         }
1251                         else
1252                         {
1253                                 *outptr++ = wch;
1254                                 outlength += 2;
1255                         }
1256                         inptr++;
1257                         length--;
1258                         newwords++;
1259                 }
1260
1261                 if (!--count)
1262                 {
1263                  static char cc[2]="-";
1264                  cc[0]^='+'^'-';
1265                  print(cc);
1266                  sx--;
1267
1268                  count = 10;
1269
1270                  if (keydown[1])
1271                  {
1272                   while(keydown[1]);
1273                   return 0;
1274                  }
1275                 }
1276         } while (length);
1277 #if 0
1278         printf ("\r%u near patterns\n",nearrepeats);
1279         printf ("%u far patterns\n",farrepeats);
1280         printf ("%u new words\n",newwords);
1281         printf ("%u dups\n\n",dups);
1282 #endif
1283         return outlength;
1284 }
1285 #endif
1286
1287