From: sparky4 Date: Fri, 13 Jun 2014 17:06:51 +0000 (-0500) Subject: modified: 16/DOS_GFX.EXE X-Git-Url: http://4ch.mooo.com/gitweb/?a=commitdiff_plain;h=b50a0eb714c64dee65050539243e02ef2aa308b5;p=16.git modified: 16/DOS_GFX.EXE modified: 16/DOS_GFX.OBJ modified: 16/Project 16.bfproject modified: 16/dos_gfx.cpp modified: 16/dos_gfx.h new file: 16/lib/x/MODEX.BAK new file: 16/scrasm/80X86.ASC new file: 16/scrasm/80X86.TXT new file: 16/scrasm/CONSTANT.INC new file: 16/scrasm/DIAGONAL.MAP new file: 16/scrasm/DIAGONAL.PAL new file: 16/scrasm/DIAGONAL.TIL new file: 16/scrasm/GENMAP.C new file: 16/scrasm/GENMAP.EXE new file: 16/scrasm/GENMAP.LNK new file: 16/scrasm/GENMAP.OBJ new file: 16/scrasm/GENPAL.C new file: 16/scrasm/GENPAL.EXE new file: 16/scrasm/GENPAL.LNK new file: 16/scrasm/GENPAL.OBJ new file: 16/scrasm/GENSQ.C new file: 16/scrasm/GENSQ.EXE new file: 16/scrasm/GENSQ.LNK new file: 16/scrasm/GENSQ.OBJ new file: 16/scrasm/INIT.INC new file: 16/scrasm/KEYB.INC new file: 16/scrasm/LZTIMER.ASM new file: 16/scrasm/LZTIMER.OBJ new file: 16/scrasm/MAIN.ASM new file: 16/scrasm/MAIN.OBJ new file: 16/scrasm/MAKEFILE new file: 16/scrasm/MAP.INC new file: 16/scrasm/MODEX.INC new file: 16/scrasm/PAGE.INC new file: 16/scrasm/PALETTE.INC new file: 16/scrasm/SCROLL.DOC new file: 16/scrasm/SCROLL.EXE new file: 16/scrasm/SCROLL.INC new file: 16/scrasm/SCROLL.LNK new file: 16/scrasm/SCROLL.MAP new file: 16/scrasm/SCROLL.PAL new file: 16/scrasm/SCROLL.TIL new file: 16/scrasm/SPRITE.INC --- diff --git a/16/DOS_GFX.EXE b/16/DOS_GFX.EXE index 13b89317..d4df1da3 100644 Binary files a/16/DOS_GFX.EXE and b/16/DOS_GFX.EXE differ diff --git a/16/DOS_GFX.OBJ b/16/DOS_GFX.OBJ index 35e3f4ed..f17a0b16 100644 Binary files a/16/DOS_GFX.OBJ and b/16/DOS_GFX.OBJ differ diff --git a/16/Project 16.bfproject b/16/Project 16.bfproject index b5009ceb..378d9de2 100644 --- a/16/Project 16.bfproject +++ b/16/Project 16.bfproject @@ -1,7 +1,7 @@ c2e.convert_special: 0 e2c.convert_num: 0 -openfiles: 
/dos/z/16/16/dos_gfx.cpp:8135:7436:1: -openfiles: /dos/z/16/16/dos_gfx.h:327:0:0: +openfiles: /dos/z/16/16/dos_gfx.cpp:1620:647:1: +openfiles: /dos/z/16/16/dos_gfx.h:665:373:0: openfiles: /dos/z/16/16/dos_kb.c:1039:46:0: openfiles: /dos/z/16/16/dos_kb.h:23:0:0: openfiles: /dos/z/16/16/lib/lib_com.cpp:0:0:0: @@ -80,25 +80,25 @@ recent_files: file:///dos/z/16/16/xx.bat recent_files: file:///dos/z/16/16/lib/x/MXPN.ASM recent_files: file:///dos/z/4x4_16/!/c/TUT10.C recent_files: file:///dos/z/16/16/lib/x/MXVS.ASM -recent_files: file:///dos/z/16/16/lib/x/MODEX.H -recent_files: file:///dos/z/4x4_16/modex/DEMO01.PAS -recent_files: file:///dos/z/4x4_16/modex/DEMO07.PAS +recent_files: file:///dos/z/16/16/lib/x/MAKEFILE +recent_files: file:///dos/z/16/16/lib/x/MODEX.DEF recent_files: file:///dos/z/16/16/dos_gfx.h +recent_files: file:///dos/z/16/16/dos_gfx.cpp recent_files: file:///dos/z/16/16/dos_kb.c recent_files: file:///dos/z/16/16/dos_kb.h recent_files: file:///dos/z/16/16/lib/lib_com.cpp -recent_files: file:///dos/z/16/16/lib/lib_com.h recent_files: file:///dos/z/16/16/16.txt +recent_files: file:///dos/z/16/16/lib/lib_com.h recent_files: file:///dos/z/16/16/scroll.txt recent_files: file:///dos/z/16/16/project16.txt -recent_files: file:///dos/z/16/16/lib/intro/lib.c -recent_files: file:///dos/z/16/src/lib/dos_gfx.h +recent_files: file:///dos/z/16/16/lib/x/MODEX.H +recent_files: file:///dos/z/4x4_16/modex/DEMO07.PAS recent_files: file:///dos/z/16/16/lib/x/MXBB.ASM -recent_files: file:///dos/z/16/src/lib/dos_gfx.cpp +recent_files: file:///dos/z/16/src/lib/dos_gfx.h +recent_files: file:///dos/z/4x4_16/modex/DEMO01.PAS recent_files: file:///dos/z/16/16/lib/x/MXCR.ASM -recent_files: file:///dos/z/16/16/dos_gfx.cpp -recent_files: file:///dos/z/16/16/lib/x/MAKEFILE -recent_files: file:///dos/z/16/16/lib/x/MODEX.DEF +recent_files: file:///dos/z/16/16/lib/intro/lib.c +recent_files: file:///dos/z/16/src/lib/dos_gfx.cpp snr_replacetype: 0 savedir: file:///dos/z/16/16 
spell_check_default: 1 diff --git a/16/dos_gfx.cpp b/16/dos_gfx.cpp index 41b7d533..52e259db 100644 --- a/16/dos_gfx.cpp +++ b/16/dos_gfx.cpp @@ -48,10 +48,11 @@ void setvideo(/*byte mode, */short vq){ mxSetMode( MX_320x240 ); // mxSetVirtualScreen(SW+(SW/4), SH+(SH/4)); // mxSetVirtualScreen(SW*2, SH*2); - mxSetVirtualScreen(VW,(VH+(TILEWH*BUFFMX))); + mxSetVirtualScreen(VW,BH); // mxSetVirtualScreen((640-(TILEWH*4)),(480-(TILEWH*4))); mxSetClip(true); - mxSetClipRegion(0, 0, VW, (VH+(TILEWH*BUFFMX))); + mxSetClipRegion(0, 0, VW, BH); + mxPan(TILEWH*2,TILEWH*2); //mxSetClipRegion(0, VH+1, VW, (TILEWH*BUFFMX)); } } @@ -212,17 +213,17 @@ short ding(int q){ } } // fixer - if(q!=16){ + if(q!=16){ #ifdef TILE if(xx<0) xx=(VW-TILEWH); if(yy<0) yy=(VH-TILEWH); if(xx>(VW-TILEWH)) xx=0; - if(yy>(VH-TILEWH)/*+(TILEWH*BUFFMX)*/) yy=0; - #else + if(yy>(VH-TILEWH)/*+(TILEWH*BUFFMX)*/) yy=0; + #else if(xx<0) xx=VW; if(yy<0) yy=VH; if(xx>VW) xx=0; - if(yy>VH) yy=0; + if(yy>VH) yy=0; #endif } @@ -272,8 +273,8 @@ int main(void) // main variables d=4; // switch variable key=4; // default screensaver number - xpos=0; - ypos=0; + xpos=TILEWH*2; + ypos=TILEWH*2; xdir=1; ydir=1; setvideo(1); @@ -319,21 +320,21 @@ int main(void) mxPutPixel(VW-1, y, 15); } - getch(); + getch(); //text box - mxSetTextColor(10, OP_TRANS); //set font - mxBitBlt(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, VH); //copy background - mxFillBox(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, OP_SET); // background for text box - //+(QUADWH*6) - mxOutText(xpos+1, ypos+SH-48, "========================================"); + mxSetTextColor(10, OP_TRANS); //set font + mxBitBlt(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, BS); //copy background + mxFillBox(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, OP_SET); // background for text box + //+(QUADWH*6) + mxOutText(xpos+1, ypos+SH-48, "========================================"); mxOutText(xpos+1, ypos+SH-40, "| |Chikyuu:$line1"); mxOutText(xpos+1, ypos+SH-32, "| 
|$line2"); mxOutText(xpos+1, ypos+SH-24, "| |$line3"); - mxOutText(xpos+1, ypos+SH-16, "| |$line4"); - mxOutText(xpos+1, ypos+SH-8, "========================================"); - mxFillBox(xpos+QUADWH, ypos+QUADWH+(TILEWH*12), TILEWH*2, TILEWH*2, 9, OP_SET); - getch(); - mxBitBlt(0, VH, 320, TILEWH*BUFFMX, xpos, ypos+(TILEWH*12)); //copy background + mxOutText(xpos+1, ypos+SH-16, "| |$line4"); + mxOutText(xpos+1, ypos+SH-8, "========================================"); + mxFillBox(xpos+QUADWH, ypos+QUADWH+(TILEWH*12), TILEWH*2, TILEWH*2, 9, OP_SET); //portriat~ + getch(); + mxBitBlt(0, BS, 320, TILEWH*BUFFMX, xpos, ypos+(TILEWH*12)); //copy background //mxBitBlt(0, (TILEWH*12)+1, 320, TILEWH*3, 0, 0); getch(); while(!kbhit()){ @@ -341,18 +342,19 @@ int main(void) // scrolly(1); // vScroll(1); // delay(100); - //for(int i=0;i(VW-SW-1)) || (xpos<1))delay(500); + mxWaitRetrace(); //} if( (xpos>(VW-SW-1)) || (xpos<1)){xdir=-xdir;} - if( (ypos>((VH+(TILEWH*BUFFMX))-SH-1)) || (ypos<1)){ydir=-ydir;} // { Hit a boundry, change + if( (ypos>(BH-SH-1)) || (ypos<1)){ydir=-ydir;} // { Hit a boundry, change // direction! } } ch=getch(); @@ -360,7 +362,8 @@ int main(void) if(ch==0x1b)break; // 'ESC' } setvideo(0); - printf("wwww\nVirtual Resolution: %dx%d\n", VW,VH); + printf("wwww\nFull Buffer Virtual Resolution: %dx%d\n", VW,BH); + printf("Virtual Resolution: %dx%d\n", VW,VH); printf("Resolution: %dx%d\n", SW,SH); printf("Mode X Library Version: %d\n", mxGetVersion()); printf("bakapi ver. 1.04.09.04\nis made by sparky4i†ƒÖ…j feel free to use it ^^\nLicence: GPL v2\n"); diff --git a/16/dos_gfx.h b/16/dos_gfx.h index b9045057..231c1061 100644 --- a/16/dos_gfx.h +++ b/16/dos_gfx.h @@ -21,8 +21,12 @@ #define SW 320 #define SH 240 -#define VW 560 -#define VH 416 +//#define VW 560 +//#define VH 416 +#define VW (SW+64) +#define VH (SH+64) +#define BS (VH*2) // buffer space! 
not BULLSHIT +#define BH BS+(TILEWH*BUFFMX) // buffer resolution //void drawChar(int x, int y, int color, byte c); //void drawText(int x, int y, int color, byte string); diff --git a/16/lib/x/MODEX.BAK b/16/lib/x/MODEX.BAK new file mode 100644 index 00000000..560a1c68 Binary files /dev/null and b/16/lib/x/MODEX.BAK differ diff --git a/16/scrasm/80X86.ASC b/16/scrasm/80X86.ASC new file mode 100644 index 00000000..e16af972 --- /dev/null +++ b/16/scrasm/80X86.ASC @@ -0,0 +1,164 @@ +_80x86 OPTIMIZATION_ +by Michael Abrash + + +[LISTING ONE] + +; Copies one string to another string, converting all characters to +; uppercase in the process, using a loop containing LODSB and STOSB. +; Adapted from Zen of Assembly Language, by Michael Abrash; not a +; standalone program, but designed to be used with the Zen timer from +; that book via the Zen timer's PZTIME.BAT batch file: ZTimerOn starts +; the clock, ZTimerOff stops it, and the test-bed program linked in by +; PZTIME.BAT starts the program, reports the results, and ends. + + jmp Skip ;skip over data in CS and subroutine + +SourceString label word ;sample string to copy + db 'This space intentionally left not blank',0 +DestString db 100 dup (?) ;destination for copy + +; Copies one zero-terminated string to another string, +; converting all characters to uppercase. +; Input: DS:SI = start of source string; DS:DI = start of destination buffer +; Output: none +; Registers altered: AX, BX, SI, DI, ES +; Direction flag cleared + +CopyStringUpper: + mov ax,ds + mov es,ax ;for STOS + mov bl,'a' ;set up for fast register-register + mov bh,'z' ; comparisons + cld +StringUpperLoop: + lodsb ;get next character and point to following character + cmp al,bl ;below 'a'? + jb IsUpper ;yes, not lowercase + cmp al,bh ;above 'z'? 
+ ja IsUpper ;yes, not lowercase + and al,not 20h ;is lowercase-make uppercase +IsUpper: + stosb ;put character into new string and point to + ; following location + and al,al ;is this the zero that marks end of the string? + jnz StringUpperLoop ;no, do the next character + ret + +; Calls CopyStringUpper to copy & convert SourceString->DestString. +Skip: + call ZTimerOn ;start timing + mov si,offset SourceString ;point SI to the string to copy from + mov di,offset DestString ;point DI to the string to copy to + call CopyStringUpper ;copy & convert to uppercase + call ZTimerOff ;stop timing + + +[LISTING TWO] + +; Copies one string to another string, converting all characters to +; uppercase in the process, using no string instructions. +; Not a standalone program, but designed to be used with the Zen +; timer, as described in Listing 1. + + jmp Skip ;skip over data in CS and subroutine + +SourceString label word ;sample string to copy + db 'This space intentionally left not blank',0 +DestString db 100 dup (?) ;destination for copy + +; Copies one zero-terminated string to another string, +; converting all characters to uppercase. +; Input: DS:SI = start of source string; DS:DI = start of destination string +; Output: none +; Registers altered: AL, BX, SI, DI + +CopyStringUpper: + mov bl,'a' ;set up for fast register-register + mov bh,'z' ; comparisons +StringUpperLoop: + mov al,[si] ;get the next character and + inc si ; point to the following character + cmp al,bl ;below 'a'? + jb IsUpper ;yes, not lowercase + cmp al,bh ;above 'z'? + ja IsUpper ;yes, not lowercase + and al,not 20h ;is lowercase-make uppercase +IsUpper: + mov [di],al ;put the character into the new string and + inc di ; point to the following location + and al,al ;is this the zero that marks the end of the string? + jnz StringUpperLoop ;no, do the next character + ret + +; Calls CopyStringUpper to copy & convert SourceString->DestString. 
+Skip: + call ZTimerOn + mov si,offset SourceString ;point SI to the string to copy from + mov di,offset DestString ;point DI to the string to copy to + call CopyStringUpper ;copy & convert to uppercase + call ZTimerOff + + +[LISTING THREE] + +; Clears a buffer using MOV/ADD in a loop. +; Not a standalone program, but designed to be used with the Zen +; timer, as described in Listing 1. + + mov dx,2 ;repeat the test code twice, to make + ; sure it's in the cache (if there is one) + mov bx,dx ;distance from the start of one word + ; to the start of the next + sub ax,ax ;set buffer to zeroes +TestTwiceLoop: + mov cx,1024 ;clear 1024 words starting at address + mov di,8000h ; DS:8000h (this is just unused memory + ; past the end of the program) + call ZTimerOn ;start timing (resets timer to 0) +StoreLoop: + mov [di],ax ;clear the current word + add di,bx ;point to the next word + dec cx ;count off words to clear until none + jnz StoreLoop ; remain + call ZTimerOff ;stop timing + dec dx ;count off passes through test code + jz StoreDone ;that was the second pass; we're done + jmp TestTwiceLoop ;that was first pass; do second pass with all + ; instructions and data in the cache +StoreDone: + + +[LISTING FOUR] + +; Clears a buffer using MOV/ADD in an unrolled loop. +; Not a standalone program, but designed to be used with the Zen +; timer, as described in Listing 1. 
+ + mov dx,2 ;repeat the test code twice, to make + ; sure it's in the cache (if there is one) + mov bx,dx ;distance from the start of one word + ; to the start of the next + sub ax,ax ;set buffer to zeroes +TestTwiceLoop: + mov si,1024 ;clear 1024 words starting at address + mov di,8000h ; DS:8000h (this is just unused memory + ; past the end of the program) + call ZTimerOn ;start timing (resets timer to 0) + mov cl,4 ;divide the count of words to clear by + shr si,cl ; 16, because we'll clear 16 words + ; each time through the loop +StoreLoop: + REPT 16 ;clear 16 words in a row without looping + mov [di],ax ;clear the current word + add di,bx ;point to the next word + ENDM + dec si ;count off blocks of 16 words to clear + jnz StoreLoop ; until none remain + call ZTimerOff ;stop timing + dec dx ;count off passes through test code + jz StoreDone ;that was the second pass; we're done + jmp TestTwiceLoop ;that was the first pass; do the second pass + ; with all instructions and data in the cache +StoreDone: + diff --git a/16/scrasm/80X86.TXT b/16/scrasm/80X86.TXT new file mode 100644 index 00000000..afba3707 --- /dev/null +++ b/16/scrasm/80X86.TXT @@ -0,0 +1,494 @@ +Journal: Dr. Dobb's Journal March 1991 v16 n3 p16(8) +----------------------------------------------------------------------------- +Title: 80x86 optimization: aim down the middle and pray. (80x86 family of + microprocessors) (tutorial) +Author: Abrash, Michael. +AttFile: Program: 80X86.ASC Source code listing. + +Summary: Optimizing code for 8088, 80286, 80386 and 80486 microprocessors + is difficult because the chips use significantly different memory + architectures and instruction execution times. Code cannot be + optimized for the 80x86 family; rather, code must be designed to + produce good performance on a range of systems or optimized for + particular combinations of processors and memory. 
Programmers + must avoid the unusual instructions supported by the 8088, which + have lost their performance edge in subsequent chips. String + instructions should be used but not relied upon. Registers should + be used rather than memory operations. Branching is also slow for + all four processors. Memory accesses should be aligned to improve + performance. Generally, optimizing an 80486 requires exactly the + opposite steps as optimizing an 8088. +----------------------------------------------------------------------------- +Descriptors.. +Company: Intel Corp. (Products). +Ticker: INTC. +Product: Intel 80286 (Microprocessor) (Programming) + Intel 80386 (Microprocessor) (Programming) + Intel 80486 (Microprocessor) (Programming) + Intel 8088 (Microprocessor) (Programming). +Topic: Microprocessors + Optimization + Programming + Tutorial + Assembly Language + Guidelines + Type-In Programs + Microcode + Processor Architecture. +Feature: illustration + graph. +Caption: Official and actual cycles per binary-to-hex ASCII conversion. + (graph) + Actual performance in microseconds of two solutions to a problem. + (graph) + Actual performance of three clearing approaches across the 80x86 + family. (graph) + +----------------------------------------------------------------------------- +Full Text: + +Optimization + +Picture this: You're an archer aiming at a target 100 feet away. A strong +wind comes up and pushes each arrow to the left as it flies. Naturally, you +compensate by aiming farther to the right. That's what it's like optimizing +for the 8088; once you learn to compensate for the strong but steady effects +of the prefetch queue and the 8-bit bus, you can continue merrily on your +programming way. + +Now the wind starts gusting unpredictably. There's no way to compensate, so +you just aim for the bull's-eye and hope for the best. 
That's what it's like +writing code for good performance across the entire 80x86 family, or even for +the 286/386SX/386 heart of today's market. You just aim down the middle and +pray. + +The New World of the 80x86 + +In the beginning, the 8088 was king, and that was good. The optimization +rules weren't obvious, but once you learned them, you could count on them +serving you well on every computer out there. + +Not so these days. There are four major processor types--8088, 80286, 80386, +and 80486--with a bewildering array of memory architectures: cached (in +several forms), page mode, static-column RAM, interleaved, and, of course, +the 386SX, with its half-pint memory interface. The processors offer wildly +differing instruction execution times, and memory architectures warp those +times further by affecting the speed of instruction fetching and access to +memory operands. Because actual performance is a complex interaction of +instruction characteristics, instruction execution times, and memory access +speed, the myriad processor-memory combinations out there make "exact +performance" a meaningless term. A specific instruction sequence may run at +a certain speed on a certain processor in a certain system, but that often +says little about the performance of the same instructions on a different +processor, or even on the same processor with a different memory system. The +result: Precise optimization for the general PC market is a thing of the +past. (We're talking about optimizing for speed here; optimizing for size is +the same for all processors so long as you stick to 8088-compatible code.) + +So there is no way to optimize performance ideally across the 80x86 family. +An optimization that suits one processor beautifully is often a dog on +another. Any 8088 programmer would instinctively replace: + +DEC CX JNZ LOOPTOP + +with: + +LOOP LOOPTOP + +because LOOP is significantly faster on the 8088. LOOP is also faster on the +286. 
On the 386, however, LOOP is actually two cycles slower than DEC/JNZ. +The pendulum swings still further on the 486, where LOOP is about twice as +slow as DEC/JNZ--and, mind you, we're talking about what was originally +perhaps the most obvious optimization in the entire 80x86 instruction set. + +In short, there is no such thing as code that's truly optimized for the +80x86. Instead, code is either optimized for specific processor-memory +combinations, or aimed down the middle, designed to produce good performance +across a range of systems. Optimizing for the 80x86 family by aiming down +the middle is quite different from optimizing for the 8088, but many PC +programmers are inappropriately still applying the optimization lore they've +learned over the years on the PC (or AT). The world has changed, and many of +those old assumptions and tricks don't hold true anymore. + +You will not love the new world of 80x86 optimization, which is less precise +and offers fewer clever tricks than optimizing for the 8088 alone. Still, +isn't it better to understand the forces affecting your code's performance +out in the real world than to optimize for a single processor and hope for +the best? + +Better, yes. As much fun, no. Optimizing for the 8088 was just about as +good as it gets. So it goes. + +Optimization Rules for a New World + +So, how do you go about writing fast code nowadays? One way is to write +different versions of critical code for various processors and memory access +speeds, selecting the best version at runtime. That's a great solution, but +it requires an awful lot of knowledge and work. + +An alternative is to optimize for one particular processor and settle for +whatever performance you get on the others. This might make sense when the +8088 is the target processor because it certainly needs the optimization more +than any other processor. However, 8088 optimization works poorly at the +upper end of the 80x86 family. 
+ +Nowadays, though, most of us want to optimize for the 286 and 386 systems +that dominate the market, or across all 80x86 processors, and that's a tough +nut to crack. The 286 and 386 come in many configurations, and you can be +sure, for example, that a 386SX, an interleaved 386, and a cached 386 have +markedly different performance characteristics. There are, alas, no hard and +fast optimization rules that apply across all these environments. + +My own approach to 80x86 optimization has been to develop a set of general +rules that serve reasonably well throughout the 80x86 line, especially the +286 and 386, and to select a specific processor (in my case a cached 386, for +which cycle times tend to be accurate) to serve as the tiebreaker when +optimization details vary from one processor to another. (Naturally, it's +only worth bothering with these optimizations in critical code.) The rules +I've developed are: + +* Avoid accessing memory operands; use the registers to the max. + +* Don't branch. + +* Use string instructions, but don't go much out of your way to do so. + +* Keep memory accesses to a minimum by avoiding memory operands and keeping +instructions short. + +* Align memory accesses. + +* Forget about many of those clever 8088 optimizations, using oddball +instructions such as DAA and XLAT, that you spent years learning. + +Next I'll discuss each of these rules in turn in the context of +8088-compatible real mode, which is still the focus of the 80x86 world. +Later, I'll touch on protected mode. + +Let's start by looking at the last--and most surprising--rule. + +Kiss Those Tricks Goodbye + +To skilled assembly language programmers, the 8088 is perhaps the most +wonderful processor ever created, largely because the instruction set is +packed with odd instructions that are worthless to compilers but can work +miracles in the hands of clever assembly programmers. 
Unfortunately, each +new generation of the 80x86 has rendered those odd instructions and marvelous +tricks less desirable. As the execution time for the commonly used +instruction ADD BX, 4 has gone down from four cycles (8088) to three cycles +(286) to two cycles (386) to one cycle (486), the time for the less +frequently used instruction CBW has gone from two cycles (8088 and 286) up to +three cycles (386 and 486)! + +Consider this ancient optimization for converting a binary digit to hex +ASCII: + +ADD AL,90H DAA ADC AL,40H DAA + +Now consider the standard alternative: + +ADD AL,'0' CMP AL,'9' JBE HaveAscii ADD AL,'A'-('9'+1) HaveAscii: + +As Figure 1 indicates, the standard code should be slower on an 8088 or 286, +but faster on a 386 or a 486--and real-world tests confirm those results, as +shown in Figure 2. (All "actual performance" timings in this article were +performed with the Zen timer from Zen of Assembly Language; see "References" +for details. The systems used for the tests were: 8088, standard 4.77 MHz PC +XT; 80286, standard one-wait-state, 8 MHz PC AT; 386SX, 16 MHz noncached; +80386, 20 MHz externally cached with all instructions and data in external +cache for all tests except Listings One and Two; 80486, 25 MHz internally +cached, with all instructions and data in internal cache for all tests except +Listings One and Two.) + +In other words, this nifty, time-tested optimization is an anti-optimization +on the 386 and 486. + +Why is this? On the 386, DAA--a rarely used instruction--takes four cycles, +and on the 486 it takes two cycles, in both cases twice as long as the more +common instructions CMP and ADD; in contrast, on the 8088 all three +instructions are equally fast at four cycles. Also, the instruction-fetching +advantage that the 1-byte DAA provides on the 8088 means nothing on a cached +386. + +Nor is this an isolated example.
Most oddball instructions, from AAA to +XCHG, have failed to keep pace with the core instructions--ADC, ADD, AND, +CALL, CMP, DEC, INC, Jcc, JMP, LEA, MOV, OR, POP, PUSH, RET, SBB, SUB, TEST, +and XOR--during the evolution from 8088 to 486. As we saw earlier, even LOOP +lags behind on the 386 and 486. Check your favorite tricks for yourself; +they might or might not hold up on the 386, but will most likely be +liabilities on the 486. Sorry, but I just report the news, and the news is: +Kiss most of those tricks goodbye as the 386 and 486 come to dominate the +market. (This means that hand-optimization in assembly language yields less +of a performance boost nowadays than it did when the 8088 was king; the +improvement is certainly significant, but rarely in the 200-500 percent range +anymore. Sic transit gloria mundi.) Most startling of all, string +instructions lose much of their allure as we move away from the 8088, hitting +bottom on the 486. + +The 486: All the Rules Change + +The 486 represents a fundamental break with 8088-style optimization. +Virtually all the old rules fail on the 486, where, incredibly, a move to or +from memory often takes just one cycle, but exchanging two registers takes +three cycles. The nonbranching core instructions mentioned earlier take only +one cycle on the 486 when operating on registers; MOV can, under most +conditions, access memory in one cycle; and CALL and JMP take only three +cycles, given a cache hit. However, noncore instructions take considerably +longer. XLAT takes four cycles; even STC and CLC take two cycles each. The +486's highly asymmetric execution times heavily favor core instructions and +defeat most pre-486 optimizations. + +Core instructions do have a weakness on the 486. While 486 MOVs involving +memory are remarkably fast, accessing memory for an operand to OR, ADD, or +the like costs cycles. 
Even with the 8K internal cache, memory is not as +fast as registers, except when MOV is used (and sometimes not even then), so +registers are still preferred operands. (AND [BX],1 is fast, at only three +cycles, but AND BX,1 takes only one cycle--three times as fast.) + +OUT should be avoided whenever possible on the 486, and likewise for IN. OUT +takes anywhere from 10 to 31 cycles, depending on processor mode and +privileges, more than an order of magnitude slower than MOV. The lousy +performance of OUT -- true on the 386 as well -- has important implications +for graphics applications. + +String instructions are so slow on the 486 that you should check cycle times +before using any string instruction other than the always superior REP MOVS. +For example, LODSB takes five cycles on the 486, but MOV AL,[SI]/INC SI takes +only two cycles; likewise for STOSB and MOV [DI],AL/INC DI. Listing One +(page 73) uses LODSB/STOSB to copy a string, converting lowercase to +uppercase while copying; Listing Two (page 73) uses MOV/INC instead. Figure +3 summarizes the performance of the two routines on a variety of processors; +note the diminishing effectiveness of string instructions on the newer +processors. Think long and hard before using string instructions other than +REP MOVS on the 486. + +Optimization for the 486 is really a whole new ball game. When optimizing +across the 80x86 family, the 486 will generally be the least of your worries +because it is so much faster than the rest of the family; anything that runs +adequately on any other processor will look terrific on the 486. Still, the +future surely holds millions of 486s, so it wouldn't hurt to keep one eye on +the 486 as you optimize. + +String Instructions: Fading Stars + +On the 8088, string instructions are so far superior to other instructions +that it's worth going to great lengths to use them, but they lose much of +that status on newer processors.
One of the best things about string +instructions on the 8088 is that they require little instruction fetching, +because they're 1-byte instructions and because of the REP prefix; however, +instruction fetching is less of a bottleneck on newer processors. String +instructions also have superior cycle times on the 8088, but that advantage +fades on the 286 and 386 as well. + +On the 286, string instructions (when they do exactly what you need) are +still clearly better than the alternatives. On the 386, however, some string +instructions are, even under ideal circumstances, the best choice only by a +whisker, if at all. For example, since Day One, clearing a buffer has been +done with REP STOS. That's certainly faster than the looping MOV/ADD +approach shown in Listing Three (page 73), but on the 386 and 486 it's no +faster than the unrolled loop MOV/ADD approach of Listing Four (page 73), as +shown in Figure 4. (Actually, in my tests REP STOS was a fraction of a cycle +slower on the 386, and fractionally faster on the 486.) REP STOS is much +easier to code and more compact, so it's still the approach of choice for +buffer clearing--but it's not necessarily fastest on a 486 or fast-memory +386. This again demonstrates just how unreliable the old optimization rules +are on the newer processors. + +The point is not that you shouldn't use string instructions on the 386. REP +MOVS is the best way to move data, and the other string instructions are +compact and usually faster, especially on uncached systems. However, on the +386 it's no longer worth going to the trouble of juggling registers and +reorganizing data structures to use string instructions. Furthermore, when +you truly need maximum performance on the 386, check out nonstring +instructions in unrolled loops. It goes against every lesson learned in a +decade of 8088 programming, but avoiding string instructions sometimes pays +on the 386.
+ +The Siren Song of Memory Accesses + +Finally, here's a rule that's constant from the 8088 to the 486: Use the +registers. Avoid memory. + +Don't be fooled by the much faster memory access times of the 286 and 386. +The effective address calculation time of the 8088 is mostly gone, so MOV +AX,[BX] takes only five cycles on the 286, and ADD [SI],DX takes only seven +on the 386. That's so much faster than the 17 and 29 cycles, respectively, +that they take on the 8088 that you might start thinking that memory is +pretty much interchangeable with registers. + +Think again. MOV AX,BX is still more than twice as fast as MOV AX,[BX] on +the 286, and ADD SI,DX is more than three times as fast as ADD [SI],DX on the +386. Memory operands can also reduce performance by slowing instruction +fetching. Memory is fast on the 286 and 386. Registers are faster. Use +them as heavily as possible. + +Don't Branch + +Here's another rule that stays the same across the 80x86 family: Don't +branch. Branching suffers on the 8088 from lengthy cycle counts and emptying +the prefetch queue. Emptying the prefetch queue is a lesser but nonetheless +real problem in the post-8088 world, and the cycle counts of branches are +still killers. As Figure 4 indicates, it pays to eliminate branches by +unrolling loops or using repeated string instructions. + +Modern-Day Instruction Fetching + +Instruction fetching is the bugbear of 8088 performance; the 8088 simply +can't fetch instruction bytes as quickly as it can execute them, thanks to +its undersized bus. Minimizing all memory accesses, including instruction +fetches, is paramount on the 8088. + +Instruction fetching is less of a problem nowadays. Figure 5 shows the +maximum rates at which various processors can fetch instruction bytes; +clearly, matters have improved considerably since the 8088, although +instructions also execute in fewer cycles on the newer processors. 
Fetching +problems can occur on any 80x86 processor, even the 486, but the only +processors other than the 8088 that face major instruction fetching problems +are the one-wait-state 286 and the 386SX, although uncached 386s may also +outrun memory. However, the problems here are different from and less +serious than with the 8088. + +Consider: An 8088 executes a register ADD in three cycles, but requires eight +cycles to fetch that instruction, a fetch/execute ratio of 2.67. A +one-wait-state 286 requires three cycles to fetch a register ADD and executes +it in two cycles, a ratio of 1.5. A 386SX can fetch a register ADD in two +cycles, matching the execution time nicely, and a cached 386 can fetch two +register ADDs in the two cycles it takes to execute just one. For +register-only code--the sort of code critical loops should contain--the 386 +generally runs flat out, and the 286 and 386SX usually (not always, but +usually) outrun memory by only a little at worst. Greater fetching problems +can arise when working with large instructions or instruction sequences that +access memory nonstop, but those are uncommon in critical code. This is a +welcome change from the 8088, where small, register-only instructions tend to +suffer most from inadequate instruction fetching. + +Also, uncached 386 systems often use memory architectures that provide +zero-wait-state performance when memory is accessed sequentially. In +register-only code, instruction fetches are the only memory accesses, so +fetching proceeds at full speed when the registers are used heavily. + +So, is instruction fetching a problem in the post-8088 world? Should +instructions be kept short? + +Yes. Smaller instructions can help considerably on the one-wait-state 286 +and on the 386SX. Not as much as on the 8088, but it's still worth the +trouble. Even a cached 386 can suffer from fetching problems, although +that's fairly uncommon. 
For example, when several MOV WORD PTR [MemVar],0 +instructions are executed in a row, as might happen when initializing memory +variables, performance tends to fall far below rated speed, as shown in +Figure 6. The particular problem with MOV WORD PTR [MemVar],0 is that it +executes in just two (386) or three (286) cycles, yet has both an addressing +displacement field and a constant field. This eats up memory bandwidth by +requiring more instruction fetching. It also accesses memory, eating up +still more bandwidth. We'll see this again, and worse, when we discuss +protected mode. + +Generally, though, post-8088 processors with fast memory systems and +full-width buses run most instructions at pretty near their official cycle +times; for these systems, optimization consists mostly of counting cycles. +Slower memory or constricted buses (as in the 386SX) require that memory +accesses (both instruction fetches and operand accesses) be minimized as +well. Fortunately, the same sort of code--register only--meets both +requirements. + +Use the registers. Avoid constants. Avoid displacements. Don't branch. +That's the big picture. Don't sweat the details. + +Alignment: The Easy Optimization + +The 286, 386SX, and 386 take twice as long to access memory words at odd +addresses as at even addresses. The 386 takes twice as long to access memory +dwords at addresses that aren't multiples of four as those that are. You +should use ALIGN 2 to word align all word-sized data, and ALIGN 4 to dword +align all data that's accessed as a dword operand, as in: + +ALIGN 4 MemVar dd ? : MOV EAX,[MemVar] + +Alignment also applies to code; you may want to word or dword align the +starts of procedures, labels that can only be reached by branching, and the +tops of loops. (Code alignment matters only at branch targets, because only +the first instruction fetch after a branch can suffer from nonalignment.) 
+Dword alignment of code is optimal, and will help on the 386 even in real +mode, but word alignment will produce nearly as much improvement as dword +alignment without wasting nearly as many bytes. + +Alignment improves performance on many 80x86 systems without hindering it on +any. Recommended. + +Protected Mode + +There are two sorts of protected mode, 16-bit and 32-bit. The primary +optimization characteristic of 16-bit protected mode (OS/2 1.X, Rational DOS +Extender) is that it takes an ungodly long time to load a segment register +(for example, MOV ES,AX takes 17 cycles on a 286) so load segment registers +as infrequently as possible in 16-bit protected mode. + +Optimizing for 32-bit protected mode (OS/2 2.0, SCO Unix, Phar Lap DOS +Extender) is another matter entirely. Typically, no segment loads are needed +because of the flat address space. However, 32-bit protected mode code can +be bulky, and that can slow instruction fetching. Constants and addressing +displacements can be as large as 4 bytes each, and an extra byte, the SIB +byte, is required whenever two 32-bit registers are used to address an +operand or scaled addressing is used. So, for example, MOV DWORD PTR +[MemVar],0 is a 10-byte instruction in 32-bit protected mode. The +instruction is supposed to execute in two cycles, but even a 386 needs four +to six cycles to fetch it, plus another two cycles to access memory; a few +such instructions in a row can empty the prefetch queue and slow performance +considerably. The slowdown occurs more quickly and is more acute on a 386SX, +which needs 14 cycles to perform the memory accesses for this nominally +2-cycle instruction. + +Code can get even larger when 32-bit instructions are executed in 16-bit +segments, adding prefix bytes. (Avoid prefix bytes if you can; they increase +instruction size and can cost cycles.) Figure 7 shows actual versus nominal +cycle times of multiple MOV DWORD PTR [EBX*4+MemVar],0 instructions running +in a 16-bit segment. 
Although cache type (write-back, write-through) and +main-memory write time also affect the performance of stores to memory, there +is clearly a significant penalty for using several large (in this case, +13-byte) instructions in a row. + +Fortunately, this is a worst case, easily avoided by keeping constants and +displacements out of critical loops. For example, you should replace: + +ADDLOOP: MOV DWORD PTR BaseTable[EDX+EBX],0 ADD EBX,4 DEC ECX JNZ ADDLOOP + +with: + +LEA EBX,BaseTable[EDX+EBX] SUB EAX,EAX ADDLOOP: MOV [EBX],EAX ADD EBX,4 +DEC ECX JNZ ADDLOOP + +Better yet, use REP STOSD or unroll the loop! + +Happily, register-only instructions are no larger in 32-bit protected mode +than otherwise and run at or near their rated speed in 32-bit protected mode +on all processors. All in all, in protected mode it's more important than +ever to avoid large constants and displacements and to use the registers as +much as possible. + +Conclusion + +Optimization across the 80x86 family isn't as precise as 8088 optimization, +and it's a lot less fun, with fewer nifty tricks and less spectacular +speed-ups. Still, familiarity with the basic 80x86 optimization rules can +give you a decided advantage over programmers still laboring under the +delusion that the 286, 386, and 486 are merely faster 8088s. + +References + +Abrash, Michael. Zen of Assembly Language. Glenview, Ill.: Scott, Foresman, +1990. + +Barrenechea, Mark. "Peak Performance: On to the 486." Programmer's Journal, +(November-December 1990). + +Paterson, Tim. "Assembly Language Tricks of the Trade." Dr. Dobb's Journal +(March 1990). + +Turbo Assembler Quick Reference Guide. Borland International, 1990. + +i486 Microprocessor Programmer's Reference Manual. Intel Corporation, 1989. + +80386 Programmer's Reference Manual. Intel Corporation, 1986. + +Microsystems Components Handbook: Microprocessors Volume I. Intel +Corporation, 1985. 
diff --git a/16/scrasm/CONSTANT.INC b/16/scrasm/CONSTANT.INC new file mode 100644 index 00000000..02ce404b --- /dev/null +++ b/16/scrasm/CONSTANT.INC @@ -0,0 +1,127 @@ +PEL_READ_REG EQU 03C7h ;Color register, read address +PEL_WRITE_REG EQU 03C8h ;Color register, write address +PEL_DATA_REG EQU 03C9h ;Color register, data port +SC_INDEX equ 03C4h ;Sequence Controller Index +CRTC_INDEX equ 03D4h ;CRT Controller Index +MISC_OUTPUT equ 03C2h ;Miscellaneous Output register +SCREEN_SEG equ 0a000h ;segment of display memory in mode X +INPUT_STATUS_1 equ 03DAh ;Input Status 1 register +ATC_INDEX equ 03C0h ;Attribute Controller +START_ADDRESS_HIGH equ 0Ch ;bitmap start address high byte +START_ADDRESS_LOW equ 0Dh ;bitmap start address low byte +GC_INDEX EQU 03CEh ;Graphics Controller Index +BIT_MASK EQU 08h ;presumably the GC Bit Mask register index -- confirm +MAP_MASK EQU 02h ;presumably the SC Map Mask register index -- confirm + +ALL_COPY_BITS EQU 00000h+BIT_MASK +ALL_DRAW_BITS EQU 0FF00h+BIT_MASK + +SQUARE_WIDTH EQU 16 +SQUARE_HEIGHT EQU 16 +SCREEN_WIDTH EQU 320 +SCREEN_HEIGHT EQU 240 +VIRTUAL_WIDTH EQU 352 +VIRTUAL_HEIGHT EQU 240 + +PAGE_0 EQU 0 +PAGE_1 EQU 05540h ;05470h ;5540h +PAGE_2 EQU 0AA80h ;0A8E0h ;AA80h + +SCROLL_SPEED EQU 1 ; Don't let it go above 8! 
+MAGIC_NUM EQU 100 ; same value GENMAP.C stores in the map header magic field + +CPU8086 EQU 0 ; CPU class codes stored in nCPU by CPUType +CPU80286 EQU 1 +CPU80386 EQU 2 +CPU80486 EQU 3 + +;====================================================================== +; Key Assignments: each value is scan code * 2, the word offset of that key in _keyFlags +;====================================================================== +kESC EQU 2 +kONE EQU 4 +kTWO EQU 6 +kTHREE EQU 8 +kFOUR EQU 10 +kFIVE EQU 12 +kSIX EQU 14 +kSEVEN EQU 16 +kEIGHT EQU 18 +kNINE EQU 20 +kZERO EQU 22 +kMINUS EQU 24 +kEQUAL EQU 26 +kBACKSPACE EQU 28 +kTAB EQU 30 +kQ EQU 32 +kW EQU 34 +kE EQU 36 +kR EQU 38 +kT EQU 40 +kY EQU 42 +kU EQU 44 +kI EQU 46 +kO EQU 48 +kP EQU 50 +kL_BRACE EQU 52 +kR_BRACE EQU 54 +kENTER EQU 56 +kCTRL EQU 58 +kA EQU 60 +kS EQU 62 +kD EQU 64 +kF EQU 66 +kG EQU 68 +kH EQU 70 +kJ EQU 72 +kK EQU 74 +kL EQU 76 +kSEMICOLON EQU 78 +kQUOTE EQU 80 +kBACKQUOTE EQU 82 +kL_SHIFT EQU 84 +kBACKSLASH EQU 86 +kZ EQU 88 +kX EQU 90 +kC EQU 92 +kV EQU 94 +kB EQU 96 +kN EQU 98 +kM EQU 100 +kCOMMA EQU 102 +kPERIOD EQU 104 +kSLASH EQU 106 +kR_SHIFT EQU 108 +kGREY_STAR EQU 110 +kALT EQU 112 +kSPACE EQU 114 +kCAPSLOCK EQU 116 +kF1 EQU 118 +kF2 EQU 120 +kF3 EQU 122 +kF4 EQU 124 +kF5 EQU 126 +kF6 EQU 128 +kF7 EQU 130 +kF8 EQU 132 +kF9 EQU 134 +kF10 EQU 136 +kNUMLOCK EQU 138 +kSCRLLOCK EQU 140 +kHOME EQU 142 +kUP EQU 144 +kPAGE_UP EQU 146 +kGREY_MINUS EQU 148 +kLEFT EQU 150 +kPAD_FIVE EQU 152 +kRIGHT EQU 154 +kGREY_PLUS EQU 156 +kEND EQU 158 +kDOWN EQU 160 +kPAGE_DOWN EQU 162 +kINSERT EQU 164 +kDELETE EQU 166 + +kF11 EQU 174 +kF12 EQU 176 + \ No newline at end of file diff --git a/16/scrasm/DIAGONAL.MAP b/16/scrasm/DIAGONAL.MAP new file mode 100644 index 00000000..1fb5529e Binary files /dev/null and b/16/scrasm/DIAGONAL.MAP differ diff --git a/16/scrasm/DIAGONAL.PAL b/16/scrasm/DIAGONAL.PAL new file mode 100644 index 00000000..5dee1969 Binary files /dev/null and b/16/scrasm/DIAGONAL.PAL differ diff --git 
a/16/scrasm/DIAGONAL.TIL b/16/scrasm/DIAGONAL.TIL new file mode 100644 index 00000000..95eee9f2 Binary files /dev/null and b/16/scrasm/DIAGONAL.TIL differ diff --git 
a/16/scrasm/GENMAP.C b/16/scrasm/GENMAP.C new file mode 100644 index 00000000..01652048 --- /dev/null +++ b/16/scrasm/GENMAP.C @@ -0,0 +1,99 @@ +#include +#include +#include +#include /* NOTE(review): the four header names were lost when this patch was extracted */ + +#define WIDTH 255 /* upper bound applied to both map width and map height */ + +#define MAPNAME "Diagonal" +#define FILENAME "%s.MAP" +char fn[100] = FILENAME; +typedef unsigned char BYTE; +typedef unsigned short int WORD; +typedef BYTE ROW[WIDTH]; + +ROW r; + +#define MAGIC_NUM 100 +#define SQUARE_WIDTH 16 +#define SQUARE_HEIGHT 16 +#define SCREEN_WIDTH 320 +#define SCREEN_HEIGHT 200 /* NOTE(review): CONSTANT.INC in this commit defines SCREEN_HEIGHT as 240 -- confirm */ +#define VIRTUAL_WIDTH 352 +#define VIRTUAL_HEIGHT 240 +typedef struct MAPHEADER { /* header record written at the start of the .MAP file */ + BYTE name[12]; /* 12 bytes: NUL at [n], then Ctrl-Z (26) at [n+1], n = name length */ + WORD width; /* 2 */ + WORD height; /* 2 */ + WORD extent; /* 2 */ + WORD off_x1; /* 2 */ + WORD off_y1; /* 2 */ + WORD off_x2; /* 2 */ + WORD off_y2; /* 2 */ + WORD x_wrap; /* 2 */ + WORD y_wrap; /* 2 */ + WORD magic; /* 2 */ + } MAPHEADER, far *LPMAPHEADER; +MAPHEADER mh; +/* Builds the MAPHEADER for MAPNAME and writes it to MAPNAME.MAP. Optional argv[1] is the width and argv[2] the height; each is clamped to at most WIDTH, and height defaults to width. */ +void main(int argc, char *argv[]) + { + FILE *fp; + int i,j; + BYTE b; + int width = WIDTH; + int height = WIDTH; + + if (argc > 1) { + width = atoi(argv[1]); + if (width > WIDTH) + width = WIDTH; + printf("Width = %d\n",width); + height=width; + if (argc > 2) { + height = atoi(argv[2]); + if (height > WIDTH) + height = WIDTH; + printf("Height = %d\n",height); + } + } + + sprintf(fn,FILENAME,MAPNAME); + fp = fopen(fn,"wb"); + if (!fp) { + printf("Couldn't open %s for write.\n",fn); + exit(1); + } + + memset(&mh, 0xFF, sizeof(MAPHEADER)); /* Will reveal missing initializing */ + strcpy((char *)mh.name,MAPNAME); + mh.name[8]=0; /* 8 is the length of MAPNAME */ + mh.name[9]=26; /* Ctrl-Z */ + mh.width = (WORD)width; + mh.height = (WORD)height; + mh.extent = (WORD)((WORD)width * (WORD)height); + mh.off_x1 = (WORD)0; + mh.off_y1 = (WORD)0; + mh.off_x2 = (WORD)(((VIRTUAL_WIDTH / SQUARE_WIDTH) - 1) % width); /* NOTE(review): the atoi() results are only clamped from above, so a width or height of 0 makes this and the next % divide by zero */ + mh.off_y2 = (WORD)((((VIRTUAL_HEIGHT / SQUARE_HEIGHT) - 1) % height) * width); + mh.x_wrap = (WORD)width; + mh.y_wrap = (WORD)height; + mh.magic = MAGIC_NUM; + fwrite(&mh, 1, 
sizeof(MAPHEADER), fp); + + for (i = 0; i /* NOTE(review): the rest of GENMAP.C and the GENPAL.C diff header are missing here (lost in extraction) */ +#include +#include +#include + +#define COLORS 256 +#define PALNAME "Diagonal" +#define FILENAME "%s.PAL" +char fn[100] = FILENAME; +typedef unsigned char BYTE; +typedef unsigned short int WORD; +typedef struct COLOR { + BYTE r,g,b; /* main keeps every component within 0..63 */ + } COLOR, far *LPCOLOR; +/* Writes COLORS entries of r,g,b to PALNAME.PAL; takes no arguments. */ +void main() /* int argc, char *argv[]) */ + { + FILE *fp; + int i; + int colors = COLORS; + COLOR c; + int r,dr,g,dg,b,db; + + sprintf(fn,FILENAME,PALNAME); + fp = fopen(fn,"wb"); + if (!fp) { + printf("Couldn't open %s for write.\n",fn); + exit(1); + } + /* Each channel starts at 0 and moves by its own step; when it passes 63 or drops below 0 it is clamped and the step is negated. */ + r=0; dr=2; + g=0; dg=3; + b=0; db=5; + for (i = 0; i < colors; i++) { + c.r = (BYTE)r; r+=dr; + if (r > 63) { r = 63; dr = -dr; } + else if (r < 0) { r = 0; dr = -dr; } + c.g = (BYTE)g; g+=dg; + if (g > 63) { g = 63; dg = -dg; } + else if (g < 0) { g = 0; dg = -dg; } + c.b = (BYTE)b; b+=db; + if (b > 63) { b = 63; db = -db; } + else if (b < 0) { b = 0; db = -db; } + fwrite(&c, sizeof(c),1, fp); /* NOTE(review): return value unchecked; writes sizeof(COLOR) bytes, which is 3 only if the struct is unpadded */ + printf("Palette %d\r",i); + } + fclose(fp); + printf("All done! 
\n"); + exit(0); + } + \ No newline at end of file diff --git a/16/scrasm/GENPAL.EXE b/16/scrasm/GENPAL.EXE new file mode 100644 index 00000000..c242a3d4 Binary files /dev/null and b/16/scrasm/GENPAL.EXE differ diff --git a/16/scrasm/GENPAL.LNK b/16/scrasm/GENPAL.LNK new file mode 100644 index 00000000..4b4ff212 --- /dev/null +++ b/16/scrasm/GENPAL.LNK @@ -0,0 +1 @@ +genpal.obj; diff --git a/16/scrasm/GENPAL.OBJ b/16/scrasm/GENPAL.OBJ new file mode 100644 index 00000000..ae94092e Binary files /dev/null and b/16/scrasm/GENPAL.OBJ differ diff --git a/16/scrasm/GENSQ.C b/16/scrasm/GENSQ.C new file mode 100644 index 00000000..402853f3 --- /dev/null +++ b/16/scrasm/GENSQ.C @@ -0,0 +1,102 @@ +#include +#include +#include +#include /* NOTE(review): the four header names were lost when this patch was extracted */ + +#define WIDTH 256 + +#define FILENAME "DIAGONAL.TIL" +char fn[100] = FILENAME; +typedef unsigned char BYTE; +typedef BYTE ROW[16]; +typedef ROW BITMAP[16]; + +BITMAP b; +BITMAP c; +/* 16x16 template: every cell is 0, 1 or 2 and selects which of copy_pattern's three colours is placed there. */ +BITMAP pattern={{1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0}, + {1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0}, + {1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0}, + {1,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0}, + {0,0,1,0,0,1,1,0,1,1,0,0,2,0,0,0}, + {0,0,1,0,0,1,2,0,1,2,0,0,2,0,0,0}, + {0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0}, + {0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0}, + {0,1,0,0,1,2,0,0,0,1,2,0,0,2,0,0}, + {0,0,1,0,1,2,1,1,1,1,2,0,2,0,0,0}, + {0,0,1,0,0,2,2,2,2,2,0,0,2,0,0,2}, + {0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2}, + {0,0,0,0,2,2,0,0,0,2,2,0,0,0,0,2}, + {0,0,0,0,0,0,2,2,2,0,0,0,0,0,2,2}, + {0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2}}; +/* Fills b from patt: cells holding 1 get l, cells holding 0 get m, cells holding 2 get h; any other value leaves b[y][x] untouched because the switch has no default. */ +void copy_pattern(BITMAP b,BITMAP patt, BYTE l, BYTE m, BYTE h) + { + int x,y; + + for (y=0; y<16; y++) { + for (x=0; x<16; x++) { + switch (patt[y][x]) { + case 0: + b[y][x] = m; + break; + case 1: + b[y][x] = l; + break; + case 2: + b[y][x] = h; + break; + } + } + } + } + +/* Transforms linear to planar: c receives four consecutive planes, and plane p holds, row by row, the pixels b[y][x] whose column x satisfies x % 4 == p. */ +void transform(BITMAP b,BITMAP c) + { + int x,y,p; + BYTE *pb = (BYTE *)c; + + for (p=0; p<4; p++) { + for (y=0; y<16; y++) { + for (x=0; x<16; x+=4) { + 
*(pb++) = b[y][x+p]; + } + } + } + } +/* Writes width tiles to fn, 16*16 bytes each in the planar order produced by transform; width defaults to WIDTH and an optional argv[1] is clamped to at most WIDTH. The file is opened before the argument is parsed. */ +void main(int argc,char *argv[]) + { + FILE *fp; + int i; + int width = WIDTH; + + fp = fopen(fn,"wb"); + if (!fp) { + printf("Couldn't open %s for write.\n",fn); + exit(1); + } + if (argc > 1) { + width = atoi(argv[1]); + if (width > WIDTH) + width = WIDTH; + printf("Width = %d\n",width); + } + + for (i = 0; i< width; i++) { + BYTE less,more; + /* less and more are the previous and next tile indices modulo width; they become the l and h colours of tile i */ + less = (BYTE)((i + width - 1) % width); + more = (BYTE)((i + 1) % width); + copy_pattern(b, pattern, less, (BYTE)i, more); + transform(b,c); + fwrite(c, 16,16, fp); /* NOTE(review): return value unchecked */ + printf("Square %d\r",i); + } + fclose(fp); + printf("All done! \n"); + exit(0); + } + \ No newline at end of file diff --git a/16/scrasm/GENSQ.EXE b/16/scrasm/GENSQ.EXE new file mode 100644 index 00000000..71cf2657 Binary files /dev/null and b/16/scrasm/GENSQ.EXE differ diff --git a/16/scrasm/GENSQ.LNK b/16/scrasm/GENSQ.LNK new file mode 100644 index 00000000..a9d9b795 --- /dev/null +++ b/16/scrasm/GENSQ.LNK @@ -0,0 +1 @@ +gensq.obj; diff --git a/16/scrasm/GENSQ.OBJ b/16/scrasm/GENSQ.OBJ new file mode 100644 index 00000000..7166ee65 Binary files /dev/null and b/16/scrasm/GENSQ.OBJ differ diff --git a/16/scrasm/INIT.INC b/16/scrasm/INIT.INC new file mode 100644 index 00000000..eddeeed4 --- /dev/null +++ b/16/scrasm/INIT.INC @@ -0,0 +1,375 @@ +;; Error messages +ERR_OK EQU 0 +msgErr0 db 'Later!',13,10,'$' +ERR_MEM EQU 1 +msgErr1 db 'Error 001: Out of memory?',13,10,'$' +ERR_CPU EQU 2 +msgErr2 db 'Error 002: CPU must be at least an 80386.',13,10,'$' +ERR_FILE EQU 3 +msgErr3 db 'Error 003: File error.',13,10,'$' +ERR_FILENOTFOUND EQU 4 +msgErr4 db 'Error 004: File not found.',13,10,'$' +msgtblError dw offset msgErr0, offset msgErr1, offset msgErr2, + offset msgErr3, offset msgErr4 +nError db 0 + +;; CPU name strings +CPUName86 DB "8088/8086$" +CPUName286 DB "80286DX/SX$" +CPUName386 DB "80386DX/SX$" +CPUName486 DB "80486DX/SX or better$" +CPUNameTable DW CPUName86,CPUName286,CPUName386,CPUName486 + +EVEN +msgCPUTypeIs DB 
"Your CPU type: $" +EVEN +msgCPUTypeIsEnd DB 13,10,'$' +nCPU DB 0 + +EVEN +msgPages DB 'Pages displayed: ' +strNumPages DB 6 dup (?),13,10,'$' + +EVEN +bufText DW 80*50 DUP (?) ; Needs this much to hold + ; a 50-line screen... +wCPos DW 0 +nDisplay DB 0 + +EVEN +fnMap1 db 'DIAGONAL.MAP',0 +fnTiles1 db 'DIAGONAL.TIL',0 +fnPalette db 'DIAGONAL.PAL',0 ; only one allowed, for now +fnMap2 db 'SCROLL.MAP',0 +fnTiles2 db 'SCROLL.TIL',0 + +fntblMap dw offset fnMap1,offset fnMap2 +fntblTiles dw offset fnTiles1,offset fnTiles2 +nMap dw 0 + +;; CPUType routine snatched from Ray Duncan's _Power Programming MASM_ +;; chapter 14. Reformatted to my style, but I left the code alone +;; except that it used to push a bunch of stuff, but doesn't any more +;; because I don't care what gets destroyed. +CPUType PROC near + pushf ; now try to clear bits 12-15 + pop ax ; of CPU flags + and ax,0fffh + push ax ; set modified CPU flags + popf + pushf + pop ax ; get flags again + and ax,0f000h ; if bits 12-15 are still + cmp ax,0f000h ; set, this is 8086/88 + jne cpu1 ; jump, not 8086/88 + mov nCPU,CPU8086 ; set nCPU = 86/88 CPU type + jmp cpux ; and exit + +cpu1: or ax,0f000h ; must be 286 or later, + push ax ; now try to set bits 12-15 + popf ; of CPU flags + pushf + pop ax ; if bits 12-15 can't be + and ax,0f000h ; set, this is a 286 + jnz cpu2 ; jump, not 80286 + mov nCPU,CPU80286 ; set nCPU = 286 CPU type + jmp cpux ; and exit + +cpu2: mov bx,sp ; 386 or later, save SP + and sp,not 3 ; avoid stack alignment fault + pushfd ; get value of EFLAGS + pop eax + mov ecx,eax ; save copy of EFLAGS + xor eax,40000h ; flip AC bit in EFLAGS + push eax ; try and force EFLAGS + popfd + pushfd ; get back EFLAGS value + pop eax + mov sp,bx ; restore old stack pointer + xor eax,ecx ; can AC bit be changed? 
+ jnz cpu3 ; yes (EFLAGS changed): jump, not a 386 + mov nCPU,CPU80386 ; set nCPU = 386 CPU type + jmp cpux ; and exit + +cpu3: mov nCPU,CPU80486 ; set nCPU = 486 CPU type + +cpux: mov bl,nCPU + xor bh,bh + shl bx,1 ; BX = nCPU * 2, word index into CPUNameTable + DOSPRINT ; NOTE(review): this and the bare DOSPRINT two lines down appear to have lost their arguments in extraction -- confirm + DOSPRINT CPUNameTable[bx] + DOSPRINT + ret ; return with nCPU = CPU type +CPUType ENDP + +;; Initialize: So far, all it does is make sure you have a 386 + +;; (because that's what I assembled the code for). +Initialize PROC near + ; Set DS = CS in this program, since data is local + mov ax,cs + mov segCode,ax ; Store the Code Segment + mov bx,ds + mov segPSP,bx ; Store the PSP Segment + mov ds,ax ; Set DS = CS + + ; Resize code to 64K + CODE_SIZE EQU 64 ; <- this is arbitrary. + ; ES already -> allocated segment + mov ah,4ah + mov bx,64*CODE_SIZE ; paragraphs: 64 per KB times CODE_SIZE KB + int 21h + mov nError,ERR_MEM ; MOV leaves the carry from INT 21h intact for the JC + jc TerminateError + +;; I've chosen not to implement sprites yet so that I can get this out +;; the door... +;; ; 320x200 buffer for sprite drawing. To draw sprites, first draw them +;; ; into this buffer, adding rectangles to the current rectangle list. +;; ; Then, use BUFFER_COPY to put out the buffers with the current +;; ; rectangle list to the screen. BUFFER_COPY will ensure minimal VGA +;; ; writing. +;; ; Create a buffer segment +;; mov bx,(320 * 200) / 16 +;; mov ah,48h +;; int 21h +;; mov nError,ERR_MEM +;; jc TerminateError +;; mov segBuffer,ax + + call CPUType + mov nError,ERR_CPU + cmp nCPU,2 ; 2 = CPU80386 + jl TerminateError ; anything below a 386 exits with ERR_CPU + + mov ds,segCode + mov dx,offset fnPalette + call LoadPaletteFile + jc TerminateError + + call LoadIndex + jc TerminateError + + KEYB_START + + call Beginning ; Can display an entry screen here + + ; This is linked in from Michael Abrash's zen timer code. 
+ ; (But I wrote the Click myself) + call Click + call ZTimerOn + + call MainLoop + + call ZTimerOff + call Click + + call Ending ; Can display an exit screen here + + KEYB_END + +Terminate: mov nError,ERR_OK +TerminateError: + mov ax,cs ;DOS functions require that DS point + mov ds,ax ; to text to be displayed on the screen + mov bh,0 + mov bl,nError + shl bx,1 ; BX = nError * 2, word index into msgtblError + DOSPRINT msgtblError[bx] + + mov ax,pages + mov ds,segCode + mov si,offset strNumPages + call Int2Ascii + DOSPRINT ; NOTE(review): argument apparently lost in extraction, presumably the msgPages text -- confirm + + call ZTimerReport + + mov al,nError ; exit code = error number + mov ah,4ch ; DOS Terminate + int 21h + ; Don't need to RET! We're outta here +Initialize ENDP + +;; Clicks the internal speaker. I use this to indicate that page timing +;; has started. +Click PROC + in al,61h + mov ah,al ; keep the original port 61h value for the restore below + or al,3 + out 61h,al + + mov cx,5000 ; (this is an arbitrary delay!) +spkr_on: loop spkr_on + mov al,ah + out 61h,al + ret +Click ENDP + +;; Int2Ascii: writes AX as signed decimal into the 6 bytes at CS:SI (sign or space, then five zero-padded digits). Copied from an old 8088 "Learn Assembly" book and changed a bit +Int2Ascii PROC + mov cx,6 ; NOTE(review): dead store -- CX is reloaded with 10 before it is used + mov byte ptr cs:[si],' ' + mov byte ptr cs:[si+1],'0' + mov byte ptr cs:[si+2],'0' + mov byte ptr cs:[si+3],'0' + mov byte ptr cs:[si+4],'0' + mov byte ptr cs:[si+5],'0' + add si,6 ; SI now points just past the field; digits are stored backwards + mov cx,10 ; divisor + or ax,ax + jns clear_divide + neg ax + mov byte ptr cs:[si-6],'-' +clear_divide: mov dx,0 ; DX:AX / 10 -> quotient in AX, remainder in DX + div cx + add dx,'0' + dec si + mov cs:[si],dl + or ax,ax + jnz clear_divide + ret +Int2Ascii ENDP + +;; Given a filename at DS:DX, reads the file into memory and returns +;; a pointer to it as DX:0000. +;; Note that this routine obviously will only work correctly for +;; a file < 640k in size, but you can bring in files bigger than 64k. +;; This code comes from Future Crew's STMIK sampler "Mental Surgery" +;; and I commented it up to make it fit in with my stuff a little better. +;; Thank you, FC, for releasing that code! Several of the routines +;; in this program were inspired or helped along by having that source... +;; Most recently, added in error codes. 
+EVEN +LoadFile PROC NEAR + ;set: DX=offset to filename + ;return: DX=segment of file + + ; Open the datafile at DS:DX. + mov ax,3D00h ; 3D,00 -> Open file, read only + ; DS:DX already points at filename + int 21h ; returns AX=file handle + mov cl,ERR_FILENOTFOUND + jc ferror + mov bx,ax ; Store file handle in BX + mov si,bx ; and keep a copy in SI + + ; Get the length of the file so we know how much to allocate + mov ax,4202h ; 42,02 -> Seek, signed from end + mov cx,0 ; CX:DX is a long file offset, + ; BX is already set as file handle + mov dx,0 ; zero in this case = end of file + int 21h ; (returns long offset in DX:AX) + mov cl,ERR_FILE + jc ferror + +;;; shr dx,1 ; This is original FC code, +;;; rcr ax,1 ; which I removed because the +;;; shr dx,1 ; 386 has a nice instruction +;;; rcr ax,1 ; to do this all! +;;; shr dx,1 ; But 286 users will want to +;;; rcr ax,1 ; return to this code, instead +;;; shr dx,1 ; of SHRD ax,dx,4 +;;; rcr ax,1 ; + + ; Now turn that long DX:AX into a number of paragraphs to allocate + ; for when we read the file. + shrd ax,dx,4 ; Shifts long DX:AX right 4 bits (divide by 16), + mov bx,ax ; and the low 16 bits of the result go in BX + inc bx ; one extra paragraph: the shift discards any partial paragraph + mov ah,48h ; 48 -> Allocate memory + ; BX already = # of paragraphs + int 21h + mov cl,ERR_MEM + jc ferror + mov di,ax ; keep the allocated segment in DI + + ; Seek the file back to the beginning in order to read it into + ; the memory we just allocated. + mov ax,4200h ; 42,00 -> Seek, absolute offset + mov bx,si ; BX is the file handle. + mov cx,0 ; CX:DX is a long offset + mov dx,0 + int 21h + jc ferror ; NOTE(review): CL is 0 (ERR_OK) here from the MOV CX,0 above, so this failure is recorded as ERR_OK + + ; Now read the file into memory + mov ds,di ; DS points at alloc'd memory (the caller's DS is not saved) +ReadBlock: mov ah,3fh ; 3F -> Read file + mov cx,32768 ; read 32768 bytes at a time + mov dx,0 ; DS:DX points at beginning of + int 21h ; this block of memory. + mov cl,ERR_FILE + jc ferror + mov dx,ds ; Offset DS by (32768/16), which + add dx,800h ; is the number of paragraphs in + mov ds,dx ; each block of 32768 bytes. 
+ cmp ax,32768 ; Did we actually read 32768 bytes? + je ReadBlock ; If so, there's more to read... + ; Otherwise, we've read all the + ; data in the file. + + ; So now, close the file handle. + mov ah,3Eh ; 3E -> Close file + ; BX still is the file handle + int 21h + + ; Everything went ok. Return the segment in DX. + mov dx,di + mov nError,ERR_OK + ret +ferror: mov nError,cl ; NOTE(review): returns without closing the file handle or freeing any memory already allocated + ret +LoadFile ENDP + +;; Eventually, this should load in an index of all data files to +;; allow for filenames to be specified outside of the program. The +;; goal is to make the program have no hardcoded filenames... +;; Of course, the structure of this index and its entries will be +;; hardcoded, as will the structures of all of the files it includes. +LoadIndex PROC near + ret ; placeholder: does nothing yet +LoadIndex ENDP + +;; Save the current video mode and cursor position with standard +;; BIOS calls +SaveVideo PROC near + mov ah,0Fh + int 10h ; Get current display Mode + mov nDisplay,al + mov ah,03h + mov bh,0 + int 10h ; AH=03h: DX is stored below as the cursor position + mov wCPos,dx + + mov ds,segText + mov si,0 + mov es,segCode + mov di,offset bufText + mov cx,80*50 + rep movsw ; copy 80*50 words from segText:0 into bufText + ret +SaveVideo ENDP + +;; Restore the current video mode and cursor position with standard +;; BIOS calls +RestoreVideo PROC near + mov ah,00h + mov al,nDisplay + int 10h ; Set display mode (AH=00h) to the saved nDisplay + mov ah,02h + mov bh,0 + mov dx,wCPos + int 10h ; AH=02h: put the cursor back at the saved wCPos + + PAL_UPDATE ; When flipping into text mode, re-do the + ; palette because the BIOS changes it. 
+ + mov es,segText + mov di,0 + mov ds,segCode + mov si,offset bufText + mov cx,80*50 + rep movsw ; copy 80*50 words from bufText back to segText:0 + ret +RestoreVideo ENDP + \ No newline at end of file diff --git a/16/scrasm/KEYB.INC b/16/scrasm/KEYB.INC new file mode 100644 index 00000000..ef730cf0 --- /dev/null +++ b/16/scrasm/KEYB.INC @@ -0,0 +1,237 @@ +;; ==================================================================== +;; Macros +;; ==================================================================== +;; Jump if key pressed (tests the byte at _keyFlags[key+1] for 1) +JKEYP MACRO key,label + cmp byte ptr cs:_keyFlags[key+1],1 + je label + ENDM +;; Jump if key not pressed +JKEYNP MACRO key,label + cmp byte ptr cs:_keyFlags[key+1],1 + jne label + ENDM + +;; Note that JNKEY and JKEY both modify _flKeyChanged, so you cannot +;; use one after the other! In other words, +;; JNKEY no_key +;; JKEY yes_key ;<-- this will fail +;; will not work like you'd think it would. The second call (JKEY) +;; will not know that a key has been pressed! +;; Jump if _flKeyChanged is zero (no key event since it was last cleared): +JNKEY MACRO label + cmp cs:_flKeyChanged,0 + je label + mov cs:_flKeyChanged,0 ; <--- important! + ENDM +;; Jump if _flKeyChanged is nonzero (a key event occurred); always clears it: +JKEY MACRO label + cmp cs:_flKeyChanged,0 + mov cs:_flKeyChanged,0 ; MOV leaves the flags from the CMP intact + jne label + ENDM + +;; Start keyboard interrupts +KEYB_START MACRO + call SwapInt9 + mov cs:_flKeyChanged,0 + ENDM + +;; Clear keyboard interrupts +KEYB_END MACRO + call SwapInt9 + ENDM + +;; Credit for these routines: Steve Dollins, Brown Computer Group. +;; I didn't write any of the code below -- just heisted it from some +;; stuff that he wrote and released! Very useful keyboard routines. +;; Any comments prefixed SDE were added by me. 
+_keyFlags dw 256 dup (0) ; SDE: since they only use 2 bits + ; per word, this is a tradeoff, + ; space for time + +oldint9_offset dw offset newint9 ; starts out holding the NEW handler; SwapInt9 exchanges it with the live vector +oldint9_segment dw seg newint9 + +_flKeyChanged dw 0 + +;----------------------------------------------------------------------- +; void SwapInt9( void ) +; +; SwapInt9() exchanges the vector in oldint9_segment:oldint9_offset +; with the vector in the interrupt table for INT 9h. +;----------------------------------------------------------------------- + +SwapInt9 PROC far + mov ax,cs + mov ds,ax ; DS = CS on return; AX, BX, DX and ES are also overwritten + + mov ax,03509h ; Get interrupt 09h + int 21h ; return in ES:BX + + mov ax,oldint9_segment + mov dx,oldint9_offset + push ds + mov ds,ax + mov ax,02509h ; Set new interrupt + int 21h ; to address in DS:DX + pop ds + + mov oldint9_segment,es ; Save the old interrupt + mov oldint9_offset,bx + ret +SwapInt9 ENDP + + +;----------------------------------------------------------------------- +; newint9 is the new keyboard interrupt (INT 9h). +; +; Reads the scan code from the keyboard and modifies the key +; flags table. The high byte is set to the position of the key, +; pressed=1, release=0. The low byte is set to 1 when the key +; is pressed and left unmodified when the key is released. +;----------------------------------------------------------------------- +newint9 PROC far + push ax + push bx + push ds + + mov ax,cs + mov ds,ax + + JKEYNP kCTRL,not_ctrlaltdel ; SDE code + JKEYNP kALT,not_ctrlaltdel ; To still allow ctrl- + JKEYNP kDELETE,not_ctrlaltdel ; alt-delete. Nothing + jmp ctrlaltdel ; worse than a total lockup! 
+not_ctrlaltdel: + + in ax,60h ; get scan code in AL, control byte in AH + mov bx,ax ; save a copy in BX + xchg ah,al ; swap to get control byte in AL + or al,80h ; clear keyboard + out 61h,al ; of interrupt + and al,7Fh + out 61h,al + mov al,20h ; send generic EOI to + out 20h,al ; PIC + + and bx,0007fh ; strip all but the scan code + shl bx,1 ; multiply by two to get our offset + + ; if the key was released, the high bit is set in the scan code + bt ax,15 ; AH holds the raw scan code since the XCHG; its high bit goes into the carry flag + setnc byte ptr [_keyFlags+bx+1] ; set "Is being pressed" flag + jc short int09done ; if the key was released, we're done + mov byte ptr [_keyFlags+bx],1 ; set "Has been pressed" flag + mov _flKeyChanged,1 ; redundant: int09done stores the same value on every path +int09done: + mov _flKeyChanged,1 ; state of keyboard has changed + pop ds + pop bx + pop ax + iret +ctrlaltdel: int 19h ; SDE -- added this. + ; Allows a reboot. +newint9 ENDP + +;; Defines the current key procedure (used as a jump-through) +kprocCur dw KprocDirect + +;; This is a keyboard procedure. Normally, this would control some +;; sprite, or something, and the screen would follow the sprite. For +;; the purposes of this code, though (namely, sprite-less scrolling) +;; it just directly affects ScrollDX and ScrollDY. +;; This keyproc is inertialess, use grey + and - to change speed and +;; the up/down/left/right keys to move directions. +;; Pressing K will switch to the other keyprocedure on the fly. +;; P pauses the screen -- note that this is just for completely +;; freezing the screen... it doesn't return until you let go! 
+ +EVEN +scroll_speed_x dw SCROLL_SPEED ; horizontal step copied into ScrollDX (default) +scroll_speed_y dw SCROLL_SPEED * VIRTUAL_WIDTH ; vertical step copied into ScrollDY; kept at scroll_speed_x * VIRTUAL_WIDTH (default) +KprocDirect PROC near +chk_leftright: mov ax,0 + JKEYNP kRIGHT,not_right + mov ax,scroll_speed_x + mov ScrollDX,ax + jmp chk_updown +not_right: JKEYNP kLEFT,not_left + sub ax,scroll_speed_x ; AX = 0 - speed + mov ScrollDX,ax + jmp chk_updown +not_left: mov ScrollDX,ax ; neither key down: AX is still 0 + +chk_updown: mov ax,0 + JKEYNP kUP,not_up + sub ax,scroll_speed_y ; AX = 0 - speed + mov ScrollDY,ax + jmp chk_other +not_up: JKEYNP kDOWN,not_down + mov ax,scroll_speed_y + mov ScrollDY,ax + jmp chk_other +not_down: mov ScrollDY,ax ; neither key down: AX is still 0 + +chk_other: JKEYNP kK,not_k + mov kprocCur,KprocInertia ; K: switch to the inertia key procedure +not_k: JKEYNP kM,not_m + mov bDoTransition,1 +not_m: JKEYNP kGREY_MINUS,not_minus + cmp scroll_speed_x,1 ; never drop below 1 + jle not_minus + dec scroll_speed_x + sub scroll_speed_y,VIRTUAL_WIDTH +not_minus: JKEYNP kGREY_PLUS,not_plus + cmp scroll_speed_x,16 ; NOTE(review): caps at 16, but CONSTANT.INC warns SCROLL_SPEED must not go above 8 -- confirm + jge not_plus + inc scroll_speed_x + add scroll_speed_y,VIRTUAL_WIDTH +not_plus: + +pause_key: JKEYP kP,pause_key ; spin here for as long as P is held + + ret +KprocDirect ENDP + +;; This keyproc has inertia, so + and - don't work. +;; Use up/down/left/right keys to increase speed in those directions. +;; Pressing K will switch to the other keyprocedure on the fly. +;; P pauses the screen -- note that this is just for completely +;; freezing the screen... it doesn't return until you let go! 
+KprocInertia    PROC near                       ; keyproc with inertia: each call nudges ScrollDX/ScrollDY by one step
+chk2_leftright: JKEYNP kRIGHT,not2_right        ; JKEYNP is defined outside this view; presumably branches when the key is NOT pressed
+                cmp ScrollDX,16
+                je not2_right                   ; already at +16: leave ScrollDX alone
+                inc ScrollDX
+                jmp chk2_updown
+not2_right:     JKEYNP kLEFT,not2_left
+                cmp ScrollDX,-16
+                je not2_left                    ; already at -16: leave ScrollDX alone
+                dec ScrollDX
+                jmp chk2_updown
+not2_left:
+
+chk2_updown:    JKEYNP kUP,not2_up
+                cmp ScrollDY,-VIRTUAL_WIDTH * 16
+                je not2_up                      ; already at -16 rows' worth: no further change
+                add ScrollDY,-VIRTUAL_WIDTH     ; one step up = one VIRTUAL_WIDTH less
+                jmp chk2_other
+not2_up:        JKEYNP kDOWN,not2_down
+                cmp ScrollDY,VIRTUAL_WIDTH * 16
+                je not2_down                    ; already at +16 rows' worth: no further change
+                add ScrollDY,VIRTUAL_WIDTH      ; one step down = one VIRTUAL_WIDTH more
+                jmp chk2_other
+not2_down:
+
+chk2_other:     JKEYNP kK,not2_k
+                mov kprocCur,KprocDirect        ; K: make KprocDirect the current key procedure
+not2_k:         JKEYNP kM,not2_m
+                mov bDoTransition,1             ; M: set the flag MainLoop tests to switch maps
+not2_m:
+
+pause2_key:     JKEYP kP,pause2_key             ; branches back to itself; presumably spins while P is held (JKEYP not visible here)
+
+                ret
+KprocInertia    ENDP
+
\ No newline at end of file
diff --git a/16/scrasm/LZTIMER.ASM b/16/scrasm/LZTIMER.ASM
new file mode 100644
index 00000000..5fed7be1
--- /dev/null
+++ b/16/scrasm/LZTIMER.ASM
@@ -0,0 +1,636 @@
+;
+; *** Listing 2-5 ***
+;
+; The long-period Zen timer. (LZTIMER.ASM)
+; Uses the 8253 timer and the BIOS time-of-day count to time the
+; performance of code that takes less than an hour to execute.
+; Because interrupts are left on (in order to allow the timer
+; interrupt to be recognized), this is less accurate than the
+; precision Zen timer, so it is best used only to time code that takes
+; more than about 54 milliseconds to execute (code that the precision
+; Zen timer reports overflow on). Resolution is limited by the
+; occurrence of timer interrupts.
+;
+; By Michael Abrash 4/26/89
+;
+; Externally callable routines:
+;
+;  ZTimerOn: Saves the BIOS time of day count and starts the
+;       long-period Zen timer.
+;
+;  ZTimerOff: Stops the long-period Zen timer and saves the timer
+;       count and the BIOS time-of-day count.
+;
+;  ZTimerReport: Prints the time that passed between starting and
+;       stopping the timer.
+;
+; Note: If either more than an hour passes or midnight falls between
+;       calls to ZTimerOn and ZTimerOff, an error is reported.
For +; timing code that takes more than a few minutes to execute, +; either the DOS TIME command in a batch file before and after +; execution of the code to time or the use of the DOS +; time-of-day function in place of the long-period Zen timer is +; more than adequate. +; +; Note: The PS/2 version is assembled by setting the symbol PS2 to 1. +; PS2 must be set to 1 on PS/2 computers because the PS/2's +; timers are not compatible with an undocumented timer-stopping +; feature of the 8253; the alternative timing approach that +; must be used on PS/2 computers leaves a short window +; during which the timer 0 count and the BIOS timer count may +; not be synchronized. You should also set the PS2 symbol to +; 1 if you're getting erratic or obviously incorrect results. +; +; Note: When PS2 is 0, the code relies on an undocumented 8253 +; feature to get more reliable readings. It is possible that +; the 8253 (or whatever chip is emulating the 8253) may be put +; into an undefined or incorrect state when this feature is +; used. +; +; *************************************************************** +; * If your computer displays any hint of erratic behavior * +; * after the long-period Zen timer is used, such as the floppy * +; * drive failing to operate properly, reboot the system, set * +; * PS2 to 1 and leave it that way! * +; *************************************************************** +; +; Note: Each block of code being timed should ideally be run several +; times, with at least two similar readings required to +; establish a true measurement, in order to eliminate any +; variability caused by interrupts. +; +; Note: Interrupts must not be disabled for more than 54 ms at a +; stretch during the timing interval. Because interrupts +; are enabled, keys, mice, and other devices that generate +; interrupts should not be used during the timing interval. 
+; +; Note: Any extra code running off the timer interrupt (such as +; some memory-resident utilities) will increase the time +; measured by the Zen timer. +; +; Note: These routines can introduce inaccuracies of up to a few +; tenths of a second into the system clock count for each +; code section timed. Consequently, it's a good idea to +; reboot at the conclusion of timing sessions. (The +; battery-backed clock, if any, is not affected by the Zen +; timer.) +; +; All registers and all flags are preserved by all routines. +; + DOSSEG + .model small + .code + public ZTimerOn, ZTimerOff, ZTimerReport + +; +; Set PS2 to 0 to assemble for use on a fully 8253-compatible +; system; when PS2 is 0, the readings are more reliable if the +; computer supports the undocumented timer-stopping feature, +; but may be badly off if that feature is not supported. In +; fact, timer-stopping may interfere with your computer's +; overall operation by putting the 8253 into an undefined or +; incorrect state. Use with caution!!! +; +; Set PS2 to 1 to assemble for use on non-8253-compatible +; systems, including PS/2 computers; when PS2 is 1, readings +; may occasionally be off by 54 ms, but the code will work +; properly on all systems. +; +; A setting of 1 is safer and will work on more systems, +; while a setting of 0 produces more reliable results in systems +; which support the undocumented timer-stopping feature of the +; 8253. The choice is yours. +; +PS2 equ 1 +; +; Base address of the 8253 timer chip. +; +BASE_8253 equ 40h +; +; The address of the timer 0 count registers in the 8253. +; +TIMER_0_8253 equ BASE_8253 + 0 +; +; The address of the mode register in the 8253. +; +MODE_8253 equ BASE_8253 + 3 +; +; The address of the BIOS timer count variable in the BIOS +; data segment. +; +TIMER_COUNT equ 46ch +; +; Macro to emulate a POPF instruction in order to fix the bug in some +; 80286 chips which allows interrupts to occur during a POPF even when +; interrupts remain disabled. 
+; +MPOPF macro + local p1, p2 + jmp short p2 +p1: iret ;jump to pushed address & pop flags +p2: push cs ;construct far return address to + call p1 ; the next instruction + endm + +; +; Macro to delay briefly to ensure that enough time has elapsed +; between successive I/O accesses so that the device being accessed +; can respond to both accesses even on a very fast PC. +; +DELAY macro + jmp $+2 + jmp $+2 + jmp $+2 + endm + +StartBIOSCountLow dw ? ;BIOS count low word at the + ; start of the timing period +StartBIOSCountHigh dw ? ;BIOS count high word at the + ; start of the timing period +EndBIOSCountLow dw ? ;BIOS count low word at the + ; end of the timing period +EndBIOSCountHigh dw ? ;BIOS count high word at the + ; end of the timing period +EndTimedCount dw ? ;timer 0 count at the end of + ; the timing period +ReferenceCount dw ? ;number of counts required to + ; execute timer overhead code +; +; String printed to report results. +; +OutputStr label byte + db 0dh, 0ah, 'Timed count: ' +TimedCountStr db 10 dup (?) + db ' microseconds', 0dh, 0ah + db '$' +; +; Temporary storage for timed count as it's divided down by powers +; of ten when converting from doubleword binary to ASCII. +; +CurrentCountLow dw ? +CurrentCountHigh dw ? +; +; Powers of ten table used to perform division by 10 when doing +; doubleword conversion from binary to ASCII. +; +PowersOfTen label word + dd 1 + dd 10 + dd 100 + dd 1000 + dd 10000 + dd 100000 + dd 1000000 + dd 10000000 + dd 100000000 + dd 1000000000 +PowersOfTenEnd label word +; +; String printed to report that the high word of the BIOS count +; changed while timing (an hour elapsed or midnight was crossed), +; and so the count is invalid and the test needs to be rerun. +; +TurnOverStr label byte + db 0dh, 0ah + db '****************************************************' + db 0dh, 0ah + db '* Either midnight passed or an hour or more passed *' + db 0dh, 0ah + db '* while timing was in progress. 
If the former was *' + db 0dh, 0ah + db '* the case, please rerun the test; if the latter *' + db 0dh, 0ah + db '* was the case, the test code takes too long to *' + db 0dh, 0ah + db '* run to be timed by the long-period Zen timer. *' + db 0dh, 0ah + db '* Suggestions: use the DOS TIME command, the DOS *' + db 0dh, 0ah + db '* time function, or a watch. *' + db 0dh, 0ah + db '****************************************************' + db 0dh, 0ah + db '$' + +;******************************************************************** +;* Routine called to start timing. * +;******************************************************************** + +ZTimerOn proc near + +; +; Save the context of the program being timed. +; + push ax + pushf +; +; Set timer 0 of the 8253 to mode 2 (divide-by-N), to cause +; linear counting rather than count-by-two counting. Also stops +; timer 0 until the timer count is loaded, except on PS/2 +; computers. +; + mov al,00110100b ;mode 2 + out MODE_8253,al +; +; Set the timer count to 0, so we know we won't get another +; timer interrupt right away. +; Note: this introduces an inaccuracy of up to 54 ms in the system +; clock count each time it is executed. +; + DELAY + sub al,al + out TIMER_0_8253,al ;lsb + DELAY + out TIMER_0_8253,al ;msb +; +; In case interrupts are disabled, enable interrupts briefly to allow +; the interrupt generated when switching from mode 3 to mode 2 to be +; recognized. Interrupts must be enabled for at least 210 ns to allow +; time for that interrupt to occur. Here, 10 jumps are used for the +; delay to ensure that the delay time will be more than long enough +; even on a very fast PC. +; + pushf + sti + rept 10 + jmp $+2 + endm + MPOPF +; +; Store the timing start BIOS count. +; (Since the timer count was just set to 0, the BIOS count will +; stay the same for the next 54 ms, so we don't need to disable +; interrupts in order to avoid getting a half-changed count.) 
+; + push ds + sub ax,ax + mov ds,ax + mov ax,ds:[TIMER_COUNT+2] + mov cs:[StartBIOSCountHigh],ax + mov ax,ds:[TIMER_COUNT] + mov cs:[StartBIOSCountLow],ax + pop ds +; +; Set the timer count to 0 again to start the timing interval. +; + mov al,00110100b ;set up to load initial + out MODE_8253,al ; timer count + DELAY + sub al,al + out TIMER_0_8253,al ;load count lsb + DELAY + out TIMER_0_8253,al ;load count msb +; +; Restore the context of the program being timed and return to it. +; + MPOPF + pop ax + ret + +ZTimerOn endp + +;******************************************************************** +;* Routine called to stop timing and get count. * +;******************************************************************** + +ZTimerOff proc near + +; +; Save the context of the program being timed. +; + pushf + push ax + push cx +; +; In case interrupts are disabled, enable interrupts briefly to allow +; any pending timer interrupt to be handled. Interrupts must be +; enabled for at least 210 ns to allow time for that interrupt to +; occur. Here, 10 jumps are used for the delay to ensure that the +; delay time will be more than long enough even on a very fast PC. +; + sti + rept 10 + jmp $+2 + endm + +; +; Latch the timer count. +; + +if PS2 + + mov al,00000000b + out MODE_8253,al ;latch timer 0 count +; +; This is where a one-instruction-long window exists on the PS/2. +; The timer count and the BIOS count can lose synchronization; +; since the timer keeps counting after it's latched, it can turn +; over right after it's latched and cause the BIOS count to turn +; over before interrupts are disabled, leaving us with the timer +; count from before the timer turned over coupled with the BIOS +; count from after the timer turned over. The result is a count +; that's 54 ms too long. +; + +else + +; +; Set timer 0 to mode 2 (divide-by-N), waiting for a 2-byte count +; load, which stops timer 0 until the count is loaded. (Only works +; on fully 8253-compatible chips.) 
+; + mov al,00110100b ;mode 2 + out MODE_8253,al + DELAY + mov al,00000000b ;latch timer 0 count + out MODE_8253,al + +endif + + cli ;stop the BIOS count +; +; Read the BIOS count. (Since interrupts are disabled, the BIOS +; count won't change.) +; + push ds + sub ax,ax + mov ds,ax + mov ax,ds:[TIMER_COUNT+2] + mov cs:[EndBIOSCountHigh],ax + mov ax,ds:[TIMER_COUNT] + mov cs:[EndBIOSCountLow],ax + pop ds +; +; Read the timer count and save it. +; + in al,TIMER_0_8253 ;lsb + DELAY + mov ah,al + in al,TIMER_0_8253 ;msb + xchg ah,al + neg ax ;convert from countdown + ; remaining to elapsed + ; count + mov cs:[EndTimedCount],ax +; +; Restart timer 0, which is still waiting for an initial count +; to be loaded. +; + +ife PS2 + + DELAY + mov al,00110100b ;mode 2, waiting to load a + ; 2-byte count + out MODE_8253,al + DELAY + sub al,al + out TIMER_0_8253,al ;lsb + DELAY + mov al,ah + out TIMER_0_8253,al ;msb + DELAY + +endif + + sti ;let the BIOS count continue +; +; Time a zero-length code fragment, to get a reference for how +; much overhead this routine has. Time it 16 times and average it, +; for accuracy, rounding the result. +; + mov cs:[ReferenceCount],0 + mov cx,16 + cli ;interrupts off to allow a + ; precise reference count +RefLoop: + call ReferenceZTimerOn + call ReferenceZTimerOff + loop RefLoop + sti + add cs:[ReferenceCount],8 ;total + (0.5 * 16) + mov cl,4 + shr cs:[ReferenceCount],cl ;(total) / 16 + 0.5 +; +; Restore the context of the program being timed and return to it. +; + pop cx + pop ax + MPOPF + ret + +ZTimerOff endp + +; +; Called by ZTimerOff to start the timer for overhead measurements. +; + +ReferenceZTimerOn proc near +; +; Save the context of the program being timed. +; + push ax + pushf +; +; Set timer 0 of the 8253 to mode 2 (divide-by-N), to cause +; linear counting rather than count-by-two counting. +; + mov al,00110100b ;mode 2 + out MODE_8253,al +; +; Set the timer count to 0. 
+; + DELAY + sub al,al + out TIMER_0_8253,al ;lsb + DELAY + out TIMER_0_8253,al ;msb +; +; Restore the context of the program being timed and return to it. +; + MPOPF + pop ax + ret + +ReferenceZTimerOn endp + +; +; Called by ZTimerOff to stop the timer and add the result to +; ReferenceCount for overhead measurements. Doesn't need to look +; at the BIOS count because timing a zero-length code fragment +; isn't going to take anywhere near 54 ms. +; + +ReferenceZTimerOff proc near +; +; Save the context of the program being timed. +; + pushf + push ax + push cx + +; +; Match the interrupt-window delay in ZTimerOff. +; + sti + rept 10 + jmp $+2 + endm + + mov al,00000000b + out MODE_8253,al ;latch timer +; +; Read the count and save it. +; + DELAY + in al,TIMER_0_8253 ;lsb + DELAY + mov ah,al + in al,TIMER_0_8253 ;msb + xchg ah,al + neg ax ;convert from countdown + ; remaining to elapsed + ; count + add cs:[ReferenceCount],ax +; +; Restore the context and return. +; + pop cx + pop ax + MPOPF + ret + +ReferenceZTimerOff endp + +;******************************************************************** +;* Routine called to report timing results. * +;******************************************************************** + +ZTimerReport proc near + + pushf + push ax + push bx + push cx + push dx + push si + push di + push ds +; + push cs ;DOS functions require that DS point + pop ds ; to text to be displayed on the screen + assume ds:_TEXT +; +; See if midnight or more than an hour passed during timing. If so, +; notify the user. 
+; + mov ax,[StartBIOSCountHigh] + cmp ax,[EndBIOSCountHigh] + jz CalcBIOSTime ;hour count didn't change, + ; so everything's fine + inc ax + cmp ax,[EndBIOSCountHigh] + jnz TestTooLong ;midnight or two hour + ; boundaries passed, so the + ; results are no good + mov ax,[EndBIOSCountLow] + cmp ax,[StartBIOSCountLow] + jb CalcBIOSTime ;a single hour boundary + ; passed-that's OK, so long as + ; the total time wasn't more + ; than an hour + +; +; Over an hour elapsed or midnight passed during timing, which +; renders the results invalid. Notify the user. This misses the +; case where a multiple of 24 hours has passed, but we'll rely +; on the perspicacity of the user to detect that case. +; +TestTooLong: + mov ah,9 + mov dx,offset TurnOverStr + int 21h + jmp short ZTimerReportDone +; +; Convert the BIOS time to microseconds. +; +CalcBIOSTime: + mov ax,[EndBIOSCountLow] + sub ax,[StartBIOSCountLow] + mov dx,54925 ;number of microseconds each + ; BIOS count represents + mul dx + mov bx,ax ;set aside BIOS count in + mov cx,dx ; microseconds +; +; Convert timer count to microseconds. +; + mov ax,[EndTimedCount] + mov si,8381 + mul si + mov si,10000 + div si ;* .8381 = * 8381 / 10000 +; +; Add timer and BIOS counts together to get an overall time in +; microseconds. +; + add bx,ax + adc cx,0 +; +; Subtract the timer overhead and save the result. +; + mov ax,[ReferenceCount] + mov si,8381 ;convert the reference count + mul si ; to microseconds + mov si,10000 + div si ;* .8381 = * 8381 / 10000 + sub bx,ax + sbb cx,0 + mov [CurrentCountLow],bx + mov [CurrentCountHigh],cx +; +; Convert the result to an ASCII string by trial subtractions of +; powers of 10. 
+; + mov di,offset PowersOfTenEnd - offset PowersOfTen - 4 + mov si,offset TimedCountStr +CTSNextDigit: + mov bl,'0' +CTSLoop: + mov ax,[CurrentCountLow] + mov dx,[CurrentCountHigh] + sub ax,PowersOfTen[di] + sbb dx,PowersOfTen[di+2] + jc CTSNextPowerDown + inc bl + mov [CurrentCountLow],ax + mov [CurrentCountHigh],dx + jmp CTSLoop +CTSNextPowerDown: + mov [si],bl + inc si + sub di,4 + jns CTSNextDigit +; +; +; Print the results. +; + mov ah,9 + mov dx,offset OutputStr + int 21h +; +ZTimerReportDone: + pop ds + pop di + pop si + pop dx + pop cx + pop bx + pop ax + MPOPF + ret + +ZTimerReport endp + + end + \ No newline at end of file diff --git a/16/scrasm/LZTIMER.OBJ b/16/scrasm/LZTIMER.OBJ new file mode 100644 index 00000000..0511b4d5 Binary files /dev/null and b/16/scrasm/LZTIMER.OBJ differ diff --git a/16/scrasm/MAIN.ASM b/16/scrasm/MAIN.ASM new file mode 100644 index 00000000..1c33a0fb --- /dev/null +++ b/16/scrasm/MAIN.ASM @@ -0,0 +1,134 @@ +;;=======================================================================;; +;; ;; +;; Scrolling Routines -- main program ;; +;; ;; +;; All other INC files are included here. The main routines for the ;; +;; frame-by-frame execution loop are also here. Finally I tried to keep ;; +;; global variables stored in this file as well. 
;;
+;;                                                                       ;;
+;;=======================================================================;;
+                dosseg
+                .model small
+                .386
+
+                .code
+                extrn   ZTimerOn:near, ZTimerOff:near, ZTimerReport:near ; LZTIMER.ASM defines all three as PROC NEAR in small model; a far call would leave CS on the stack after their near RET
+
+INCLUDE constant.inc
+
+
+DW_TABLE MACRO inc,num                          ;; emits num words: 0, inc, 2*inc, ... (num-1)*inc
+        count = 0
+        number = 0
+        WHILE (count LT num)
+                DW number
+                count = count + 1
+                number = number + inc
+                ENDM
+        ENDM
+
+DOSPRINT MACRO st                               ;; prints the '$'-terminated string at DS:st (clobbers AH, DX)
+                mov     ah,9                    ;; DOS function 09h
+                mov     dx,st
+                int     21h
+                ENDM
+
+EVEN
+Mult320         label   WORD                    ; both labels name the same table: entry y = 320*y
+MultBufWidth    label   WORD
+                DW_TABLE 320,200
+MultVirtWidth   label   WORD                    ; entry y = (VIRTUAL_WIDTH/4)*y
+                DW_TABLE (VIRTUAL_WIDTH/4),200
+
+INCLUDE palette.inc
+INCLUDE keyb.inc
+INCLUDE modex.inc
+INCLUDE page.inc
+INCLUDE init.inc
+INCLUDE map.inc
+;INCLUDE sprite.inc             NOT FOR NOW
+INCLUDE scroll.inc
+
+;; Various segments that need to be filled in later...
+EVEN
+segVideo        dw      0A000h                  ; videoram segment
+segText         dw      0B800h                  ; text segment
+segMap          dw      -1                      ; Map info segment (-1 = not loaded)
+segTiles        dw      -1                      ; Tile bitmap segment (-1 = not loaded)
+segBuffer       dw      -1                      ; Local 320x200 buffer segment
+segCode         dw      -1                      ; Code segment
+segPSP          dw      -1                      ; PSP segment
+segPalette      dw      -1                      ; Palette segment
+segTextPal      dw      -1                      ; Saved text palette
+
+EVEN
+bDoTransition   db      0                       ; set to 1 by the keyprocs (M key); tested and cleared in MainLoop
+
+;; This routine is called for each frame.
+;; Right now it just scrolls, but later all sprite animation would
+;; occur here too.
+EVEN
+OneFrame        PROC    near
+                call    Scroll                  ; Scrolls the screen
+;               call    AnimateSprites          ; prepares sprites on drawpage
+                jmp     FlipPage                ; shows drawpage... tail jump: FlipPage's RET returns to OneFrame's caller
+                ; no RET necessary
+OneFrame        ENDP
+
+;; Each frame -- call the frame motion code, then check for keyhit.
+EVEN
+MainLoop        PROC    NEAR                    ; per-frame loop: draw, poll keys, run keyproc, handle map switch
+next_frame:     call    OneFrame
+                JNKEY   next_frame              ; JNKEY is defined outside this view; presumably loops while no key activity
+                JKEYP   kESC,all_done           ; ESC -> quit, always
+                call    kprocCur                ; indirect call through the current key-procedure pointer
+                mov     al,bDoTransition
+                cmp     al,0
+                je      next_frame              ; no map switch requested
+transition:     FLASH_OFF 16,segPalette
+                mov     bDoTransition,0         ; consume the request
+                mov     ax,1
+                sub     ax,nMap
+                mov     nMap,ax                 ; Flip maps (nMap = 1 - nMap)
+
+                call    LoadData                ; NOTE(review): the carry result of LoadData is not checked here
+                call    update_full             ;<<<<
+                call    OneFrame
+                FLASH_ON 16,segPalette
+                jmp     next_frame
+all_done:       ret
+MainLoop        ENDP
+
+;; Beginning code -- Leaves text mode (saving the text screen) via
+;;                   a fade. It loads the map data and draws one
+;;                   frame before it fades on.
+Beginning       PROC    near
+                NEW_PAL segTextPal
+                PAL_SAVE segTextPal
+                FADE_OFF 1,segTextPal
+                call    SaveVideo
+                MODEX_START                     ; 320x200 Mode X graphics mode
+                PAL_BLACK
+
+                call    LoadData                ; This call will change...
+
+                call    update_full             ;<<<<
+                call    OneFrame
+                FADE_ON 1,segPalette
+                ret
+Beginning       ENDP
+
+;; Ending code -- restore to text mode via a flash
+Ending          PROC    near
+                FLASH_OFF 8,segPalette
+                call    RestoreVideo
+                FLASH_ON 8,segTextPal
+                ret
+Ending          ENDP
+
+                .data
+
+                .stack 2048
+
+                END Initialize
+
\ No newline at end of file
diff --git a/16/scrasm/MAIN.OBJ b/16/scrasm/MAIN.OBJ
new file mode 100644
index 00000000..ff612818
Binary files /dev/null and b/16/scrasm/MAIN.OBJ differ
diff --git a/16/scrasm/MAKEFILE b/16/scrasm/MAKEFILE
new file mode 100644
index 00000000..8b9557ae
--- /dev/null
+++ b/16/scrasm/MAKEFILE
@@ -0,0 +1,47 @@
+OBJS=main.obj lztimer.obj
+INCLUDES=modex.inc keyb.inc palette.inc page.inc scroll.inc map.inc \
+        constant.inc init.inc
+PROGRAM=scroll
+
+.c.obj:
+        cl -c -Zi -Od -W4 $*.c
+
+.asm.obj:
+        masm -ml -zi $*.asm
+
+.obj.exe:
+        link /CO @$*.lnk
+
+project: scroll.exe gensq.exe genmap.exe genpal.exe
+
+main.asm: $(INCLUDES)
+
+scroll.lnk: makefile
+        echo $(OBJS: =+) > $*.lnk
+        echo $(PROGRAM); >> $*.lnk
+
+scroll.exe: $(OBJS) scroll.lnk
+        link /CO @$*.lnk
+
+gensq.obj: $*.c
+
+gensq.lnk: makefile
+        echo $*.obj; > $*.lnk
+
+gensq.exe: $*.obj $*.lnk
+
+genmap.obj: $*.c
+
+genmap.lnk: 
makefile
+        echo $*.obj; > $*.lnk
+
+genmap.exe: $*.obj $*.lnk
+
+genpal.obj: $*.c
+
+genpal.lnk: makefile
+        echo $*.obj; > $*.lnk
+
+genpal.exe: $*.obj $*.lnk
+
+
\ No newline at end of file
diff --git a/16/scrasm/MAP.INC b/16/scrasm/MAP.INC
new file mode 100644
index 00000000..c87494c2
--- /dev/null
+++ b/16/scrasm/MAP.INC
@@ -0,0 +1,413 @@
+;; MAP in own segment allows map of tiles to be up to 65536 tiles in area
+;; which translates to about 16.8 million pixels of virtual screen. This
+;; can be represented in almost any rectangle -- just set MAP_WIDTH.
+
+;; Sorry this code isn't commented -- I was working on it right up until
+;; the point that I released this. You have any questions? Ask away
+;; (my internet address is in the DOC file).
+
+MAPHEADER       STRUCT, NONUNIQUE
+        MapName         BYTE ""
+        Wid             WORD 2
+        Ht              WORD 3
+        Extent          WORD 4
+        OffX1           WORD 5
+        OffY1           WORD 6
+        OffX2           WORD 7
+        OffY2           WORD 8
+        WrapX           WORD 9
+        WrapY           WORD 10
+        Magic           WORD 11
+MAPHEADER       ENDS
+MapInfo         MAPHEADER <>
+
+
+; In: DS:DX = offset of filename
+LoadMapFile     PROC    near                    ; frees any loaded map, loads a new one, resets scroll state; CF set on error
+                mov     ax,segMap
+                cmp     ax,-1
+                je      map_not_loaded          ; -1 = nothing to free
+                sub     ax,(SIZEOF MAPHEADER) / 16 ; segMap points past the header; step back to the start of the block
+                mov     es,ax
+                mov     ah,49h                  ; DOS: free the memory block at ES
+                int     21h
+                mov     nError,ERR_MEM          ; MOV keeps CF from INT 21h; nError is written even when the free succeeded
+                jc      lm_err
+                mov     segMap,-1
+
+map_not_loaded: call    LoadFile                ; defined elsewhere; as used here: CF = error, DX = segment of loaded data
+                jc      lm_err
+
+                mov     ds,dx                   ; DS:SI -> start of the loaded file (its header)
+                mov     si,0
+                mov     ax,cs
+                mov     es,ax
+                lea     di,MapInfo              ; ES:DI -> MapInfo in the code segment
+                mov     cx,(SIZEOF MAPHEADER) / 4
+                rep     movsd                   ; copy the header, one dword at a time
+
+                add     dx,(SIZEOF MAPHEADER) / 16
+                mov     cs:segMap,dx            ; segMap = first paragraph after the header
+
+                mov     BlankPage.Valid,0       ; NOTE(review): DS is the file segment here; these stores presumably get a CS override from ASSUME -- confirm
+                mov     ShowPage.Valid,0
+                mov     DrawPage.Valid,0
+
+                mov     upper_left,0            ; new map starts unscrolled and motionless
+                mov     ScrollPosX,0
+                mov     ScrollPosY,0
+                mov     ScrollDX,0
+                mov     ScrollDY,0
+
+lm_err:         ret                             ; DS is not restored; LoadData reloads it before each call
+LoadMapFile     ENDP
+
+LoadTilesFile   PROC    near                    ; frees any loaded tile set and loads a new one (DS:DX = filename); CF set on error
+                mov     ax,segTiles
+                cmp     ax,-1
+                je      tiles_not_loaded        ; -1 = nothing to free
+                mov     es,ax
+                mov     ah,49h                  ; DOS: free the memory block at ES
+                int     21h
+                mov     nError,ERR_MEM          ; MOV keeps CF from INT 21h
+                jc      lt_err
+                mov     segTiles,-1             ; the tile block is gone: clear segTiles (clearing segMap would leak the map and keep a stale tile segment)
+
+tiles_not_loaded: call  LoadFile                ; defined elsewhere; as used here: CF = error, DX = segment of loaded data
+                jc      lt_err                  ; use this procedure's own exit label
+                mov     segTiles,dx
+
+                mov     BlankPage.Valid,0
+                mov     ShowPage.Valid,0
+                mov     DrawPage.Valid,0
+
+lt_err:         ret
+LoadTilesFile   ENDP
+
+EVEN
+LoadData        PROC    near
+                ; Load 
squares from data file + mov bx,nMap + shl bx,1 + mov dx,fntblTiles[bx] + mov ds,segCode + call LoadTilesFile + ; returns Carry if error + jc load_error + + ; Load map from data file + mov ds,segCode + mov bx,nMap + shl bx,1 + mov dx,fntblMap[bx] + call LoadMapFile + ; returns Carry if error + +load_error: ret +LoadData ENDP + +EVEN +update_full PROC + mov ds,segTiles + mov es,segVideo + mov fs,segMap + + mov dx,SC_INDEX + mov al,MAP_MASK + out dx,al + + mov di,DrawPage.Address + add di,upper_left + mov bp,MapInfo.OffX1 + add bp,MapInfo.OffY1 + + mov dx,MapInfo.WrapX + + mov ch,(VIRTUAL_WIDTH/SQUARE_WIDTH) +draw_full_loop: push cx + push si + push dx + + mov al,11h + mov si,0 + +update_f_loop: mov dx,SC_INDEX + 1 + out dx,al + push bp + call draw_col + pop bp + sub di,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 + add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 + shl al,1 + jnc update_f_loop + + pop dx + dec dx + jnz update_f_go_on + mov dx,MapInfo.Wid + sub bp,dx +update_f_go_on: inc bp + pop si + add di,(SQUARE_WIDTH/ 4) + pop cx + dec ch + jnz draw_full_loop + + + + + + + + + + + + mov dx,GC_INDEX + mov ax,ALL_COPY_BITS + out dx,ax + + mov dx,SC_INDEX + mov ax,0F02h + out dx,ax + + mov ds,segVideo + mov si,DrawPage.Address + add si,upper_left + mov es,segVideo + mov di,BlankPage.Address + add di,upper_left + mov cx,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 + rep movsb + mov si,DrawPage.Address + add si,upper_left + mov di,ShowPage.Address + add di,upper_left + mov cx,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 + rep movsb + + mov dx,GC_INDEX + mov ax,ALL_DRAW_BITS + out dx,ax + + ret +update_full ENDP + +EVEN +update_left PROC + mov ds,cs:segTiles + mov es,cs:segVideo + mov fs,cs:segMap + + mov dx,SC_INDEX + mov al,MAP_MASK + out dx,al + + mov al,011h + mov si,0 + mov di,cs:DrawPage.Address + add di,cs:upper_left ; becomes DI later + mov bp,MapInfo.OffX1 + add bp,MapInfo.OffY1 + +update_l_loop: mov dx,SC_INDEX + 1 + out dx,al + push bp + call draw_col + pop bp + sub di,(VIRTUAL_WIDTH * 
VIRTUAL_HEIGHT) / 4 + add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 + shl al,1 + jnc update_l_loop + + ret +update_left ENDP + +EVEN +update_right PROC near + mov ds,cs:segTiles + mov es,cs:segVideo + mov fs,cs:segMap + + mov dx,SC_INDEX + mov al,MAP_MASK + out dx,al + + mov bp,MapInfo.OffX2 + add bp,MapInfo.OffY1 + + mov al,011h + mov si,0 + + mov di,cs:DrawPage.Address ; becomes DI + add di,cs:upper_left + add di,(VIRTUAL_WIDTH - SQUARE_WIDTH) / 4 + +update_r_loop: mov dx,SC_INDEX + 1 + out dx,al + + push bp + call draw_col + pop bp + sub di,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 + add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 + shl al,1 + jnc update_r_loop + + ret +update_right ENDP + +EVEN +update_top PROC + mov ds,cs:segTiles + mov es,cs:segVideo + mov fs,cs:segMap + + mov dx,SC_INDEX + mov al,MAP_MASK + out dx,al + + mov di,cs:DrawPage.Address + add di,cs:upper_left + mov bp,MapInfo.OffX1 + add bp,MapInfo.OffY1 + + mov al,011h + mov si,0 + +update_top_loop: + mov dx,SC_INDEX + 1 + out dx,al + push bp + call draw_row + pop bp + sub di,VIRTUAL_WIDTH / 4 + add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 + shl al,1 + jnc update_top_loop + + ret +update_top ENDP + +EVEN +update_bottom PROC + mov ds,cs:segTiles + mov es,cs:segVideo + mov fs,cs:segMap + + mov dx,SC_INDEX + mov al,MAP_MASK + out dx,al + + mov di,cs:DrawPage.Address + add di,cs:upper_left + add di,(VIRTUAL_WIDTH * (VIRTUAL_HEIGHT - SQUARE_HEIGHT)) / 4 + mov bp,MapInfo.OffX1 + add bp,MapInfo.OffY2 + + mov al,011h + mov si,0 + +update_bottom_loop: + mov dx,SC_INDEX + 1 + out dx,al + push bp + call draw_row + pop bp + sub di,VIRTUAL_WIDTH / 4 + add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 + shl al,1 + jnc update_bottom_loop + + ret +update_bottom ENDP + +; Draws ONE plane of a single col +EVEN +draw_col PROC near + ; DI->upper left corner of col to draw + ; BP->col of map to draw + ; SI used to point at tiles + ; AX,CX used + ; BX used to push SI + ; DX unused + shl eax,16 ; save it + mov ax,MapInfo.WrapY + + mov 
cl,(VIRTUAL_HEIGHT / SQUARE_HEIGHT) +do_col_loop: mov bx,si + mov bh,byte ptr fs:[bp] ; change tile # + + mov ch,SQUARE_HEIGHT +do_col_sq_loop: mov dl,byte ptr ds:[bx+2] + mov dh,byte ptr ds:[bx+3] + shl edx,16 + mov dl,byte ptr ds:[bx+0] + mov dh,byte ptr ds:[bx+1] + mov es:[di],edx ; 32-bit write + add di,VIRTUAL_WIDTH / 4 + add bx,4 + dec ch + jnz do_col_sq_loop + + add bp,MapInfo.Wid + dec ax + jnz yayaya + mov ax,MapInfo.Ht + sub bp,MapInfo.Extent +yayaya: + + dec cl + jnz do_col_loop + + shr eax,16 ; restore it + + ret +draw_col ENDP + +; Draws ONE plane of a single row +EVEN +draw_row PROC near + push ax +; shl eax,16 ; save ax + + mov ax,MapInfo.WrapX + + ; DI->upper left corner of row to draw + ; BP->row of map to draw + ; SI used to point at tiles + ; AX,CX used + ; BX used to push SI + ; DX unused + + mov cl,(VIRTUAL_WIDTH / SQUARE_WIDTH) +do_row_loop: mov bx,si + mov bh,byte ptr fs:[bp] ; change tile # + + mov ch,SQUARE_HEIGHT +do_row_sq_loop: mov dl,byte ptr ds:[bx+2] + mov dh,byte ptr ds:[bx+3] + shl edx,16 + mov dl,byte ptr ds:[bx+0] + mov dh,byte ptr ds:[bx+1] + mov es:[di],edx + add di,(VIRTUAL_WIDTH / 4) + add bx,4 + dec ch + jnz do_row_sq_loop + + add di,(-VIRTUAL_WIDTH*SQUARE_HEIGHT + SQUARE_WIDTH) / 4 + inc bp + dec ax + jnz yayaya2 + mov ax,MapInfo.Wid + sub bp,ax +yayaya2: + dec cl + jnz do_row_loop + +; shr eax,16 ; restore it + pop ax + ret +draw_row ENDP + \ No newline at end of file diff --git a/16/scrasm/MODEX.INC b/16/scrasm/MODEX.INC new file mode 100644 index 00000000..174aa354 --- /dev/null +++ b/16/scrasm/MODEX.INC @@ -0,0 +1,88 @@ +; ==================================================================== +; Entry points: +; ==================================================================== +MODEX_START MACRO + mov ax,13h ;let the BIOS set standard 256-color + int 10h ; mode (320x200 linear) +; PALETTE_BLACK + call ModifyForX + ENDM + +; ==================================================================== +; This is MODE-X code from 
Dr. Dobb's Journal, by Michael Abrash. +; I modified it from 320x240 back to 320x200, and then to 512 virtual +; width, for scrolling purposes. +; ==================================================================== + +; Mode X (320x240, 256 colors) mode set routine. Works on all VGAs. +; **************************************************************** +; * Revised 6/19/91 to select correct clock; fixes vertical roll * +; * problems on fixed-frequency (IBM 851X-type) monitors. * +; **************************************************************** +; Modified from public-domain mode set code by John Bridges. + +; Index/data pairs for CRT Controller registers that differ between +; mode 13h and mode X. +CRTParms label word +; dw 00d06h ;vertical total +; dw 03e07h ;overflow (bit 8 of vertical counts) +; dw 04109h ;cell height (2 to double-scan) +; dw 0ea10h ;v sync start +; dw 0ac11h ;v sync end and protect cr0-cr7 +; dw 0df12h ;vertical displayed = 480 + dw 00014h ;turn off dword mode * +; dw 0e715h ;v blank start +; dw 00616h ;v blank end + dw 0e317h ;turn on byte mode * + + dw (VIRTUAL_WIDTH*32)+13h ; width of screen = VWid NEW +; dw 09012h ;vertical displayed = 400 (already like this) +CRT_PARM_LENGTH equ (($-CRTParms)/2) + +ModifyForX PROC near + mov dx,SC_INDEX + mov ax,0604h + out dx,ax ;disable chain4 mode + mov ax,0100h + out dx,ax ;synchronous reset while setting Misc + ; Output for safety, even though clock + ; unchanged + mov dx,MISC_OUTPUT + mov al,0e3h + out dx,al ;select 25 MHz dot clock & 60 Hz scanning rate + + mov dx,SC_INDEX + mov ax,0300h + out dx,ax ;undo reset (restart sequencer) + + mov dx,CRTC_INDEX ;reprogram the CRT Controller + mov al,11h ;VSync End reg contains register write + out dx,al ; protect bit + inc dx ;CRT Controller Data register + in al,dx ;get current VSync End register setting + and al,7fh ;remove write protect on various + out dx,al ; CRTC registers + dec dx ;CRT Controller Index + cld + push cs + pop ds + mov si,offset 
CRTParms ;point to CRT parameter table + mov cx,CRT_PARM_LENGTH ;# of table entries +SetCRTParmsLoop: + lodsw ;get the next CRT Index/Data pair + out dx,ax ;set the next CRT Index/Data pair + loop SetCRTParmsLoop + + mov dx,SC_INDEX + mov ax,0f02h + out dx,ax ;enable writes to all four planes + mov ax,SCREEN_SEG ;now clear all display memory, 8 pixels + mov es,ax ; at a time + sub di,di ;point ES:DI to display memory + sub ax,ax ;clear to zero-value pixels + mov cx,8000h ;# of words in display memory + rep stosw ;clear all of display memory + + ret +ModifyForX ENDP + \ No newline at end of file diff --git a/16/scrasm/PAGE.INC b/16/scrasm/PAGE.INC new file mode 100644 index 00000000..a55bdf0b --- /dev/null +++ b/16/scrasm/PAGE.INC @@ -0,0 +1,109 @@ +;; ==================================================================== +;; (Code follows) +;; ==================================================================== + +EVEN +upper_left dw 0 ; Stores upper left corner offset + ; relative to page offset. +pages dw 0 ; for counting frame-per-sec + +PAGE_INFO STRUCT 2,NONUNIQUE + Address dw 0 + UpperLeftAddress dw 0 + MapPosX dw 0 + MapPosY dw 0 + Alignment db 0 + AlignmentMask db 0 + ScrollOffset dw 0 + Rectangles dw 0 + Valid db 0 +PAGE_INFO ENDS + +DrawPage PAGE_INFO +ShowPage PAGE_INFO +BlankPage PAGE_INFO + +ROTATE3 MACRO reg,item + mov reg,cs:ShowPage.item + xchg reg,cs:BlankPage.item + xchg reg,cs:DrawPage.item + mov cs:ShowPage.item,reg + ENDM ; Leaves ShowPage.item in reg! + +;; This procedure is used to flip between the three available pages. +;; Originally from Dr. Dobb's Journal's Graphics Programming column by +;; Michael Abrash, I've reworked the code to be more specific to my +;; own purposes, and commented it more. +EVEN +FlipPage PROC near + ; This series of instructions circles the show_page, blank_page, + ; and draw page appropriately and leaves the current page to show + ; in AX. 
Note that it's a lot more instructions than it looks like, + ; but I unrolled the copy loop for speed. So-so good idea, because + ; if you add a field and forget to rotate it, it could mean trouble! + ROTATE3 ax,Rectangles + ROTATE3 ax,ScrollOffset + ROTATE3 ax,MapPosX + ROTATE3 ax,MapPosY +; ROTATE3 al,AlignmentMask SPRITES ... + ROTATE3 al,Alignment + mov di,ax ; DI = scroll offset low, and + ; garbage in the high bits... + and di,3 ; DI = pixel pan, 0 to 3. + shl di,1 ; Mode X requires 0 2 4 or 6. + ROTATE3 ax,Address + ROTATE3 al,Valid + ROTATE3 ax,UpperLeftAddress ; Leaves AX=ShowPage.ULAddr + + add ax,cs:ShowPage.ScrollOffset + + ; AX is set up to be the current show page already. + ; By pre-loading BX with the low-address set code, and CX with + ; the high-address set code, we can more quickly flip the page + ; after the vertical retrace period. + mov bl,START_ADDRESS_LOW ;preload for fastest + mov bh,al ; flipping once display + mov cl,START_ADDRESS_HIGH ; enable is detected + mov ch,ah + + ; Wait for display enable to be active (status is active low), to be + ; sure both halves of the start address will take in the same frame. + mov dx,INPUT_STATUS_1 +WaitDE: in al,dx + test al,01h + jnz WaitDE ;display enable is active low (0 = active) + + ; Set the start offset in display memory of the page to display. + mov dx,CRTC_INDEX + mov ax,bx + out dx,ax ;start address low + mov ax,cx + out dx,ax ;start address high + + ; Now wait for vertical sync, so the other page will be invisible when + ; we start drawing to it. + mov dx,INPUT_STATUS_1 +WaitVS: in al,dx + test al,08h + jz WaitVS ;vertical sync is active high (1 = active) + + ; Finally, have to adjust the pixel panning register in order + ; to fine-tune the starting address on a pixel level. + ; This pixel pan value is the scroll offset mod 4 -- but since + ; Mode X's pixel pan works by values of 2 (0, 2, 4 or 6) we + ; have to shift appropriately. 
+ mov dx,ATC_INDEX + mov al,13h ; 13h = set pixel pan + out dx,al + mov ax,di ; DI = pixel pan calculated above + out dx,al + mov dx,ATC_INDEX + mov al,32 ; Allows the computer to use this register + out dx,al ; again. Without this OUT, the screen will + ; remain blank! + + ; Increment the page counter now! + inc cs:pages + ret +FlipPage ENDP + \ No newline at end of file diff --git a/16/scrasm/PALETTE.INC b/16/scrasm/PALETTE.INC new file mode 100644 index 00000000..e3bd381e --- /dev/null +++ b/16/scrasm/PALETTE.INC @@ -0,0 +1,239 @@ +;; Palette operations +;; Note that where needed in the macros, a "palette" refers to +;; the segment handle to a 768-byte piece of memory. So palettes +;; can be loaded and freed, they're not permanent, but if you want +;; to use a fixed (not allocated) palette you'd better make sure +;; it's segment aligned or else you can't use these macros. If it +;; is, you can just supply "seg myPalette" as the 'palette' argument +;; to any of these macros. + +;; Fade from a palette to black +FADE_OFF MACRO fade,palette + mov si,0 + mov ds,palette + mov bh,fade ; positive -> Gets dimmer... + mov bl,0 ; Starts exact + mov cx,64/fade+1 ; Total number of loops required + call FadePalette + ENDM + +;; Fade from black to a palette +FADE_ON MACRO fade,palette + mov si,0 + mov ds,palette + mov bh,-fade ; negative -> Gets brighter... + mov bl,64 ; Starts totally dimmed + mov cx,64/fade+1 ; Total number of loops required + call FadePalette + ENDM + +;; Flash from a palette to white +FLASH_OFF MACRO fade,palette + mov si,0 + mov ds,palette + mov bh,-fade ; negative -> gets brighter + mov bl,0 ; Starts exact + mov cx,64/fade+1 ; Total number of loops required + call FadePalette + ENDM + +;; Flash from white to a palette +FLASH_ON MACRO fade,palette + mov si,0 + mov ds,palette + mov bh,fade ; positive -> Gets dimmer... 
+ mov bl,-64 ; Starts totally bright + mov cx,64/fade+1 ; Total number of loops required + call FadePalette + ENDM + +;; Save a palette into a palette-sized piece of memory +PAL_SAVE MACRO palette + mov es,palette + mov di,0 + call SavePalette + ENDM + +; Returns AX = a new segment for a palette +NEW_PAL MACRO palette + mov bx,(256 * 3) / 16 + mov ah,48h + int 21h + mov palette,ax + ENDM + +;; Black the entire palette temporarily. Used to blank the screen while +;; drawing a frame before fading in. +PAL_BLACK MACRO + mov ax,seg tmppal + mov ds,ax + mov si,OFFSET tmppal + mov bh,-1 ; Doesn't really matter... + mov bl,64 ; Starts totally dimmed + mov cx,1 ; Just one time -- to leave it black + call FadePalette + ENDM + +;; Set the entire palette to white (the bright counterpart of PAL_BLACK). +PAL_WHITE MACRO + mov ax,seg tmppal + mov ds,ax + mov si,OFFSET tmppal + mov bh,-1 ; Doesn't really matter... + mov bl,-64 ; Starts totally bright + mov cx,1 ; Just one time -- to leave it white + call FadePalette + ENDM + +;; Send the current contents of tmppal to the VGA palette as-is: with +;; CX = 0, FadePalette skips the fade pass and only updates. +PAL_UPDATE MACRO + mov cx,0 ; 0 times = update + call FadePalette + ENDM + +WAITBORDER MACRO + LOCAL wbr1,wbr2 + mov dx,INPUT_STATUS_1 +wbr1: in al,dx + test al,8 + jnz wbr1 +wbr2: in al,dx + test al,8 + jz wbr2 + ENDM + +;; Fade Palette: +;; The following code is modified greatly from the Future Crew's palette +;; fading code. Works on blocks of 256 colors only, so far, but I might +;; change it later. Also, it theoretically could "anti-fade" -- fade to +;; white -- which I call flashing, so I added that ability, which was +;; missing from FC's code. +EVEN +tmppal DB 768 dup (?) ; Stores old palette +FadePalette PROC NEAR + mov ax,seg tmppal + mov es,ax + +FadeLoop: push cx + push si + + cmp cx,0 + je JustUpdate + + ; Load in the colors in the palette + mov di,OFFSET tmppal ; ES:DI -> temp palette + mov cx,768 ; Reads 256*3 bytes at a time.
+loadpal_loop: mov al,ds:[si] ; Load one color byte + inc si + sub al,bl ; Subtract the fade amount + jge pal_more ; Limit the range by clipping + xor al,al ; to between 0 and 63 + jmp pal_ok ; (there's probably a faster +pal_more: cmp al,63 ; way to do it than this, + jle pal_ok ; but I don't know it) + mov al,63 +pal_ok: mov es:[di],al ; Store that byte in the new + inc di + dec cx ; temp palette and loop. + jnz loadpal_loop + + ; Get ready to move this block of palette values +JustUpdate: sti ; Let interrupts happen now, + WAITBORDER ; while waiting for a retrace, + cli ; instead of more critical times + + mov dx,PEL_WRITE_REG; Set up to write to color register, + xor al,al ; starting at palette entry 0. + out dx,al + mov dx,PEL_DATA_REG ; Point at color port + + ; Quickly put out the first half of the color palette + mov di,OFFSET tmppal + mov cl,(768/6)/2 ; Does 2 loops of 128 colors each. + cli ; Waits a retrace in between... +FirstHalfLoop: REPEAT 6 ; Steps of 6 -- reduces the + mov al,es:[di] ; number of LOOP instructions + inc di + out dx,al + ENDM + dec cl + jnz FirstHalfLoop + sti + + WAITBORDER ; Waits one retrace -- less flicker + mov dx,PEL_DATA_REG ; Reset DX + + ; Now, quickly put out the other half of the colors. + mov cl,(768/6)/2 + cli +SecondHalfLoop: REPEAT 6 ; Steps of 6 -- reduces the + mov al,es:[di] ; number of LOOP instructions + inc di + out dx,al + ENDM + dec cl + jnz SecondHalfLoop + + ; For the next iteration, restore everything and loop + pop si + pop cx + + cmp cx,0 + je JustUpdated + + add bl,bh ; Change brightness by BH + + dec cx + jnz FadeLoop + + ; All done, re-enable interrupts and return +JustUpdated: sti + ret +FadePalette ENDP + +;; Saves the palette into the memory pointed at by ES:DI. That memory +;; must be at least 768 bytes long... +SavePalette PROC NEAR + mov dx,PEL_READ_REG ; Set up to read from color register, + xor al,al ; starting at palette entry 0.
+ out dx,al + mov dx,PEL_DATA_REG + + ; Quickly read in the first half of the color palette + mov cl,(768/6) + cli +ReadPalLoop: REPEAT 6 ; Steps of 6 -- reduces the + in al,dx ; number of LOOP instructions + mov es:[di],al + inc di + ENDM + dec cl + jnz ReadPalLoop + ; All done, re-enable interrupts and return + sti + ret +SavePalette ENDP + +;; Load a palette from a file. Opens the file and reads it into +;; memory (standard LoadFile) and then points the palette at that +;; newly allocated memory. Also, frees old memory before it does +;; any loading ... +LoadPaletteFile PROC near + mov ax,segPalette + cmp ax,-1 + je pal_not_loaded + mov es,ax + mov ah,49h + int 21h + mov nError,ERR_MEM + jc lp_err + mov segPalette,-1 + +pal_not_loaded: call LoadFile + jc lp_err + + mov segPalette,dx +lp_err: ret +LoadPaletteFile ENDP + \ No newline at end of file diff --git a/16/scrasm/SCROLL.DOC b/16/scrasm/SCROLL.DOC new file mode 100644 index 00000000..0893db36 --- /dev/null +++ b/16/scrasm/SCROLL.DOC @@ -0,0 +1,297 @@ + ________________________________________________ + |+----------------------------------------------+| + || I N T R O D U C I N G : || + |+----------------------------------------------+| + || Steve's 4-Way || + || ___ ___ ____ ___ _ _ || + || / __| / __| | _ \ / _ \ | | | | || + || | <_ | | | |_> | | | | | | | | | || + || \_ \ | | | / | | | | | | | | || + || __> | | |__ | |\ \ | |_| | | |__ | |__ || + || |___/ \___| |_| \_| \___/ |____| |____| || + |+______________________________________________+| + +------------------------------------------------+ + + There, now that I have the hype outta the way, let me explain what +this program is. I'm releasing the source code to my 4-way scrolling +code so that others can learn from it. There aren't enough really +good resources out there for someone learning to program games, so I'm +trying to do my part to help. 
+ +WHAT IT IS: + + The code is 100% assembly, for which I use MASM 6.0, so there may +be a few problems converting to Turbo Assembler. I also use the ".386" +directive, meaning that you can't run this code with a 286 or earlier. +But most of the code should be easily convertible. I haven't been +programming for 386's much so I really don't make use of the 386 +registers like I could have. Mostly I just did it for some extra 386 +instructions. + + You'll need a VGA which can support mode 13h, the MCGA mode. This +code runs in "tweaked" MCGA mode, or what is called "Mode X". For more +information on Mode X, check out the 1991 - 1992 issues of Dr. Dobb's +Journal, wherein you will find Michael Abrash's excellent Graphics +Programming column. This is where I (and many others) found out about +Mode X, which is an excellent graphics mode for fast 256-color graphics. +Also, you can take a look at XLIB, YakIcons, FastGraph, etc., which are all +graphics libraries (public domain or otherwise) which support Mode X +graphics and probably have some good documentation on programming the mode. +Additionally, check out _The Programmer's Guide to the EGA and VGA Cards_, +by Richard Ferraro, and _Power Graphics Programming_ (out of print, but +available directly from Que Books) by Michael Abrash. Finally, you can +ask about graphics programming on many newsgroups such as +"rec.games.programmer"... + +WHAT IT DOES: + + The code will allow you to create "tiled" background patterns and then +to omnidirectionally scroll over them. You could implement sprite routines +and then animate them over the background, but I haven't gotten this far +yet. The scrolling is always relative -- i.e. no "jump-to"'s, just "scroll +left", "scroll up", etc. Jump-to would be very easy to implement, I just +haven't done it yet. + + It runs at about 60-70 fps on a 386/20, which means that it is operating +in under the time of one vertical refresh (_just_ under, according to some +timing I've done).
This could probably be reduced, but the best way to +reduce it is to limit the speed at which it scrolls -- if you stick to +scrolling at most 8 pixels at a time in two axes or 16 pixels at a time +in one axis, it is very fast. More than that, and it occasionally takes +more than one refresh period even on my 486. Still, that should be +fast enough for just about any game. + + I also included some routines to generate maps, tiles, and palettes +so you can see the file formats. These are in C, and the executables +are around in case you don't care to recompile. None of the utilities +are exactly production quality. You'll have to look at the code to +figure out the arguments! Luckily you can just run them with no args +and they perform default behavior. + + Lastly, the program SCROLL.EXE is a demo of what it can do. In this +demo you can use one of two sets of keyboard controls to scroll around. +One, the default set of commands, lets you press up/down/left/right and +scroll in that direction. The other has "inertia" -- pressing up/down/ +left/right will accelerate you in that direction. You'll see what I +mean, just experiment. You can switch keyhandlers by pressing K. +You can also switch between the diagonal pattern map and a logo map +by pressing M. (By the way, it will eventually run out of memory loading +the maps and the diagonal map will screw up... don't worry about it, +it'd be fixed if I had more time). Try it out. + +CREDIT WHERE CREDIT IS DUE: + + People who (unknowingly) helped me out: + + Keyboard by Steven Dollins, Brown Computer Group. From his + KEYINT routines, which is an INT 9 handler to let you + keep track of many keys being pressed at the same time. + Graphics, basically, by Michael Abrash, whose Mode X columns + influenced me greatly. + Palette fades and file I/O by the Future Crew. Thanks for + letting out the Mental Surgery code! + CPU detection by Ray Duncan, taken from one of his books.
+ + Obviously I haven't just pirated the code, it's all from publicly +released source code and I modified it a bit. But I wouldn't have come +up with this whole thing without those helping hands. Thanks. + +HOW IT WORKS: + + Here's how the scrolling works. I'll explain it from a single-page +point of view, although it actually uses several pages of video memory. +The video memory is laid out like this: +ÉÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÑÍÍÍÍÍÍÍÍÍÍÍÍ» ÄÄÄ +º ³ / / / / º ³ +º ³/ / / / º ³ +º ³ / / / /º ³ +º Visible page ³ / Not / / º ³ +º ³/ visible/ º ³ +º ³ / / / /º 64K +º ³ / / / / º ³ +ÇÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÙ/ / / / º ³ +º / / / / / / / / / / / / / / /º ³ +º / / / / / / / / / / / / / / / º ³ +º/ / / / / / / / / / / / / / / º ³ +ÈÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍͼ ÄÄÄ +In other words, it has a virtual width greater than the actual screen +width, and a virtual height higher than the actual screen height. The +VGA hardware allows hardware panning around within the virtual area, so +that makes panning much easier: you only have to draw the information +that is coming on to the screen with each pan. + +What is Happening: What the user sees: +ÉÍÍÍÍÍÍÍÍÑÍÍÍÍ» ÚÄÄÄÄÄÄÄÄ¿ +º hel³////º ³ hel³ The picture that is +ÇÄÄÄÄÄÄÄÄÙ////º ÀÄÄÄÄÄÄÄÄÙ coming on to the screen +º/////////////º ("hello") appears to +ÈÍÍÍÍÍÍÍÍÍÍÍÍͼ the user to be scrolling +ÉÍÑÍÍÍÍÍÍÍÑÍÍÍ» ÚÄÄÄÄÄÄÄÄ¿ left, although it is +º/³ hell³///º ³ hell³ actually at a stationary +º/ÀÄÄÄÄÄÄÄÙ///º ÀÄÄÄÄÄÄÄÄÙ location in memory... +º/////////////º Each time the frame moves, +ÈÍÍÍÍÍÍÍÍÍÍÍÍͼ it is not necessary to +ÉÍÍÑÍÍÍÍÍÍÍÑÍÍ» ÚÄÄÄÄÄÄÄÄ¿ redraw the parts that stay +º//³ hello³//º ³ hello³ on the screen, just the +º//ÀÄÄÄÄÄÄÄÙ//º ÀÄÄÄÄÄÄÄÄÙ parts that become visible. +º/////////////º +ÈÍÍÍÍÍÍÍÍÍÍÍÍͼ + + The same works up&down too, or even left/right and up/down at the same +time. The problem occurs when you scroll enough to hit the edge of the +virtual space. 
Luckily, video memory increases and wraps at the right +edge to one line down on the left edge. So you end up with a situation +like this after scrolling too far right: +ÉÍÍÍÍÍÍÍÍÍÍÍÍÍÍÑÍÍÍÍ» ÄÄÄ +ÇÄÄÄÄÄÄÄ¿//////³ º ³ User sees: +º ³//////³ Thº ³ ÚÄÄÄÄÄÄÄÄÄÄÄ¿ +ºe quick³//////³ º 64K ³ ³ +º ³//////ÀÄÄÄĶ ³ ³ The quick³ +ÇÄÄÄÄÄÄÄÙ///////////º ³ ³ ³ +º///////////////////º ³ ÀÄÄÄÄÄÄÄÄÄÄÄÙ +ÈÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍͼ ÄÄÄ +The wrapping is transparent to the user. So, it appears that you can +scroll left & right infinitely, by simply always updating the amount of +memory that has scrolled into view. + + But what happens when you scroll too far down? Now Intel segments come +to the rescue! Because the video memory is 64K, and that is also the +largest amount of memory you can access in a segment, the segment arithmetic +performs the top-to-bottom wrapping for me. It results in a similar +situation as is pictured above, but with the screen split horizontally +instead of vertically. Again, it's completely transparent to the user. + + One performance optimization that I've done is to organize the background +picture that is being scrolled into quantitized "tiles" -- 16x16 pixels in +area. This means that you can store a large amount of picture data if that +data is repetitive -- as the backgrounds of many video games are. This also +helps when figuring out how much new stuff to draw on the screen. I can wait +until the panning crosses a 16-pixel border, then draw another 16-pixel +strip, and then wait for another tile crossing, etc. You can see this in +the MAP.INC and SCROLL.INC code. 16x16 pixels also leads to 256-pixel-square +tiles, which is always a convenient number in assembly... it helps out in +several places in the code. + + So, the display page is "wandering" around the video memory, only drawing +what is necessary at any time. Meanwhile you can animate sprites over the +background, etc. 
The only problem is that with one page, information is +constantly being drawn to that page and you can never guarantee that it is in +a correct state at the time of a vertical refresh period. Instead, I actually +use several pages, so that one can be shown while the other is worked on. +This guarantees a perfect picture at any time. So for now, let's ignore the +scrolling for a second, and talk about the paging, because it's easier to +understand the paging if scrolling isn't happening. + + Here's a basic explanation of how the paging works. I use three separate +pages, a Draw page, a Show page, and a Blank page. The Show page refers to +the page that is currently showing, the Draw page to the page that is +under construction (to be shown next frame), and the Blank page should always +be maintained as an up-to-date blank background page. (The Blank page is +useful for sprite programming which I am going to be doing next.) Each +of the pages is 352x240, although the screen resolution is only 320x200. + + Each frame, the pages rotate DrawPage->ShowPage->BlankPage->DrawPage. +This means that at the beginning of the frame, the Draw Page is already +blank, so all that is necessary is to draw on a bunch of sprites. The +BlankPage, though, is no longer blank (it's still got stuff from what +was the ShowPage) so we have to erase it, by blanking out the sprites +(luckily the new DrawPage _is_ empty, so we can use a Mode X 32-bit video- +to-video copy to blank it). Hope you're still with me. + + So, this loop continues with each frame, and the loop invariants are +maintained: Show Page is always a "good" frame -- don't touch it. Blank +Page is always blank. Draw Page can look like anything. Now to include +the scrolling again: + + The way I do scrolling with several pages is that the pages ALL wander +around video memory, only they're smaller (1/3 of the size that they could +have been, to be exact!). 
Here's a picture of the situation at its worst: +ÉÍÍÍÍÍÍÍÍÍÍÍÍÍÍ» ÄÄÄ +º ³//ÀÄÄÄÄÄÄÄĺ ³ +ºÄÄÙ///////////º ³ +º/////ÚÄÄÄÄÄÄÄĺ ³ +ºÄÄ¿//³PAGE 0 º ³ +º ³//³ (Draw) º ³ +º ³//ÀÄÄÄÄÄÄÄĺ ³ +ºÄÄÙ///////////º 64K +º/////ÚÄÄÄÄÄÄÄĺ (21K each page) +ºÄÄ¿//³PAGE 1 º ³ +º ³//³ (Show) º ³ +º ³//ÀÄÄÄÄÄÄÄĺ ³ +ºÄÄÙ///////////º ³ +º/////ÚÄÄÄÄÄÄÄĺ ³ +ºÄÄ¿//³PAGE 2 º ³ +º ³//³ (Blank)º ³ +ÈÍÍÍÍÍÍÍÍÍÍÍÍÍͼ ÄÄÄ +The pages always maintain an equal distance apart as they wander. Since +all pages move in parallel, the way it used to work is that as soon as the +pages scrolled, I would draw the newly-visible picture information on +all three of the pages. This worked great, except that it resulted in +a slight pause every time the screen scrolled because it was doing hardware +pan most of the time (which is very fast) and the drawing routines were +slow. Now, I've spread the copying over successive frames to allow a +smoother scrolling rate. This is possible because it's not really necessary +to draw the new information to a page before that page becomes the show +page... + + I hope that this has made some sense. It's pretty complicated stuff. +Take a look at the code and maybe that will help. Or, write me mail +(my email address is below). Or, design your own way and ignore this +whole thing. + +COMING SOON: + + Next up are Sprite routines. I threw in what I started as SPRITE.INC, +although it's not included in the project right now. + Sound support + + Who knows what else? Depends on what people send me! + +------------------------------------------------------------------------- + R E A D T H I S +------------------------------------------------------------------------- + R E A D T H I S +------------------------------------------------------------------------- + + This code is being released as "SwapWare". That means that if you wanted +to go ahead and use my code directly, I really wouldn't care. But I ask +that you send me some of your code that you think is neat. 
Especially if +it's modifications that you make to this code, such as quick sprite drawing +or optimizations. + + I'm not going to brag and say that I "threw this together in a few hours". +I didn't, it took me many days of work to get it working properly. But +I'm also not looking for money as recompense for my labor. I make +great money at my real day job and you probably have a better use for your +donations, such as legitimizing your unregistered shareware and pirated +games. I'm in this for the knowledge ... so my best payback would be to +get lots of code from people out there, stuff to really help make a great +game. In particular, these would be great: + * 32-bit code + * Tricky optimizations + * Fast BitBlt/masked BitBlt code + * Useful File I/O functions + * 3D polygon and texture mapping code + * Maintenance routines -- like numeric conversions, etc. + * Hardware access code like timing routines and interrupt + handlers +Any of those would be very helpful when writing a fast scrolling game. + +You can contact me (for the rest of this term only) at + seisner@athena.mit.edu +Feel free to ask any questions you want! I check my mail about once or +twice a week so don't expect instant turnaround... If you're desperate +to talk to me, say if you work at Origin and want to give me the source +code to Strike Commander or whatnot, you can also reach me at: + Steve Eisner + (617) 247-8691 +and leave a message. But I'd rather you wrote e-mail. + + Thanks, + Steve Eisner + +* Read rec.games.programmer! And for those who already do: + I dream of a world where no one argues over why Wolfenstein + 3-D sucks or why it doesn't. Would people just give it a + break?
+ \ No newline at end of file diff --git a/16/scrasm/SCROLL.EXE b/16/scrasm/SCROLL.EXE new file mode 100644 index 00000000..8181acf1 Binary files /dev/null and b/16/scrasm/SCROLL.EXE differ diff --git a/16/scrasm/SCROLL.INC b/16/scrasm/SCROLL.INC new file mode 100644 index 00000000..be58c025 --- /dev/null +++ b/16/scrasm/SCROLL.INC @@ -0,0 +1,441 @@ +;; Global variables used here ... +EVEN +ScrollPosX dw 0 ; Scroll origin, upper-left X +ScrollPosY dw 0 ; Scroll origin, upper-left Y +ScrollDX dw 0 ; Amount to change scroll origin, X +ScrollDY dw 0 ; Amount to change scroll origin, Y + +;; SCROLL: +;; This routine takes care of all of the scrolling, however it calls +;; outside drawing routines to update the screen. Scrollx and +;; Scrolly determine the amount to scroll by. +;; Note that this does only RELATIVE scrolling, not absolute scrolling. +;; Scroll saves time by updating only up to the one row or column of +;; tiles which have come into view due to a change in scroll offset. +;; In other words, it's not good for "jumping" to a particular point, +;; although this effect can be accomplished in other ways -- the draw_full +;; routine is available to draw a full screen again. +;; Sometimes this means that you will have to calculate values ahead of +;; time, for instance if you wish the scrolling to keep a certain sprite +;; in the center of the screen. In this case, just set ScrollDX and +;; ScrollDY to the delta-x and delta-y of the sprite. +;; * Newly added: +;; Since there are three pages, it is necessary to keep each one of them +;; up to date with each scroll. Recently, I was doing some fast (8+ +;; pixels per frame) scrolling and noticed that there was a significant +;; pause when the screen snapped to a new origin. (The origin is always +;; at a square's corner, even though it may not look like it because it +;; disguises things by smooth-panning the hardware.) 
Every time it +;; scrolled, it was drawing the new information and copying it to the +;; two other planes. I've now distributed the load over successive +;; pages, in other words it doesn't copy the new info all at once, but +;; over several frames. This really smoothed out the scrolling so that +;; while there are still some jumps, they only occur very infrequently +;; and then only at 15 or 16 pixel/frame scroll rates...) That's the +;; "catchup" code at the bottom, and that's why it's more complex than +;; it maybe could be... +EVEN +Scroll PROC near + ; Using the ScrollDX variable as delta-x, move the scroll-origin + ; in the x direction. Then, if the visible screen is now + ; viewing invalid data, snap the origin to a new point and + ; draw any new columns that are necessary. +do_x_scroll: mov ax,cs:ScrollPosX + add ax,cs:ScrollDX ; ScrollDX is a delta-x + jl wrap_l ; wrap left if negative + cmp ax,VIRTUAL_WIDTH - SCREEN_WIDTH ; too far right? + jge wrap_r ; wrap right if too big + mov cs:ScrollPosX,ax ; Stores new scroll-x + ; (just like above, for y:) + ; Using the ScrollDY variable as delta-y, move the scroll-origin + ; in the y direction. Then, if the visible screen is now + ; viewing invalid data, snap the origin to a new point and + ; draw any new rows that are necessary. +do_y_scroll: mov ax,cs:ScrollPosY + add ax,cs:ScrollDY ; ScrollDY is a delta-y + jl wrap_t ; wrap top if negative + cmp ax,(VIRTUAL_HEIGHT - SCREEN_HEIGHT) * VIRTUAL_WIDTH + jge wrap_b ; wrap bottom if too big + mov cs:ScrollPosY,ax ; Store the new scroll-y + jmp calculate + + ; To wrap to the right: + ; Add a square's width to the origin's upper left corner, and + ; subtract the same amount from the scroll origin's upper left + ; corner. This makes no difference on the screen but allows + ; us to forget about the leftmost column on the screen (it's + ; offscreen now...) so we can take over the right column. 
+ ; See any documentation I included for an explanation of the +EVEN ; scrolling... +wrap_r: add cs:upper_left,SQUARE_WIDTH / 4 + sub ax,SQUARE_WIDTH + mov cs:ScrollPosX,ax + + mov dx,MapInfo.Wid + mov bp,MapInfo.OffX1 + inc bp + cmp bp,dx + jb wrap_r1_ok + sub bp,dx +wrap_r1_ok: mov MapInfo.OffX1,bp + + mov bp,MapInfo.OffX2 + inc bp + cmp bp,dx + jb wrap_r2_ok + sub bp,dx +wrap_r2_ok: mov MapInfo.OffX2,bp + + mov bp,MapInfo.WrapX + dec bp + jnz wrap_r3_ok + add bp,dx +wrap_r3_ok: mov MapInfo.WrapX,bp + + call update_right + jmp do_y_scroll ; Jump back to do Y + +EVEN ; Same for left side +wrap_l: sub cs:upper_left,SQUARE_WIDTH / 4 + add ax,SQUARE_WIDTH + mov cs:ScrollPosX,ax + + mov dx,MapInfo.Wid + mov bp,MapInfo.OffX1 + dec bp + cmp bp,dx + jb wrap_l1_ok + add bp,dx +wrap_l1_ok: mov MapInfo.OffX1,bp + + mov bp,MapInfo.OffX2 + dec bp + cmp bp,dx + jb wrap_l2_ok + add bp,dx +wrap_l2_ok: mov MapInfo.OffX2,bp + + mov bp,MapInfo.WrapX + inc bp + cmp bp,dx + jbe wrap_l3_ok + sub bp,dx +wrap_l3_ok: mov MapInfo.WrapX,bp + + call update_left + jmp do_y_scroll ; Jump back to do Y + +EVEN ; Same for bottom +wrap_b: add cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4 + sub ax,SQUARE_HEIGHT * VIRTUAL_WIDTH + mov cs:ScrollPosY,ax + + mov bp,MapInfo.OffY1 + mov dx,MapInfo.Extent + add bp,MapInfo.Wid + cmp bp,dx + jb wrap_b1_ok + sub bp,dx +wrap_b1_ok: mov MapInfo.OffY1,bp + + mov bp,MapInfo.OffY2 + add bp,MapInfo.Wid + cmp bp,dx + jb wrap_b2_ok + sub bp,dx +wrap_b2_ok: mov MapInfo.OffY2,bp + + mov dx,MapInfo.Ht + mov bp,MapInfo.WrapY + dec bp + jg wrap_b3_ok + add bp,dx +wrap_b3_ok: mov MapInfo.WrapY,bp + + call update_bottom + mov ax,cs:ScrollPosY + jmp calculate ; Jump down to calc new offsets + +EVEN ; Same for top +wrap_t: sub cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4 + add ax,SQUARE_HEIGHT * VIRTUAL_WIDTH + mov cs:ScrollPosY,ax + + mov bp,MapInfo.OffY1 + mov dx,MapInfo.Extent + sub bp,MapInfo.Wid + cmp bp,dx + jb wrap_t1_ok + add bp,dx +wrap_t1_ok: mov 
MapInfo.OffY1,bp + + mov bp,MapInfo.OffY2 + sub bp,MapInfo.Wid + cmp bp,dx + jb wrap_t2_ok + add bp,dx +wrap_t2_ok: mov MapInfo.OffY2,bp + + mov bp,MapInfo.WrapY + mov dx,MapInfo.Ht + inc bp + cmp bp,dx + jbe wrap_t3_ok + sub bp,dx +wrap_t3_ok: mov MapInfo.WrapY,bp + + call update_top + mov ax,cs:ScrollPosY + jmp calculate ; Jump down to calc new offsets + +EVEN +align_mask_table DB 11h,22h,44h,88h +calculate: + ; Calculate the scroll offset + ; AX already = ScrollPosY + add ax,cs:ScrollPosX ;Now AX = scroll offset + + ; Calculate the plane alignment + mov bl,al + and bx,0003h + mov cs:DrawPage.Alignment,bl +; mov bl,cs:align_mask_table[bx] +; mov cs:DrawPage.AlignmentMask,bl + + ; Now we don't need Scroll Offset on a pixel level any more, + ; so shift it to a byte level (/4) and store it away. + shr ax,2 + mov cs:DrawPage.ScrollOffset,ax + + ; Calculate the actual upper left corner address + mov si,cs:DrawPage.Address + add si,cs:upper_left + mov cs:DrawPage.UpperLeftAddress,si + + ; And the map offset: + mov bx,MapInfo.WrapX + mov cs:DrawPage.MapPosX,bx + mov di,MapInfo.WrapY + mov cs:DrawPage.MapPosY,di + + mov cs:DrawPage.Valid,1 + cmp cs:BlankPage.Valid,0 + je no_catch_up + + ; Lastly, update dirty area (if any) on blank page. + ; BX and DI still contain the draw page's map offset (X and Y). + sub bx,cs:BlankPage.MapPosX + sub di,cs:BlankPage.MapPosY + jnz yes_catch_up + cmp bx,0 + jnz yes_catch_up + ; No catchup necessary -- return. +no_catch_up: ret + +;; Okay, this stuff is a mess. I've registerized everything except +;; for the video data itself. I'll try to comment it best I can. +EVEN +yes_catch_up: + ; First, switch into full-copy mode. This means latching the + ; bit mask as coming entirely from the local 32-bit registers + ; and then setting the map mask to write to all 4 planes. This + ; is Mode X's greatest advantage, when you can do it! It + ; provides a 2x speedup or so...
+ mov dx,SC_INDEX ; Select Sequencer input + mov ax,0F02h + out dx,ax ; set map mask = all bits + + mov dx,GC_INDEX + mov ax,ALL_COPY_BITS + out dx,ax + + JKEYNP kB,isntbp +isbp: nop +isntbp: + ; Next, calculate the amount to catch up the top/bottom rows + ; If we just wrapped over the edge, it is possible that the + ; distance traveled will be as high as MapInfo.Ht - 1. So, + ; in the fashion of signed numbers, if the number is greater + ; than MapInfo.Ht / 2, we take it to mean negative. To convert + ; it to signed, we have to shift it into the proper range. But + ; if it's less than MapInfo.Ht / 2, then it's okay as it is. + mov ax,di + cmp ax,0 + je y_mod + + mov cx,MapInfo.Ht + cwd ; DX = -1 or 0 based on AX's sign. + and dx,cx ; DX = Ht or 0 + add ax,dx ; AX = 0 ... Ht (unsigned) + + mov di,ax + shl di,1 + cmp di,cx + jb y_signed + sub ax,cx +y_signed: neg ax + + ; Find DI MOD MapInfo.Wid, and then convert to it into virtual + ; coordinates from map offset coordinates. + ; This routine also calculates BP, which will be used as a loop + ; counter to determine how many rows to draw on the left/right + ; column copy. +y_mod: mov bp,ax + cwd + add bp,dx + xor bp,dx + shl bp,3 ; BP = (SQUARE_HEIGHT / 2) * dX + mov di,cs:MultVirtWidth[bp] ; Use multiplication table + add di,dx ; to calculate new DI, then + xor di,dx ; restore the sign. + sub bp,VIRTUAL_HEIGHT / 2 + ; Out: DI = # of pixels traveled, + ; BP = (VIRTUAL_HEIGHT - # of rows) / 2 + + ; Change BX (delta-x) to signed from unsigned, store in AX + mov ax,bx + mov cx,MapInfo.Wid + cwd + and dx,cx ; DX = Wid or 0 + add ax,dx ; AX = 0 ... Wid + + mov bx,ax + shl bx,1 + cmp bx,cx + jb x_signed + sub ax,cx +x_signed: + + ; The following is an optimization which would slow down on + ; normal memory, but I believe it will be okay on VGA memory, + ; which is so incredibly slow. 
Basically, I've replaced all + ; "rep movsb"'s with a loop that first calculates "bx = di - si", + ; and then loops performing "mov ds:[si],es:[si+bx]". Why? + ; Because of several reasons, none of which I'm sure actually + ; help out, but they do make for smaller code. 1) It means that + ; I only have to maintain SI, and "DI" is maintained automatically + ; (because DI - SI should remain constant). 2) Don't have to + ; calculate DS. Not much gain here. 3) Because I'd already + ; unrolled the loops, and the "rep movsb"'s had become instead + ; "mov al, ds:[si] / mov es:[di], al / mov al, ds:[si + 1] / + ; mov es:[di + 1],al ... etc ... add si, 4 / add di, 4". In + ; other words, I wasn't using MOVSB anyway. The only advantage + ; I can see in MOVSB is that it doesn't have to store the answer + ; in AL so it could be slightly faster. By unrolling the loops, + ; I'd already made up for that, I think. 4) Normally, using + ; [SI + BX + 1] would incur a penalty of an additional clock + ; cycle (because it has to add two indexs + an offset). But + ; the VGA memory and the '86 CPU can multi-task, and the VGA + ; is very slow. So by the time the VGA is ready to write the + ; next byte, the one extra clock cycle has already passed. + ; + ; Am I right? Does this make things faster? I have no idea. + ; I haven't bothered to check both ways. Please let me know + ; if I've missed something important... + ; + ; Here's the calculation of BX. SI is already set. + ; si already = DrawPage.UpperLeftAddress + mov bx,cs:BlankPage.Address + sub bx,cs:DrawPage.Address + + ; Now, converts SI into "1/4" units. I do all the calculations + ; in "1/4" scale and then scale back up, mostly because it saved + ; me some instructions elsewhere. + shr si,2 + ; Stores this value of SI. This will be restored after doing + ; the top/bottom copying. + mov dx,si + + ; Check if it's necessary to catch up the top or bottom. 
+catchup_tb: cmp di,0 + je catchup_tb_end + jl catchup_t +catchup_b: ; COPY BOTTOM + ; Move SI to point at the bottom of the screen - # of rows + ; to update. + add si,((VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4) / 4 + sub si,di + jmp copy_tb +catchup_t: ; COPY_TOP + ; Leave SI, but add to the "pushed" value of SI the number of + ; rows that will be drawn. This prevents overlap between top + ; and right/left when moving diagonally. Also, DI = |DI| + neg di + add dx,di + + ; Now do the actual copying. Shifts SI back into scale "1", + ; then performs an unrolled loop to copy the entire virtual + ; width * # of pixel rows. Since DI is already in "1/4" scale, + ; it is only decremented once for each four pixels drawn. +copy_tb: shl si,2 +copy_tb_loop: mov cl,es:[si] + mov es:[si+bx],cl + mov cl,es:[si+1] + mov es:[si+bx+1],cl + mov cl,es:[si+2] + mov es:[si+bx+2],cl + mov cl,es:[si+3] + mov es:[si+bx+3],cl + add si,4 + dec di + jnz copy_tb_loop +catchup_tb_end: + + ; Next, check to see if it's necessary to draw the right or + ; the left side. +catchup_rl: cmp ax,0 + je catchup_rl_end + jg catchup_l +catchup_r: ; COPY RIGHT + ; Adds to the "pushed" SI the width of the screen, minus + ; the number of rows to be drawn. + neg ax + add dx,(VIRTUAL_WIDTH / 4) / 4 + sub dx,ax +catchup_l: ; COPY LEFT (or nothing) + + ; Does the actual copying. First pops SI from its stored value + ; and shifts it back into scale "1" +copy_rl: mov si,dx + shl si,2 + + ; This is a loop over BP -- which has already been set as + ; VIRTUAL_HEIGHT - (# of bytes drawn in vertical update) + ; Again, this loop is unrolled such that it does two rows @ + ; 4 bytes each with every iteration. + ; This LEA instruction is just a quick MOV DI, SI + 2 *y + ; DI is used to push the next value of SI for each iteration + ; of the loop. 
+copy_rl_loop: lea di,[si + 2*(VIRTUAL_WIDTH/4)] + mov cx,ax +copy_rl_col: mov dl,es:[si] + mov es:[si+bx],dl + mov dl,es:[si+1] + mov es:[si+bx+1],dl + mov dl,es:[si+2] + mov es:[si+bx+2],dl + mov dl,es:[si+3] + mov es:[si+bx+3],dl + mov dl,es:[si+VIRTUAL_WIDTH/4] + mov es:[si+bx+VIRTUAL_WIDTH/4],dl + mov dl,es:[si+VIRTUAL_WIDTH/4+1] + mov es:[si+bx+VIRTUAL_WIDTH/4+1],dl + mov dl,es:[si+VIRTUAL_WIDTH/4+2] + mov es:[si+bx+VIRTUAL_WIDTH/4+2],dl + mov dl,es:[si+VIRTUAL_WIDTH/4+3] + mov es:[si+bx+VIRTUAL_WIDTH/4+3],dl + add si,4 + dec cx + jnz copy_rl_col + mov si,di ; SI = pop (SI + VIRTUAL_WIDTH/4) + inc bp ; (BP is negative, so INC it) + jnz copy_rl_loop +catchup_rl_end: + + ; Switch back to all-draw mode. + mov dx,GC_INDEX + mov ax,ALL_DRAW_BITS + out dx,ax + ret +Scroll ENDP + \ No newline at end of file diff --git a/16/scrasm/SCROLL.LNK b/16/scrasm/SCROLL.LNK new file mode 100644 index 00000000..275ba401 --- /dev/null +++ b/16/scrasm/SCROLL.LNK @@ -0,0 +1,2 @@ +main.obj+lztimer.obj +scroll; diff --git a/16/scrasm/SCROLL.MAP b/16/scrasm/SCROLL.MAP new file mode 100644 index 00000000..0b18f2bf Binary files /dev/null and b/16/scrasm/SCROLL.MAP differ diff --git a/16/scrasm/SCROLL.PAL b/16/scrasm/SCROLL.PAL new file mode 100644 index 00000000..5dee1969 Binary files /dev/null and b/16/scrasm/SCROLL.PAL differ diff --git a/16/scrasm/SCROLL.TIL b/16/scrasm/SCROLL.TIL new file mode 100644 index 00000000..3b47a8e1 Binary files /dev/null and b/16/scrasm/SCROLL.TIL differ diff --git a/16/scrasm/SPRITE.INC b/16/scrasm/SPRITE.INC new file mode 100644 index 00000000..9c3f2e48 --- /dev/null +++ b/16/scrasm/SPRITE.INC @@ -0,0 +1,280 @@ +; SPRITE routines +MAX_SPRITE EQU 100 + +RECTANGLE STRUCT 2,NONUNIQUE + X WORD 0 + Y WORD 0 + Wid4 BYTE 0 + Ht BYTE 0 + Color BYTE 0 + Next WORD 0 + ; DrawMe is used to not bother with sprites that you know + ; are contained totally within another, allowing animated + ; eyes, etc to be stored in separate sprites. 
These will be
+        ; drawn to the local buffer but skipped when copying to the
+        ; screen, so if they are not TOTALLY contained, they will
+        ; just get clipped away.
+        DrawMe BYTE 1 ; default, yes draw me.
+        ; (Storage from this point on ... NEVER provide anything but
+        ; default for these values!)
+        ; The four words below are filled in by do_fill_buffer for every
+        ; rectangle in the chain and read back by smart_rects and
+        ; CopyRectangles:
+        ;   address_virt   = MultVirtWidth entry for Y, plus X / 4
+        ;   address_buf    = MultBufWidth entry for Y, plus X
+        ;   next_line_virt = (VIRTUAL_WIDTH / 4) - Wid4
+        ;   next_line_buf  = ((SCREEN_WIDTH / 4) - Wid4) * 4
+        address_virt WORD 0
+        address_buf WORD 0
+        next_line_virt WORD 0
+        next_line_buf WORD 0
+RECTANGLE ENDS
+
+; A SPRITE currently holds nothing but its embedded RECTANGLE.
+SPRITE STRUCT 2, NONUNIQUE
+        RECTANGLE <> ; Contains rectangle info
+SPRITE ENDS
+
+; Static sprite list.  Initialiser order follows the RECTANGLE fields:
+;   X, Y, Wid4, Ht, Color, Next, DrawMe
+; Next links each entry to the one drawn after it; 0 ends the chain.
+; rect1..rect5 are transparent and have DrawMe = 0, so smart_rects and
+; CopyRectangles skip them; do_fill_buffer still fills them.
+EVEN
+rect5 SPRITE <<40 ,60 , 2,8, C_TRANSPARENT, 0 , 0>>
+rect4 SPRITE <<80 ,30 , 2,8, C_TRANSPARENT, offset rect5, 0>>
+rect3 SPRITE <<120,60 , 2,8, C_TRANSPARENT, offset rect4, 0>>
+rect2 SPRITE <<55 ,100, 2,8, C_TRANSPARENT, offset rect3, 0>>
+rect1 SPRITE <<105,100, 2,8, C_TRANSPARENT, offset rect2, 0>>
+
+rect6 SPRITE <<36 ,56 , 4,16, C_BLUE, offset rect1, 1>>
+rect7 SPRITE <<76 ,26 , 4,16, C_BLUE, offset rect6, 1>>
+rect8 SPRITE <<116,56 , 4,16, C_BLUE, offset rect7, 1>>
+rect9 SPRITE <<51 ,96 , 4,16, C_BLUE, offset rect8, 1>>
+rect10 SPRITE <<101,96 , 4,16, C_BLUE, offset rect9, 1>>
+
+;; Simply adding in these 5 rectangles (~20000 pixels for both
+;; drawing and erasing) really slows things down!  That's why
+;; it's important to optimize the sprite drawing routines!
+;; (They chain down into rect10, but nothing shown here links to
+;; them: walking from FIRST_SPRITE = rect10 never reaches
+;; rect11..rect15.)
+rect11 SPRITE <<35 ,55 ,14,36, C_GREEN, offset rect10, 1>>
+rect12 SPRITE <<75 ,25 ,14,36, C_GREEN, offset rect11, 1>>
+rect13 SPRITE <<115,55 ,14,36, C_GREEN, offset rect12, 1>>
+rect14 SPRITE <<50 ,95 ,14,36, C_GREEN, offset rect13, 1>>
+rect15 SPRITE <<100,95 ,14,36, C_GREEN, offset rect14, 1>>
+
+; Head of the chain that AnimateSprites walks.
+FIRST_SPRITE EQU rect10
+
+; AnimateSprites
+;   Intended sequence: erase the previously drawn rectangles on the draw
+;   page by copying them from the blank page (CopyRectangles), fill the
+;   off-screen buffer with the sprite list (do_fill_buffer), then copy
+;   the buffer to the draw page (smart_rects).
+;   NOTE(review): the very first instruction is RET, so everything after
+;   it is unreachable and the routine is currently a no-op.  This looks
+;   like a deliberate "disabled for now" switch -- confirm before
+;   removing it.
+EVEN
+AnimateSprites PROC near
+        ret
+        ; Blank out the draw page, by copying from the blank page
+        ; to the draw page all rectangles which had changed.  The
+        ; blank page must always be entirely blank if this is going
+        ; to work!
+        ; CopyRectangles is entered with DI = destination (draw page),
+        ; SI = source (blank page), BP -> rectangle chain.
+        mov di,cs:DrawPage.UpperLeftAddress
+        add di,cs:DrawPage.ScrollOffset
+        mov si,cs:BlankPage.UpperLeftAddress
+        add si,cs:BlankPage.ScrollOffset
+        mov bp,cs:BlankPage.Rectangles
+        call CopyRectangles
+
+        ; Now draw the sprites.  Uses a temporary buffer to ensure
+        ; minimal drawing to the screen, but that's not really necessary,
+        ; if memory is at a minimum.  It's just faster...
+        mov bp,offset FIRST_SPRITE
+        mov cs:DrawPage.Rectangles,bp
+        call do_fill_buffer
+        mov di,cs:DrawPage.UpperLeftAddress
+        add di,cs:DrawPage.ScrollOffset
+        ; NOTE(review): the store to DrawPage.AlignmentMask in Scroll's
+        ; "calculate" section is commented out in this patch -- confirm
+        ; the field is initialised somewhere before it is used as the
+        ; starting map mask below.
+        mov bh,cs:DrawPage.AlignmentMask
+        mov bp,offset FIRST_SPRITE
+        jmp smart_rects ; "call" -- tail jump; smart_rects' RET returns to our caller
+AnimateSprites ENDP
+
+; Scratch words used by smart_rects (kept in the code segment):
+;   smart_dest = base destination offset for the whole chain
+;   out_di / out_si = DI / SI saved at the start of each plane pass
+smart_dest DW 0
+out_di DW 0
+out_si DW 0
+
+; smart_rects
+;   Copies every rectangle in the chain whose DrawMe = 1 from the buffer
+;   (segBuffer) to video memory (segVideo), one plane at a time, skipping
+;   C_TRANSPARENT bytes.  Each byte copied is overwritten with
+;   C_TRANSPARENT in the buffer so it is not drawn twice.
+;   In:  DI = destination offset (DrawPage.Address is added to it here)
+;        BH = map mask for the first plane pass; presumably one of the
+;             values 11h/22h/44h/88h from align_mask_table so that the
+;             ROL below wraps correctly -- TODO confirm
+;        BP -> first RECTANGLE
+;   Clobbers AX, BX, CX, DX, SI, DI, BP, DS, ES.
+;   NOTE(review): BP is dereferenced before it is tested for 0, unlike
+;   do_fill_buffer; the visible caller always passes FIRST_SPRITE.
+;   NOTE(review): the visible caller passes UpperLeftAddress +
+;   ScrollOffset, and Scroll computes UpperLeftAddress as
+;   DrawPage.Address + upper_left, so Address appears to be added twice
+;   -- confirm.
+EVEN
+smart_rects PROC near
+        add di,cs:DrawPage.Address
+        mov ds,cs:segBuffer
+        mov es,cs:segVideo
+        ; Select index 02h (the map mask, cf. the 0F02h writes to
+        ; SC_INDEX elsewhere) at port 3C4h, then step DX to the data
+        ; port so each later OUT only has to send the mask byte.
+        mov dx,3c4h
+        mov al,02h
+        out dx,al
+        inc dx
+        mov cs:smart_dest,di
+
+        ; === Beginning of loop through rectangles! ===
+sp_nextrect:
+        cmp cs:[bp].RECTANGLE.DrawMe,1
+        jne sp_next
+        ; Draw this rectangle from the buffer to screen memory.
+        ; Calculate the output address.
+        mov si,cs:[bp].RECTANGLE.address_buf
+        mov di,cs:[bp].RECTANGLE.address_virt
+        add di,cs:smart_dest
+
+        ; Loop over 4 planes
+        mov bl,4
+sp_plane_loop: mov al,bh
+        out dx,al
+
+        ; Remember where this plane pass started so the next pass can
+        ; restart from the same corner.
+        mov cs:out_di,di
+        mov cs:out_si,si
+
+        ; Loop over height
+        mov ch,cs:[bp].RECTANGLE.Ht
+sp_row_loop:
+
+        ; Loop over width of rectangle (Wid4 is actually width/4)
+        mov cl,cs:[bp].RECTANGLE.Wid4
+sp_col_loop:
+
+        ; Read a byte from the buffer
+        ; Is it transparent (no-modify)?  If so, just jump over the draw
+        mov al,byte ptr ds:[si]
+        cmp al,C_TRANSPARENT
+        je sp_next_pixel
+        ; Otherwise, draw it on the screen, and mark it transparent
+        ; so that it won't be drawn again.
+        mov byte ptr es:[di],al
+        mov byte ptr ds:[si],C_TRANSPARENT
+
+        ; Skip to next 4-byte group (next column that can be drawn in
+        ; Mode X)  Also increment screen draw address, but only by 1
+        ; because ModeX is 4 pixels per byte
+sp_next_pixel:
+        add si,4
+        inc di
+
+        dec cl
+        jnz sp_col_loop
+
+        ; End of row.  Skip space to get to left edge of next row down
+        ; Skip SI = (SCREEN_WIDTH - #bytesdrawn)
+        ; Only draw up to height of rectangle
+        add si,cs:[bp].RECTANGLE.next_line_buf
+        add di,cs:[bp].RECTANGLE.next_line_virt
+        dec ch
+        jnz sp_row_loop
+
+        ; Next plane: restart at the rectangle's corner, one buffer
+        ; byte further right.  ROL leaves the bit rotated out of BH in
+        ; CF, and ADC DI,0 adds that carry to the video offset.
+        mov di,cs:out_di
+        mov si,cs:out_si
+        inc si
+        rol bh,1
+        adc di,0
+
+        dec bl
+        jnz sp_plane_loop
+
+        ; Follow chain to next rectangle
+sp_next: mov bp,cs:[bp].RECTANGLE.Next
+        cmp bp,0
+        jne sp_nextrect
+        ; All done
+sp_end: ret
+smart_rects ENDP
+
+; do_fill_buffer
+;   For every rectangle in the chain: computes and stores address_buf,
+;   address_virt, next_line_virt and next_line_buf, then fills the
+;   rectangle's area in the buffer (segBuffer) with its Color, four
+;   bytes per Wid4 unit.
+;   In:  BP -> first rectangle.  Follows BP->next, stops when BP = 0
+;        (BP = 0 on entry returns immediately).
+;   Clobbers AX, BX, CX, DX, DI, BP, ES.
+;   NOTE(review): the row/column counters are decremented before being
+;   tested, so Ht = 0 or Wid4 = 0 would wrap to 256 iterations --
+;   presumably never used; confirm.
+; BP -> first rectangle.  Follows BP->next, stops when BP = 0
+EVEN
+do_fill_buffer PROC near
+        mov es,cs:segBuffer
+
+        cmp bp,0
+        je fill_end
+fill_loop:
+
+        mov bx,cs:[bp].RECTANGLE.Y
+        shl bx,1 ; BX = word index y
+        mov di,cs:MultBufWidth[bx] ; DI = SW * y
+        mov cx,cs:[bp].RECTANGLE.X ; CX = x
+        add di,cx ; DI = (SW * y) + x
+        mov cs:[bp].RECTANGLE.address_buf,di ; (DI used later)
+
+        mov ax,cs:MultVirtWidth[bx] ; AX = (VW/4) * y
+        shr cx,2 ; CX = (x / 4)
+        add ax,cx ; AX = (VW * y + x)/4
+        mov cs:[bp].RECTANGLE.address_virt,ax
+
+        ; Only DL is adjusted by the 8-bit SUB below, so a borrow would
+        ; not reach DH; assumes the low byte of the constant is >= Wid4
+        ; -- TODO confirm.
+        mov dx,(VIRTUAL_WIDTH / 4)
+        sub dl,cs:[bp].RECTANGLE.Wid4 ; DX = (VW - w) / 4
+        mov cs:[bp].RECTANGLE.next_line_virt,dx
+
+        mov dx,(SCREEN_WIDTH / 4)
+        sub dl,cs:[bp].RECTANGLE.Wid4 ; DX = (SW - w) / 4
+        shl dx,2 ; DX = SW - w
+        mov cs:[bp].RECTANGLE.next_line_buf,dx
+
+        ; AX = Color in both bytes, so each word store writes two pixels.
+        mov ah,cs:[bp].RECTANGLE.Color
+        mov al,cs:[bp].RECTANGLE.Color
+
+        mov ch,cs:[bp].RECTANGLE.Ht
+fill_row_loop: mov cl,cs:[bp].RECTANGLE.Wid4
+fill_col_loop: mov es:[di],ax
+        mov es:[di+2],ax
+        add di,4
+        dec cl
+        jnz fill_col_loop
+        add di,dx ; DX still = next_line_buf: skip to the next row
+        dec ch
+        jnz fill_row_loop
+
+        mov bp,cs:[bp].RECTANGLE.Next
+        cmp bp,0
+        jne fill_loop
+fill_end: ret
+do_fill_buffer ENDP
+
+EVEN
+; CopyRectangles
+;   Copies every rectangle in a chain whose DrawMe = 1 from one video
+;   page to another, using the VGA latches so that one byte moved
+;   carries all four planes.
+;   In:  DI = destination page offset
+;        SI = source page offset
+;        BP -> first RECTANGLE of the chain (0 = empty chain)
+;   Out: the graphics controller is reprogrammed with ALL_DRAW_BITS;
+;        the map mask is left at 0Fh (all planes).
+;   Clobbers AX, BX, CX, DX, SI, DI, BP, DS, ES.
+;   Assumes the direction flag is clear (STOSB must increment DI).
+CopyRectangles PROC near
+        mov ax,cs:segVideo
+        mov ds,ax
+        mov es,ax
+
+        ; Calculate the difference between the source and destination
+        ; pages.  Since in a movsb loop the two would remain a constant
+        ; distance apart, we can just calculate a displacement and then
+        ; not have to worry about SI; instead use DI and DI+BX, thanks
+        ; to the thoughtful x86 ALU!
+        ; BX must be (source - destination) so that [DI + BX] addresses
+        ; the source byte matching destination byte [DI].  (The previous
+        ; code computed DI - SI, which made [DI + BX] read from
+        ; 2*dest - source instead of from the source page.)
+        mov bx,si
+        sub bx,di
+
+        mov dx,GC_INDEX
+        mov ax,ALL_COPY_BITS
+        out dx,ax
+
+        mov dx,SC_INDEX
+        mov ax,0F02h
+        out dx,ax
+        mov si,di ;store destination
+
+        ; An empty chain has nothing to copy.  Guard here, as
+        ; do_fill_buffer does, rather than dereferencing cs:[0]; going
+        ; through cr_end keeps the register restore identical to the
+        ; normal path.
+        cmp bp,0
+        je cr_end
+
+        ; === Beginning of loop through rectangles! ===
+cr_nextrect: cmp cs:[bp].RECTANGLE.DrawMe,1
+        jne cr_next
+        ; Copy this rectangle from the source page to the destination
+        ; page.  Calculate the output address.
+        mov di,cs:[bp].RECTANGLE.address_virt
+        mov dx,cs:[bp].RECTANGLE.next_line_virt
+        add di,si
+
+        ; Loop over height
+        mov ch,cs:[bp].RECTANGLE.Ht
+cr_row_loop:
+
+        ; Loop over width of rectangle (Wid4 is actually width/4)
+        mov cl,cs:[bp].RECTANGLE.Wid4
+cr_col_loop: mov al,ds:[di + bx]
+        stosb
+        dec cl
+        jnz cr_col_loop
+        ; One extra byte per row, copied without advancing DI, so that
+        ; Wid4 + 1 bytes are covered while DI + next_line_virt still
+        ; lands on the left edge of the next row.
+        mov al,ds:[di + bx]
+        mov es:[di],al
+
+        ; End of row.  Skip space to get to left edge of next row down
+        ; Only draw up to height of rectangle
+        add di,dx
+        dec ch
+        jnz cr_row_loop
+
+        ; Follow chain to next rectangle
+cr_next: mov bp,cs:[bp].RECTANGLE.Next
+        cmp bp,0
+        jne cr_nextrect
+        ; All done -- switch back to all-draw mode.
+cr_end:
+        mov dx,GC_INDEX
+        mov ax,ALL_DRAW_BITS
+        out dx,ax
+        ret
+CopyRectangles ENDP
+
+
\ No newline at end of file