c2e.convert_special: 0
e2c.convert_num: 0
-openfiles: /dos/z/16/16/dos_gfx.cpp:8135:7436:1:
-openfiles: /dos/z/16/16/dos_gfx.h:327:0:0:
+openfiles: /dos/z/16/16/dos_gfx.cpp:1620:647:1:
+openfiles: /dos/z/16/16/dos_gfx.h:665:373:0:
openfiles: /dos/z/16/16/dos_kb.c:1039:46:0:
openfiles: /dos/z/16/16/dos_kb.h:23:0:0:
openfiles: /dos/z/16/16/lib/lib_com.cpp:0:0:0:
recent_files: file:///dos/z/16/16/lib/x/MXPN.ASM
recent_files: file:///dos/z/4x4_16/!/c/TUT10.C
recent_files: file:///dos/z/16/16/lib/x/MXVS.ASM
-recent_files: file:///dos/z/16/16/lib/x/MODEX.H
-recent_files: file:///dos/z/4x4_16/modex/DEMO01.PAS
-recent_files: file:///dos/z/4x4_16/modex/DEMO07.PAS
+recent_files: file:///dos/z/16/16/lib/x/MAKEFILE
+recent_files: file:///dos/z/16/16/lib/x/MODEX.DEF
recent_files: file:///dos/z/16/16/dos_gfx.h
+recent_files: file:///dos/z/16/16/dos_gfx.cpp
recent_files: file:///dos/z/16/16/dos_kb.c
recent_files: file:///dos/z/16/16/dos_kb.h
recent_files: file:///dos/z/16/16/lib/lib_com.cpp
-recent_files: file:///dos/z/16/16/lib/lib_com.h
recent_files: file:///dos/z/16/16/16.txt
+recent_files: file:///dos/z/16/16/lib/lib_com.h
recent_files: file:///dos/z/16/16/scroll.txt
recent_files: file:///dos/z/16/16/project16.txt
-recent_files: file:///dos/z/16/16/lib/intro/lib.c
-recent_files: file:///dos/z/16/src/lib/dos_gfx.h
+recent_files: file:///dos/z/16/16/lib/x/MODEX.H
+recent_files: file:///dos/z/4x4_16/modex/DEMO07.PAS
recent_files: file:///dos/z/16/16/lib/x/MXBB.ASM
-recent_files: file:///dos/z/16/src/lib/dos_gfx.cpp
+recent_files: file:///dos/z/16/src/lib/dos_gfx.h
+recent_files: file:///dos/z/4x4_16/modex/DEMO01.PAS
recent_files: file:///dos/z/16/16/lib/x/MXCR.ASM
-recent_files: file:///dos/z/16/16/dos_gfx.cpp
-recent_files: file:///dos/z/16/16/lib/x/MAKEFILE
-recent_files: file:///dos/z/16/16/lib/x/MODEX.DEF
+recent_files: file:///dos/z/16/16/lib/intro/lib.c
+recent_files: file:///dos/z/16/src/lib/dos_gfx.cpp
snr_replacetype: 0
savedir: file:///dos/z/16/16
spell_check_default: 1
mxSetMode( MX_320x240 );\r
// mxSetVirtualScreen(SW+(SW/4), SH+(SH/4));\r
// mxSetVirtualScreen(SW*2, SH*2);\r
- mxSetVirtualScreen(VW,(VH+(TILEWH*BUFFMX)));\r
+ mxSetVirtualScreen(VW,BH);\r
// mxSetVirtualScreen((640-(TILEWH*4)),(480-(TILEWH*4)));\r
mxSetClip(true);\r
- mxSetClipRegion(0, 0, VW, (VH+(TILEWH*BUFFMX)));\r
+ mxSetClipRegion(0, 0, VW, BH);
+ mxPan(TILEWH*2,TILEWH*2);\r
//mxSetClipRegion(0, VH+1, VW, (TILEWH*BUFFMX));\r
}\r
}\r
}\r
}\r
// fixer\r
- if(q!=16){
+ if(q!=16){\r
#ifdef TILE\r
if(xx<0) xx=(VW-TILEWH);\r
if(yy<0) yy=(VH-TILEWH);\r
if(xx>(VW-TILEWH)) xx=0;\r
- if(yy>(VH-TILEWH)/*+(TILEWH*BUFFMX)*/) yy=0;
- #else
+ if(yy>(VH-TILEWH)/*+(TILEWH*BUFFMX)*/) yy=0;\r
+ #else\r
if(xx<0) xx=VW;\r
if(yy<0) yy=VH;\r
if(xx>VW) xx=0;\r
- if(yy>VH) yy=0;
+ if(yy>VH) yy=0;\r
#endif\r
}\r
\r
// main variables\r
d=4; // switch variable\r
key=4; // default screensaver number\r
- xpos=0;\r
- ypos=0;\r
+ xpos=TILEWH*2;\r
+ ypos=TILEWH*2;\r
xdir=1;\r
ydir=1;\r
setvideo(1);\r
mxPutPixel(VW-1, y, 15);\r
}\r
\r
- getch();
+ getch();\r
//text box\r
- mxSetTextColor(10, OP_TRANS); //set font
- mxBitBlt(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, VH); //copy background
- mxFillBox(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, OP_SET); // background for text box
- //+(QUADWH*6)
- mxOutText(xpos+1, ypos+SH-48, "========================================");
+ mxSetTextColor(10, OP_TRANS); //set font\r
+ mxBitBlt(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, BS); //copy background\r
+ mxFillBox(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, OP_SET); // background for text box\r
+ //+(QUADWH*6)\r
+ mxOutText(xpos+1, ypos+SH-48, "========================================");\r
mxOutText(xpos+1, ypos+SH-40, "| |Chikyuu:$line1");\r
mxOutText(xpos+1, ypos+SH-32, "| |$line2");\r
mxOutText(xpos+1, ypos+SH-24, "| |$line3");\r
- mxOutText(xpos+1, ypos+SH-16, "| |$line4");
- mxOutText(xpos+1, ypos+SH-8, "========================================");
- mxFillBox(xpos+QUADWH, ypos+QUADWH+(TILEWH*12), TILEWH*2, TILEWH*2, 9, OP_SET);
- getch();
- mxBitBlt(0, VH, 320, TILEWH*BUFFMX, xpos, ypos+(TILEWH*12)); //copy background
+ mxOutText(xpos+1, ypos+SH-16, "| |$line4");\r
+ mxOutText(xpos+1, ypos+SH-8, "========================================");\r
+ mxFillBox(xpos+QUADWH, ypos+QUADWH+(TILEWH*12), TILEWH*2, TILEWH*2, 9, OP_SET); //portriat~\r
+ getch();\r
+ mxBitBlt(0, BS, 320, TILEWH*BUFFMX, xpos, ypos+(TILEWH*12)); //copy background\r
//mxBitBlt(0, (TILEWH*12)+1, 320, TILEWH*3, 0, 0);\r
getch();\r
while(!kbhit()){\r
// scrolly(1);\r
// vScroll(1);\r
// delay(100);\r
- //for(int i=0;i<TILEWH;i++){
+ //for(int i=0;i<TILEWH;i++){\r
\r
ding(key);\r
mxPan(xpos,ypos);\r
//for(short o = 0; o<TILEWH; o++){\r
- //xpos+=xdir;\r
- //ypos+=ydir;
- if(ypos==1 || (ypos==((VH+(TILEWH*BUFFMX))-SH-1)))delay(1000);\r
- //mxWaitRetrace();\r
+ xpos+=xdir;\r
+ ypos+=ydir;\r
+ //if(ypos==1 || (ypos==(BH-SH-1)))delay(500);
+ //if((xpos>(VW-SW-1)) || (xpos<1))delay(500);\r
+ mxWaitRetrace();\r
//}\r
if( (xpos>(VW-SW-1)) || (xpos<1)){xdir=-xdir;}\r
- if( (ypos>((VH+(TILEWH*BUFFMX))-SH-1)) || (ypos<1)){ydir=-ydir;} // { Hit a boundry, change\r
+ if( (ypos>(BH-SH-1)) || (ypos<1)){ydir=-ydir;} // { Hit a boundry, change\r
// direction! }\r
}\r
ch=getch();\r
if(ch==0x1b)break; // 'ESC'\r
}\r
setvideo(0);\r
- printf("wwww\nVirtual Resolution: %dx%d\n", VW,VH);
+ printf("wwww\nFull Buffer Virtual Resolution: %dx%d\n", VW,BH);
+ printf("Virtual Resolution: %dx%d\n", VW,VH);\r
printf("Resolution: %dx%d\n", SW,SH);\r
printf("Mode X Library Version: %d\n", mxGetVersion());\r
printf("bakapi ver. 1.04.09.04\nis made by sparky4\81i\81\86\83Ö\81\85\81j feel free to use it ^^\nLicence: GPL v2\n");\r
#define SW 320\r
#define SH 240\r
-#define VW 560\r
-#define VH 416
+//#define VW 560\r
+//#define VH 416
+#define VW (SW+64)\r
+#define VH (SH+64)
+#define BS (VH*2) // buffer space! not BULLSHIT
+#define BH (BS+(TILEWH*BUFFMX)) // full buffer height: BS plus TILEWH*BUFFMX extra rows; parenthesized so BH expands safely inside larger expressions (e.g. BH*2)
\r
//void drawChar(int x, int y, int color, byte c);\r
//void drawText(int x, int y, int color, byte string);\r
--- /dev/null
+_80x86 OPTIMIZATION_\r
+by Michael Abrash\r
+\r
+\r
+[LISTING ONE]\r
+\r
+; Copies one string to another string, converting all characters to\r
+; uppercase in the process, using a loop containing LODSB and STOSB.\r
+; Adapted from Zen of Assembly Language, by Michael Abrash; not a\r
+; standalone program, but designed to be used with the Zen timer from\r
+; that book via the Zen timer's PZTIME.BAT batch file: ZTimerOn starts\r
+; the clock, ZTimerOff stops it, and the test-bed program linked in by\r
+; PZTIME.BAT starts the program, reports the results, and ends.\r
+\r
+ jmp Skip ;jump past the inline data and CopyStringUpper to the timed driver at Skip\r
+\r
+SourceString label word ;sample zero-terminated string to copy\r
+ db 'This space intentionally left not blank',0\r
+DestString db 100 dup (?) ;uninitialized 100-byte destination buffer for the copy\r
+\r
+; Copies one zero-terminated string (terminator included) to another string,\r
+; converting the lowercase letters 'a'-'z' to uppercase; all other bytes are copied unchanged.\r
+; Input: DS:SI = start of source string; DS:DI = start of destination buffer\r
+; Output: none\r
+; Registers altered: AX, BX, SI, DI, ES (ES is left equal to DS)\r
+; Direction flag cleared\r
+\r
+CopyStringUpper:\r
+ mov ax,ds\r
+ mov es,ax ;STOSB stores through ES:DI, so point ES at the data segment\r
+ mov bl,'a' ;keep both range bounds in registers for fast register-register\r
+ mov bh,'z' ; comparisons inside the loop\r
+ cld\r
+StringUpperLoop:\r
+ lodsb ;AL = next source character; SI advances to the following character\r
+ cmp al,bl ;below 'a'?\r
+ jb IsUpper ;yes, not lowercase: store it unchanged\r
+ cmp al,bh ;above 'z'?\r
+ ja IsUpper ;yes, not lowercase: store it unchanged\r
+ and al,not 20h ;is lowercase: clear bit 20h to make it uppercase\r
+IsUpper:\r
+ stosb ;put character into new string and point to \r
+ ; following location (the terminating zero is stored too)\r
+ and al,al ;only sets flags: was that the zero that marks end of the string?\r
+ jnz StringUpperLoop ;no, do the next character\r
+ ret\r
+\r
+; Calls CopyStringUpper to copy & convert SourceString->DestString; the timed region covers the pointer setup and the call.\r
+Skip:\r
+ call ZTimerOn ;start timing\r
+ mov si,offset SourceString ;point SI to the string to copy from\r
+ mov di,offset DestString ;point DI to the string to copy to\r
+ call CopyStringUpper ;copy & convert to uppercase\r
+ call ZTimerOff ;stop timing\r
+\r
+\r
+[LISTING TWO]\r
+\r
+; Copies one string to another string, converting all characters to\r
+; uppercase in the process, using no string instructions.\r
+; Not a standalone program, but designed to be used with the Zen\r
+; timer, as described in Listing 1.\r
+\r
+ jmp Skip ;jump past the inline data and CopyStringUpper to the timed driver at Skip\r
+\r
+SourceString label word ;sample zero-terminated string to copy\r
+ db 'This space intentionally left not blank',0\r
+DestString db 100 dup (?) ;uninitialized 100-byte destination buffer for the copy\r
+\r
+; Copies one zero-terminated string (terminator included) to another string,\r
+; converting the lowercase letters 'a'-'z' to uppercase; all other bytes are copied unchanged. \r
+; Input: DS:SI = start of source string; DS:DI = start of destination string\r
+; Output: none\r
+; Registers altered: AL, BX, SI, DI (no string instructions, so ES and the direction flag are untouched)\r
+\r
+CopyStringUpper:\r
+ mov bl,'a' ;keep both range bounds in registers for fast register-register\r
+ mov bh,'z' ; comparisons inside the loop\r
+StringUpperLoop:\r
+ mov al,[si] ;get the next character (MOV/INC pair used in place of LODSB) and\r
+ inc si ; point to the following character\r
+ cmp al,bl ;below 'a'?\r
+ jb IsUpper ;yes, not lowercase: store it unchanged\r
+ cmp al,bh ;above 'z'?\r
+ ja IsUpper ;yes, not lowercase: store it unchanged\r
+ and al,not 20h ;is lowercase: clear bit 20h to make it uppercase\r
+IsUpper:\r
+ mov [di],al ;put the character into the new string (MOV/INC pair used in place of STOSB) and\r
+ inc di ; point to the following location\r
+ and al,al ;only sets flags: is this the zero that marks the end of the string?\r
+ jnz StringUpperLoop ;no, do the next character\r
+ ret\r
+\r
+; Calls CopyStringUpper to copy & convert SourceString->DestString; the timed region covers the pointer setup and the call.\r
+Skip:\r
+ call ZTimerOn\r
+ mov si,offset SourceString ;point SI to the string to copy from\r
+ mov di,offset DestString ;point DI to the string to copy to\r
+ call CopyStringUpper ;copy & convert to uppercase\r
+ call ZTimerOff\r
+\r
+\r
+[LISTING THREE]\r
+\r
+; Clears a buffer using MOV/ADD in a loop.\r
+; Not a standalone program, but designed to be used with the Zen\r
+; timer, as described in Listing 1.\r
+\r
+ mov dx,2 ;DX = pass counter: repeat the test code twice, to make\r
+ ; sure it's in the cache (if there is one)\r
+ mov bx,dx ;BX = 2 = distance in bytes from the start of one word\r
+ ; to the start of the next\r
+ sub ax,ax ;AX = 0, the value stored into every word of the buffer\r
+TestTwiceLoop:\r
+ mov cx,1024 ;CX = word count: clear 1024 words starting at address\r
+ mov di,8000h ; DS:8000h (this is just unused memory\r
+ ; past the end of the program)\r
+ call ZTimerOn ;start timing (resets timer to 0); NOTE(review): assumes the timer calls preserve AX, BX, CX, DX and DI - confirm\r
+StoreLoop:\r
+ mov [di],ax ;clear the current word\r
+ add di,bx ;point to the next word\r
+ dec cx ;count off words to clear until none\r
+ jnz StoreLoop ; remain (one branch per word cleared)\r
+ call ZTimerOff ;stop timing\r
+ dec dx ;count off passes through test code\r
+ jz StoreDone ;that was the second pass; we're done\r
+ jmp TestTwiceLoop ;that was the first pass; do the second pass with all \r
+ ; instructions and data in the cache\r
+StoreDone:\r
+\r
+\r
+[LISTING FOUR]\r
+\r
+; Clears a buffer using MOV/ADD in an unrolled loop.\r
+; Not a standalone program, but designed to be used with the Zen\r
+; timer, as described in Listing 1.\r
+\r
+ mov dx,2 ;DX = pass counter: repeat the test code twice, to make\r
+ ; sure it's in the cache (if there is one)\r
+ mov bx,dx ;BX = 2 = distance in bytes from the start of one word\r
+ ; to the start of the next\r
+ sub ax,ax ;AX = 0, the value stored into every word of the buffer\r
+TestTwiceLoop:\r
+ mov si,1024 ;SI = word count: clear 1024 words starting at address\r
+ mov di,8000h ; DS:8000h (this is just unused memory\r
+ ; past the end of the program)\r
+ call ZTimerOn ;start timing (resets timer to 0); NOTE(review): assumes the timer calls preserve AX, BX, DX, SI and DI - confirm\r
+ mov cl,4 ;divide the count of words to clear by\r
+ shr si,cl ; 16 (1024 -> 64 blocks), because we'll clear 16 words\r
+ ; each time through the loop; this setup runs inside the timed region\r
+StoreLoop:\r
+ REPT 16 ;assembler repeats the two lines up to ENDM 16 times: clear 16 words in a row without looping\r
+ mov [di],ax ;clear the current word\r
+ add di,bx ;point to the next word\r
+ ENDM\r
+ dec si ;count off blocks of 16 words to clear\r
+ jnz StoreLoop ; until none remain (one branch per 16 words cleared)\r
+ call ZTimerOff ;stop timing\r
+ dec dx ;count off passes through test code\r
+ jz StoreDone ;that was the second pass; we're done\r
+ jmp TestTwiceLoop ;that was the first pass; do the second pass \r
+ ; with all instructions and data in the cache\r
+StoreDone:\r
+\r
--- /dev/null
+Journal: Dr. Dobb's Journal March 1991 v16 n3 p16(8)\r
+-----------------------------------------------------------------------------\r
+Title: 80x86 optimization: aim down the middle and pray. (80x86 family of\r
+ microprocessors) (tutorial)\r
+Author: Abrash, Michael.\r
+AttFile: Program: 80X86.ASC Source code listing.\r
+\r
+Summary: Optimizing code for 8088, 80286, 80386 and 80486 microprocessors\r
+ is difficult because the chips use significantly different memory\r
+ architectures and instruction execution times. Code cannot be\r
+ optimized for the 80x86 family; rather, code must be designed to\r
+ produce good performance on a range of systems or optimized for\r
+ particular combinations of processors and memory. Programmers\r
+ must avoid the unusual instructions supported by the 8088, which\r
+ have lost their performance edge in subsequent chips. String\r
+ instructions should be used but not relied upon. Registers should\r
+ be used rather than memory operations. Branching is also slow for\r
+ all four processors. Memory accesses should be aligned to improve\r
+ performance. Generally, optimizing an 80486 requires exactly the\r
+ opposite steps as optimizing an 8088.\r
+-----------------------------------------------------------------------------\r
+Descriptors..\r
+Company: Intel Corp. (Products).\r
+Ticker: INTC.\r
+Product: Intel 80286 (Microprocessor) (Programming)\r
+ Intel 80386 (Microprocessor) (Programming)\r
+ Intel 80486 (Microprocessor) (Programming)\r
+ Intel 8088 (Microprocessor) (Programming).\r
+Topic: Microprocessors\r
+ Optimization\r
+ Programming\r
+ Tutorial\r
+ Assembly Language\r
+ Guidelines\r
+ Type-In Programs\r
+ Microcode\r
+ Processor Architecture.\r
+Feature: illustration\r
+ graph.\r
+Caption: Official and actual cycles per binary-to-hex ASCII conversion.\r
+ (graph)\r
+ Actual performance in microseconds of two solutions to a problem.\r
+ (graph)\r
+ Actual performance of three clearing approaches across the 80x86\r
+ family. (graph)\r
+\r
+-----------------------------------------------------------------------------\r
+Full Text:\r
+\r
+Optimization\r
+\r
+Picture this: You're an archer aiming at a target 100 feet away. A strong\r
+wind comes up and pushes each arrow to the left as it flies. Naturally, you\r
+compensate by aiming farther to the right. That's what it's like optimizing\r
+for the 8088; once you learn to compensate for the strong but steady effects\r
+of the prefetch queue and the 8-bit bus, you can continue merrily on your\r
+programming way.\r
+\r
+Now the wind starts gusting unpredictably. There's no way to compensate, so\r
+you just aim for the bull's-eye and hope for the best. That's what it's like\r
+writing code for good performance across the entire 80x86 family, or even for\r
+the 286/386SX/386 heart of today's market. You just aim down the middle and\r
+pray.\r
+\r
+The New World of the 80x86\r
+\r
+In the beginning, the 8088 was king, and that was good. The optimization\r
+rules weren't obvious, but once you learned them, you could count on them\r
+serving you well on every computer out there.\r
+\r
+Not so these days. There are four major processor types--8088, 80286, 80386,\r
+and 80486--with a bewildering array of memory architectures: cached (in\r
+several forms), page mode, static-column RAM, interleaved, and, of course,\r
+the 386SX, with its half-pint memory interface. The processors offer wildly\r
+differing instruction execution times, and memory architectures warp those\r
+times further by affecting the speed of instruction fetching and access to\r
+memory operands. Because actual performance is a complex interaction of\r
+instruction characteristics, instruction execution times, and memory access\r
+speed, the myriad processor-memory combinations out there make "exact\r
+performance" a meaningless term. A specific instruction sequence may run at\r
+a certain speed on a certain processor in a certain system, but that often\r
+says little about the performance of the same instructions on a different\r
+processor, or even on the same processor with a different memory system. The\r
+result: Precise optimization for the general PC market is a thing of the\r
+past. (We're talking about optimizing for speed here; optimizing for size is\r
+the same for all processors so long as you stick to 8088-compatible code.)\r
+\r
+So there is no way to optimize performance ideally across the 80x86 family.\r
+An optimization that suits one processor beautifully is often a dog on\r
+another. Any 8088 programmer would instinctively replace:\r
+\r
+DEC CX\r
+JNZ LOOPTOP\r
+\r
+with:\r
+\r
+LOOP LOOPTOP\r
+\r
+because LOOP is significantly faster on the 8088. LOOP is also faster on the\r
+286. On the 386, however, LOOP is actually two cycles slower than DEC/JNZ.\r
+The pendulum swings still further on the 486, where LOOP is about twice as\r
+slow as DEC/JNZ--and, mind you, we're talking about what was originally\r
+perhaps the most obvious optimization in the entire 80x86 instruction set.\r
+\r
+In short, there is no such thing as code that's truly optimized for the\r
+80x86. Instead, code is either optimized for specific processor-memory\r
+combinations, or aimed down the middle, designed to produce good performance\r
+across a range of systems. Optimizing for the 80x86 family by aiming down\r
+the middle is quite different from optimizing for the 8088, but many PC\r
+programmers are inappropriately still applying the optimization lore they've\r
+learned over the years on the PC (or AT). The world has changed, and many of\r
+those old assumptions and tricks don't hold true anymore.\r
+\r
+You will not love the new world of 80x86 optimization, which is less precise\r
+and offers fewer clever tricks than optimizing for the 8088 alone. Still,\r
+isn't it better to understand the forces affecting your code's performance\r
+out in the real world than to optimize for a single processor and hope for\r
+the best?\r
+\r
+Better, yes. As much fun, no. Optimizing for the 8088 was just about as\r
+good as it gets. So it goes.\r
+\r
+Optimization Rules for a New World\r
+\r
+So, how do you go about writing fast code nowadays? One way is to write\r
+different versions of critical code for various processors and memory access\r
+speeds, selecting the best version at runtime. That's a great solution, but\r
+it requires an awful lot of knowledge and work.\r
+\r
+An alternative is to optimize for one particular processor and settle for\r
+whatever performance you get on the others. This might make sense when the\r
+8088 is the target processor because it certainly needs the optimization more\r
+than any other processor. However, 8088 optimization works poorly at the\r
+upper end of the 80x86 family.\r
+\r
+Nowadays, though, most of us want to optimize for the 286 and 386 systems\r
+that dominate the market, or across all 80x86 processors, and that's a tough\r
+nut to crack. The 286 and 386 come in many configurations, and you can be\r
+sure, for example, that a 386SX, an interleaved 386, and a cached 386 have\r
+markedly different performance characteristics. There are, alas, no hard and\r
+fast optimization rules that apply across all these environments.\r
+\r
+My own approach to 80x86 optimization has been to develop a set of general\r
+rules that serve reasonably well throughout the 80x86 line, especially the\r
+286 and 386, and to select a specific processor (in my case a cached 386, for\r
+which cycle times tend to be accurate) to serve as the tiebreaker when\r
+optimization details vary from one processor to another. (Naturally, it's\r
+only worth bothering with these optimizations in critical code.) The rules\r
+I've developed are:\r
+\r
+* Avoid accessing memory operands; use the registers to the max.\r
+\r
+* Don't branch.\r
+\r
+* Use string instructions, but don't go much out of your way to do so.\r
+\r
+* Keep memory accesses to a minimum by avoiding memory operands and keeping\r
+instructions short.\r
+\r
+* Align memory accesses.\r
+\r
+* Forget about many of those clever 8088 optimizations, using oddball\r
+instructions such as DAA and XLAT, that you spent years learning.\r
+\r
+Next I'll discuss each of these rules in turn in the context of\r
+8088-compatible real mode, which is still the focus of the 80x86 world.\r
+Later, I'll touch on protected mode.\r
+\r
+Let's start by looking at the last--and most surprising--rule.\r
+\r
+Kiss Those Tricks Goodbye\r
+\r
+To skilled assembly language programmers, the 8088 is perhaps the most\r
+wonderful processor ever created, largely because the instruction set is\r
+packed with odd instructions that are worthless to compilers but can work\r
+miracles in the hands of clever assembly programmers. Unfortunately, each\r
+new generation of the 80x86 has rendered those odd instructions and marvelous\r
+tricks less desirable. As the execution time for the commonly used\r
+instruction ADD BX, 4 has gone down from four cycles (8088) to three cycles\r
+(286) to two cycles (386) to one cycle (486), the time for the less\r
+frequently used instruction CBW has gone from two cycles (8088 and 286) up to\r
+three cycles (386 and 486)!\r
+\r
+Consider this ancient optimization for converting a binary digit to hex\r
+ASCII:\r
+\r
+ADD AL,90H\r
+DAA\r
+ADC AL,40H\r
+DAA\r
+\r
+Now consider the standard alternative:\r
+\r
+ADD AL,'0'\r
+CMP AL,'9'\r
+JBE HaveAscii\r
+ADD AL,'A'-('9'+1)\r
+HaveAscii:\r
+\r
+As Figure 1 indicates, the standard code should be slower on an 8088 or 286,\r
+but faster on a 386 or a 486--and real-world tests confirm those results, as\r
+shown in Figure 2. (All "actual performance" timings in this article were\r
+performed with the Zen timer from Zen of Assembly Language, see "References"\r
+for details. The systems used for the tests were: 8088, standard 4.77 MHz PC\r
+XT; 80286, standard one-wait-state, 8 MHz PC AT; 386SX, 16 MHz noncached;\r
+80386, 20 MHz externally cached with all instructions and data in external\r
+cache for all tests except Listings One and Two; 80486, 25 MHz internally\r
+cached, with all instructions and data in internal cache for all tests except\r
+Listings One and Two.)\r
+\r
+In other words, this nifty, time-tested optimization is an anti-optimization\r
+on the 386 and 486.\r
+\r
+Why is this? On the 386, DAA--a rarely used instruction--takes four cycles,\r
+and on the 486 it takes two cycles, in both cases twice as long as the more\r
+common instructions CMP and ADD; in contrast, on the 8088 all three\r
+instructions are equally fast at four cycles. Also, the instruction-fetching\r
+advantage that the 1-byte DAA provides on the 8088 means nothing on a cached\r
+386.\r
+\r
+Nor is this an isolated example. Most oddball instructions, from AAA to\r
+XCHG, have failed to keep pace with the core instructions--ADC, ADD, AND,\r
+CALL, CMP, DEC, INC, Jcc, JMP, LEA, MOV, OR, POP, PUSH, RET, SBB, SUB, TEST,\r
+and XOR--during the evolution from 8088 to 486. As we saw earlier, even LOOP\r
+lags behind on the 386 and 486. Check your favorite tricks for yourself;\r
+they might or might not hold up on the 386, but will most likely be\r
+liabilities on the 486. Sorry, but I just report the news, and the news is:\r
+Kiss most of those tricks goodbye as the 386 and 486 come to dominate the\r
+market. (This means that hand-optimization in assembly language yields less\r
+of a performance boost nowadays than it did when the 8088 was king; the\r
+improvement is certainly significant, but rarely in the 200-500 percent range\r
+anymore. Sic transit gloria mundi.) Most startling of all, string\r
+instructions lose much of their allure as we move away from the 8088, hitting\r
+bottom on the 486.\r
+\r
+The 486: All the Rules Change\r
+\r
+The 486 represents a fundamental break with 8088-style optimization.\r
+Virtually all the old rules fail on the 486, where, incredibly, a move to or\r
+from memory often takes just one cycle, but exchanging two registers takes\r
+three cycles. The nonbranching core instructions mentioned earlier take only\r
+one cycle on the 486 when operating on registers; MOV can, under most\r
+conditions, access memory in one cycle; and CALL and JMP take only three\r
+cycles, given a cache hit. However, noncore instructions take considerably\r
+longer. XLAT takes four cycles; even STC and CLC take two cycles each. The\r
+486's highly asymmetric execution times heavily favor core instructions and\r
+defeat most pre-486 optimizations.\r
+\r
+Core instructions do have a weakness on the 486. While 486 MOVs involving\r
+memory are remarkably fast, accessing memory for an operand to OR, ADD, or\r
+the like costs cycles. Even with the 8K internal cache, memory is not as\r
+fast as registers, except when MOV is used (and sometimes not even then), so\r
+registers are still preferred operands. (AND [BX],1 is fast, at only three\r
+cycles, but AND BX,1 takes only one cycle--three times as fast.)\r
+\r
+OUT should be avoided whenever possible on the 486, and likewise for IN. OUT\r
+takes anywhere from 10 to 31 cycles, depending on processor mode and\r
+privileges, more than an order of magnitude slower than MOV. The lousy\r
+performance of OUT -- true on the 386 as well -- has important implications\r
+for graphics applications.\r
+\r
+String instructions are so slow on the 486 that you should check cycle times\r
+before using any string instruction other than the always superior REP MOVS.\r
+For example, LODSB takes five cycles on the 486, but MOV AL,[SI]/INC SI takes\r
+only two cycles; likewise for STOSB and MOV [DI],AL/INC DI. Listing One\r
+(page 73) uses LODSB/STOSB to copy a string, converting lowercase to\r
+uppercase while copying; Listing Two (page 73) uses MOV/INC instead. Figure\r
+3 summarizes the performance of the two routines on a variety of processors;\r
+note the diminishing effectiveness of string instructions on the newer\r
+processors. Think long and hard before using string instructions other than\r
+REP MOVS on the 486.\r
+\r
+Optimization for the 486 is really a whole new ball game. When optimizing\r
+across the 80x86 family, the 486 will generally be the least of your worries\r
+because it is so much faster than the rest of the family; anything that runs\r
+adequately on any other processor will look terrific on the 486. Still, the\r
+future surely holds millions of 486s, so it wouldn't hurt to keep one eye on\r
+the 486 as you optimize.\r
+\r
+String Instructions: Fading Stars\r
+\r
+On the 8088, string instructions are so far superior to other instructions\r
+that it's worth going to great lengths to use them, but they lose much of\r
+that status on newer processors. One of the best things about string\r
+instructions on the 8088 is that they require little instruction fetching,\r
+because they're 1-byte instructions and because of the REP prefix; however,\r
+instruction fetching is less of a bottleneck on newer processors. String\r
+instructions also have superior cycle times on the 8088, but that advantage\r
+fades on the 286 and 386 as well.\r
+\r
+On the 286, string instructions (when they do exactly what you need) are\r
+still clearly better than the alternatives. On the 386, however, some string\r
+instructions are, even under ideal circumstances, the best choice only by a\r
+whisker, if at all. For example, since Day One, clearing a buffer has been\r
+done with REP STOS. That's certainly faster than the looping MOV/ADD\r
+approach shown in Listing Three (page 73), but on the 386 and 486 it's no\r
+faster than the unrolled loop MOV/ADD approach of Listing Four (page 73), as\r
+shown in Figure 4. (Actually, in my tests REP STOS was a fraction of a cycle\r
+slower on the 386, and fractionally faster on the 486.) REP STOS is much\r
+easier to code and more compact, so it's still the approach of choice for\r
+buffer clearing--but it's not necessarily fastest on a 486 or fast-memory\r
+386. This again demonstrates just how unreliable the old optimization rules\r
+are on the newer processors.\r
+\r
+The point is not that you shouldn't use string instructions on the 386. REP\r
+MOVS is the best way to move data, and the other string instructions are\r
+compact and usually faster, especially on uncached systems. However, on the\r
+386 it's no longer worth going to the trouble of juggling registers and\r
+reorganizing data structures to use string instructions. Furthermore, when\r
+you truly need maximum performance on the 386, check out nonstring\r
+instructions in unrolled loops. It goes against every lesson learned in a\r
+decade of 8088 programming, but avoiding string instructions sometimes pays\r
+on the 386.\r
+\r
+The Siren Song of Memory Accesses\r
+\r
+Finally, here's a rule that's constant from the 8088 to the 486: Use the\r
+registers. Avoid memory.\r
+\r
+Don't be fooled by the much faster memory access times of the 286 and 386.\r
+The effective address calculation time of the 8088 is mostly gone, so MOV\r
+AX,[BX] takes only five cycles on the 286, and ADD [SI],DX takes only seven\r
+on the 386. That's so much faster than the 17 and 29 cycles, respectively,\r
+that they take on the 8088 that you might start thinking that memory is\r
+pretty much interchangeable with registers.\r
+\r
+Think again. MOV AX,BX is still more than twice as fast as MOV AX,[BX] on\r
+the 286, and ADD SI,DX is more than three times as fast as ADD [SI],DX on the\r
+386. Memory operands can also reduce performance by slowing instruction\r
+fetching. Memory is fast on the 286 and 386. Registers are faster. Use\r
+them as heavily as possible.\r
+\r
+Don't Branch\r
+\r
+Here's another rule that stays the same across the 80x86 family: Don't\r
+branch. Branching suffers on the 8088 from lengthy cycle counts and emptying\r
+the prefetch queue. Emptying the prefetch queue is a lesser but nonetheless\r
+real problem in the post-8088 world, and the cycle counts of branches are\r
+still killers. As Figure 4 indicates, it pays to eliminate branches by\r
+unrolling loops or using repeated string instructions.\r
+\r
+Modern-Day Instruction Fetching\r
+\r
+Instruction fetching is the bugbear of 8088 performance; the 8088 simply\r
+can't fetch instruction bytes as quickly as it can execute them, thanks to\r
+its undersized bus. Minimizing all memory accesses, including instruction\r
+fetches, is paramount on the 8088.\r
+\r
+Instruction fetching is less of a problem nowadays. Figure 5 shows the\r
+maximum rates at which various processors can fetch instruction bytes;\r
+clearly, matters have improved considerably since the 8088, although\r
+instructions also execute in fewer cycles on the newer processors. Fetching\r
+problems can occur on any 80x86 processor, even the 486, but the only\r
+processors other than the 8088 that face major instruction fetching problems\r
+are the one-wait-state 286 and the 386SX, although uncached 386s may also\r
+outrun memory. However, the problems here are different from and less\r
+serious than with the 8088.\r
+\r
+Consider: An 8088 executes a register ADD in three cycles, but requires eight\r
+cycles to fetch that instruction, a fetch/execute ratio of 2.67. A\r
+one-wait-state 286 requires three cycles to fetch a register ADD and executes\r
+it in two cycles, a ratio of 1.5. A 386SX can fetch a register ADD in two\r
+cycles, matching the execution time nicely, and a cached 386 can fetch two\r
+register ADDs in the two cycles it takes to execute just one. For\r
+register-only code--the sort of code critical loops should contain--the 386\r
+generally runs flat out, and the 286 and 386SX usually (not always, but\r
+usually) outrun memory by only a little at worst. Greater fetching problems\r
+can arise when working with large instructions or instruction sequences that\r
+access memory nonstop, but those are uncommon in critical code. This is a\r
+welcome change from the 8088, where small, register-only instructions tend to\r
+suffer most from inadequate instruction fetching.\r
+\r
+Also, uncached 386 systems often use memory architectures that provide\r
+zero-wait-state performance when memory is accessed sequentially. In\r
+register-only code, instruction fetches are the only memory accesses, so\r
+fetching proceeds at full speed when the registers are used heavily.\r
+\r
+So, is instruction fetching a problem in the post-8088 world? Should\r
+instructions be kept short?\r
+\r
+Yes. Smaller instructions can help considerably on the one-wait-state 286\r
+and on the 386SX. Not as much as on the 8088, but it's still worth the\r
+trouble. Even a cached 386 can suffer from fetching problems, although\r
+that's fairly uncommon. For example, when several MOV WORD PTR [MemVar],0\r
+instructions are executed in a row, as might happen when initializing memory\r
+variables, performance tends to fall far below rated speed, as shown in\r
+Figure 6. The particular problem with MOV WORD PTR [MemVar],0 is that it\r
+executes in just two (386) or three (286) cycles, yet has both an addressing\r
+displacement field and a constant field. This eats up memory bandwidth by\r
+requiring more instruction fetching. It also accesses memory, eating up\r
+still more bandwidth. We'll see this again, and worse, when we discuss\r
+protected mode.\r
+\r
+Generally, though, post-8088 processors with fast memory systems and\r
+full-width buses run most instructions at pretty near their official cycle\r
+times; for these systems, optimization consists mostly of counting cycles.\r
+Slower memory or constricted buses (as in the 386SX) require that memory\r
+accesses (both instruction fetches and operand accesses) be minimized as\r
+well. Fortunately, the same sort of code--register only--meets both\r
+requirements.\r
+\r
+Use the registers. Avoid constants. Avoid displacements. Don't branch.\r
+That's the big picture. Don't sweat the details.\r
+\r
+Alignment: The Easy Optimization\r
+\r
+The 286, 386SX, and 386 take twice as long to access memory words at odd\r
+addresses as at even addresses. The 386 takes twice as long to access memory\r
+dwords at addresses that aren't multiples of four as those that are. You\r
+should use ALIGN 2 to word align all word-sized data, and ALIGN 4 to dword\r
+align all data that's accessed as a dword operand, as in:\r
+\r
+        ALIGN   4\r
+MemVar  dd      ?\r
+        :\r
+        MOV     EAX,[MemVar]\r
+\r
+Alignment also applies to code; you may want to word or dword align the\r
+starts of procedures, labels that can only be reached by branching, and the\r
+tops of loops. (Code alignment matters only at branch targets, because only\r
+the first instruction fetch after a branch can suffer from nonalignment.)\r
+Dword alignment of code is optimal, and will help on the 386 even in real\r
+mode, but word alignment will produce nearly as much improvement as dword\r
+alignment without wasting nearly as many bytes.\r
+\r
+Alignment improves performance on many 80x86 systems without hindering it on\r
+any. Recommended.\r
+\r
+Protected Mode\r
+\r
+There are two sorts of protected mode, 16-bit and 32-bit. The primary\r
+optimization characteristic of 16-bit protected mode (OS/2 1.X, Rational DOS\r
+Extender) is that it takes an ungodly long time to load a segment register\r
+(for example, MOV ES,AX takes 17 cycles on a 286) so load segment registers\r
+as infrequently as possible in 16-bit protected mode.\r
+\r
+Optimizing for 32-bit protected mode (OS/2 2.0, SCO Unix, Phar Lap DOS\r
+Extender) is another matter entirely. Typically, no segment loads are needed\r
+because of the flat address space. However, 32-bit protected mode code can\r
+be bulky, and that can slow instruction fetching. Constants and addressing\r
+displacements can be as large as 4 bytes each, and an extra byte, the SIB\r
+byte, is required whenever two 32-bit registers are used to address an\r
+operand or scaled addressing is used. So, for example, MOV DWORD PTR\r
+[MemVar],0 is a 10-byte instruction in 32-bit protected mode. The\r
+instruction is supposed to execute in two cycles, but even a 386 needs four\r
+to six cycles to fetch it, plus another two cycles to access memory; a few\r
+such instructions in a row can empty the prefetch queue and slow performance\r
+considerably. The slowdown occurs more quickly and is more acute on a 386SX,\r
+which needs 14 cycles to perform the memory accesses for this nominally\r
+2-cycle instruction.\r
+\r
+Code can get even larger when 32-bit instructions are executed in 16-bit\r
+segments, adding prefix bytes. (Avoid prefix bytes if you can; they increase\r
+instruction size and can cost cycles.) Figure 7 shows actual versus nominal\r
+cycle times of multiple MOV DWORD PTR [EBX*4+MemVar],0 instructions running\r
+in a 16-bit segment. Although cache type (write-back, write-through) and\r
+main-memory write time also affect the performance of stores to memory, there\r
+is clearly a significant penalty for using several large (in this case,\r
+13-byte) instructions in a row.\r
+\r
+Fortunately, this is a worst case, easily avoided by keeping constants and\r
+displacements out of critical loops. For example, you should replace:\r
+\r
+ADDLOOP:\r
+        MOV     DWORD PTR BaseTable[EDX+EBX],0\r
+        ADD     EBX,4\r
+        DEC     ECX\r
+        JNZ     ADDLOOP\r
+\r
+with:\r
+\r
+        LEA     EBX,BaseTable[EDX+EBX]\r
+        SUB     EAX,EAX\r
+ADDLOOP:\r
+        MOV     [EBX],EAX\r
+        ADD     EBX,4\r
+        DEC     ECX\r
+        JNZ     ADDLOOP\r
+\r
+Better yet, use REP STOSD or unroll the loop!\r
+\r
+Happily, register-only instructions are no larger in 32-bit protected mode\r
+than otherwise and run at or near their rated speed in 32-bit protected mode\r
+on all processors. All in all, in protected mode it's more important than\r
+ever to avoid large constants and displacements and to use the registers as\r
+much as possible.\r
+\r
+Conclusion\r
+\r
+Optimization across the 80x86 family isn't as precise as 8088 optimization,\r
+and it's a lot less fun, with fewer nifty tricks and less spectacular\r
+speed-ups. Still, familiarity with the basic 80x86 optimization rules can\r
+give you a decided advantage over programmers still laboring under the\r
+delusion that the 286, 386, and 486 are merely faster 8088s.\r
+\r
+References\r
+\r
+Abrash, Michael. Zen of Assembly Language. Glenview, Ill.: Scott, Foresman,\r
+1990.\r
+\r
+Barrenechea, Mark. "Peak Performance: On to the 486." Programmer's Journal,\r
+(November-December 1990).\r
+\r
+Paterson, Tim. "Assembly Language Tricks of the Trade." Dr. Dobb's Journal\r
+(March 1990).\r
+\r
+Turbo Assembler Quick Reference Guide. Borland International, 1990.\r
+\r
+i486 Microprocessor Programmer's Reference Manual. Intel Corporation, 1989.\r
+\r
+80386 Programmer's Reference Manual. Intel Corporation, 1986.\r
+\r
+Microsystems Components Handbook: Microprocessors Volume I. Intel\r
+Corporation, 1985.\r
--- /dev/null
+PEL_READ_REG EQU 03C7h ;Color register, read address\r
+PEL_WRITE_REG EQU 03C8h ;Color register, write address\r
+PEL_DATA_REG EQU 03C9h ;Color register, data port\r
+SC_INDEX equ 03C4h ;Sequence Controller Index\r
+CRTC_INDEX equ 03D4h ;CRT Controller Index\r
+MISC_OUTPUT equ 03C2h ;Miscellaneous Output register\r
+SCREEN_SEG equ 0a000h ;segment of display memory in mode X\r
+INPUT_STATUS_1 equ 03DAh ;Input Status 1 register\r
+ATC_INDEX equ 03C0h ;Attribute Controller\r
+START_ADDRESS_HIGH equ 0Ch ;bitmap start address high byte\r
+START_ADDRESS_LOW equ 0Dh ;bitmap start address low byte\r
+GC_INDEX EQU 03CEh\r
+BIT_MASK EQU 08h\r
+MAP_MASK EQU 02h\r
+\r
+ALL_COPY_BITS EQU 00000h+BIT_MASK\r
+ALL_DRAW_BITS EQU 0FF00h+BIT_MASK\r
+\r
+SQUARE_WIDTH EQU 16\r
+SQUARE_HEIGHT EQU 16\r
+SCREEN_WIDTH EQU 320\r
+SCREEN_HEIGHT EQU 240\r
+VIRTUAL_WIDTH EQU 352\r
+VIRTUAL_HEIGHT EQU 240\r
+\r
+PAGE_0 EQU 0\r
+PAGE_1 EQU 05540h ;05470h ;5540h\r
+PAGE_2 EQU 0AA80h ;0A8E0h ;AA80h\r
+\r
+SCROLL_SPEED EQU 1 ; Don't let it go above 8!\r
+MAGIC_NUM EQU 100\r
+\r
+CPU8086 EQU 0\r
+CPU80286 EQU 1\r
+CPU80386 EQU 2\r
+CPU80486 EQU 3\r
+\r
+;======================================================================\r
+; Key Assignments (each value is the key's scan code times two, e.g. ESC = 01h -> 2)\r
+;======================================================================\r
+kESC EQU 2\r
+kONE EQU 4\r
+kTWO EQU 6\r
+kTHREE EQU 8\r
+kFOUR EQU 10\r
+kFIVE EQU 12\r
+kSIX EQU 14\r
+kSEVEN EQU 16\r
+kEIGHT EQU 18\r
+kNINE EQU 20\r
+kZERO EQU 22\r
+kMINUS EQU 24\r
+kEQUAL EQU 26\r
+kBACKSPACE EQU 28\r
+kTAB EQU 30\r
+kQ EQU 32\r
+kW EQU 34\r
+kE EQU 36\r
+kR EQU 38\r
+kT EQU 40\r
+kY EQU 42\r
+kU EQU 44\r
+kI EQU 46\r
+kO EQU 48\r
+kP EQU 50\r
+kL_BRACE EQU 52\r
+kR_BRACE EQU 54\r
+kENTER EQU 56\r
+kCTRL EQU 58\r
+kA EQU 60\r
+kS EQU 62\r
+kD EQU 64\r
+kF EQU 66\r
+kG EQU 68\r
+kH EQU 70\r
+kJ EQU 72\r
+kK EQU 74\r
+kL EQU 76\r
+kSEMICOLON EQU 78\r
+kQUOTE EQU 80\r
+kBACKQUOTE EQU 82\r
+kL_SHIFT EQU 84\r
+kBACKSLASH EQU 86\r
+kZ EQU 88\r
+kX EQU 90\r
+kC EQU 92\r
+kV EQU 94\r
+kB EQU 96\r
+kN EQU 98\r
+kM EQU 100\r
+kCOMMA EQU 102\r
+kPERIOD EQU 104\r
+kSLASH EQU 106\r
+kR_SHIFT EQU 108\r
+kGREY_STAR EQU 110\r
+kALT EQU 112\r
+kSPACE EQU 114\r
+kCAPSLOCK EQU 116\r
+kF1 EQU 118\r
+kF2 EQU 120\r
+kF3 EQU 122\r
+kF4 EQU 124\r
+kF5 EQU 126\r
+kF6 EQU 128\r
+kF7 EQU 130\r
+kF8 EQU 132\r
+kF9 EQU 134\r
+kF10 EQU 136\r
+kNUMLOCK EQU 138\r
+kSCRLLOCK EQU 140\r
+kHOME EQU 142\r
+kUP EQU 144\r
+kPAGE_UP EQU 146\r
+kGREY_MINUS EQU 148\r
+kLEFT EQU 150\r
+kPAD_FIVE EQU 152\r
+kRIGHT EQU 154\r
+kGREY_PLUS EQU 156\r
+kEND EQU 158\r
+kDOWN EQU 160\r
+kPAGE_DOWN EQU 162\r
+kINSERT EQU 164\r
+kDELETE EQU 166\r
+\r
+kF11 EQU 174\r
+kF12 EQU 176\r
+\1a
\ No newline at end of file
--- /dev/null
+#include <stdio.h>\r
+#include <string.h>\r
+#include <memory.h>\r
+#include <stdlib.h>\r
+\r
+#define WIDTH 255\r
+\r
+#define MAPNAME "Diagonal"\r
+#define FILENAME "%s.MAP"\r
+char fn[100] = FILENAME;\r
+typedef unsigned char BYTE;\r
+typedef unsigned short int WORD;\r
+typedef BYTE ROW[WIDTH];\r
+\r
+ROW r;\r
+\r
+#define MAGIC_NUM 100\r
+#define SQUARE_WIDTH 16\r
+#define SQUARE_HEIGHT 16\r
+#define SCREEN_WIDTH 320\r
+#define SCREEN_HEIGHT 200\r
+#define VIRTUAL_WIDTH 352\r
+#define VIRTUAL_HEIGHT 240\r
+typedef struct MAPHEADER {\r
+ BYTE name[12]; /* 12 bytes: map name, then a 0 at [n] and Ctrl-Z (26) at [n+1] */\r
+ WORD width; /* 2 bytes: map width as written by main() */\r
+ WORD height; /* 2 bytes: map height as written by main() */\r
+ WORD extent; /* 2 bytes: width * height */\r
+ WORD off_x1; /* 2 bytes: main() writes 0 */\r
+ WORD off_y1; /* 2 bytes: main() writes 0 */\r
+ WORD off_x2; /* 2 bytes: ((VIRTUAL_WIDTH / SQUARE_WIDTH) - 1) % width */\r
+ WORD off_y2; /* 2 bytes: (((VIRTUAL_HEIGHT / SQUARE_HEIGHT) - 1) % height) * width */\r
+ WORD x_wrap; /* 2 bytes: main() writes width */\r
+ WORD y_wrap; /* 2 bytes: main() writes height */\r
+ WORD magic; /* 2 bytes: MAGIC_NUM */\r
+ } MAPHEADER, far *LPMAPHEADER;\r
+MAPHEADER mh;\r
+\r
+/* Writes the MAPNAME map file: one MAPHEADER followed by `height` rows of\r
+   `width` bytes. Row i holds consecutive values modulo width starting at\r
+   i % width; afterwards its first byte is forced to 1 (0 for row 0).\r
+   Usage: genmap [width [height]]. Both default to WIDTH and are clamped to\r
+   1..WIDTH; when only width is given, height takes the same value.\r
+   Exits with status 1 if the output file cannot be opened, 0 otherwise. */\r
+void main(int argc, char *argv[])\r
+    {\r
+    FILE *fp;\r
+    int i,j;\r
+    BYTE b;\r
+    int width = WIDTH;\r
+    int height = WIDTH;\r
+\r
+    if (argc > 1) {\r
+        width = atoi(argv[1]);\r
+        if (width > WIDTH)\r
+            width = WIDTH;\r
+        if (width < 1)          /* atoi() yields 0 for junk; width is used as a modulus */\r
+            width = 1;\r
+        printf("Width = %d\n",width);\r
+        height=width;\r
+        if (argc > 2) {\r
+            height = atoi(argv[2]);\r
+            if (height > WIDTH)\r
+                height = WIDTH;\r
+            if (height < 1)     /* same guard: height is used as a modulus too */\r
+                height = 1;\r
+            printf("Height = %d\n",height);\r
+            }\r
+        }\r
+\r
+    sprintf(fn,FILENAME,MAPNAME);\r
+    fp = fopen(fn,"wb");\r
+    if (!fp) {\r
+        printf("Couldn't open %s for write.\n",fn);\r
+        exit(1);\r
+        }\r
+\r
+    memset(&mh, 0xFF, sizeof(MAPHEADER)); /* Will reveal missing initializing */\r
+    strcpy((char *)mh.name,MAPNAME);\r
+    mh.name[8]=0;\r
+    mh.name[9]=26; /* Ctrl-Z */\r
+    mh.width = (WORD)width;\r
+    mh.height = (WORD)height;\r
+    mh.extent = (WORD)((WORD)width * (WORD)height);\r
+    mh.off_x1 = (WORD)0;\r
+    mh.off_y1 = (WORD)0;\r
+    mh.off_x2 = (WORD)(((VIRTUAL_WIDTH / SQUARE_WIDTH) - 1) % width);\r
+    mh.off_y2 = (WORD)((((VIRTUAL_HEIGHT / SQUARE_HEIGHT) - 1) % height) * width);\r
+    mh.x_wrap = (WORD)width;\r
+    mh.y_wrap = (WORD)height;\r
+    mh.magic = MAGIC_NUM;\r
+    fwrite(&mh, 1, sizeof(MAPHEADER), fp);\r
+\r
+    /* One row per unit of height (the header advertises width*height cells);\r
+       the original looped to width and so wrote the wrong number of rows\r
+       whenever height differed from width. */\r
+    for (i = 0; i<height; i++) {\r
+        b = (BYTE)(i%width);\r
+        for (j = 0; j<width; j++) {\r
+            r[j] = b;\r
+            b = (BYTE)(((int)b+1) % width);\r
+            }\r
+        r[0]=1;\r
+        if (i == 0) r[0]=0;\r
+        fwrite(r, width,1, fp);\r
+        printf("Map row %d\r",i);\r
+        }\r
+    fclose(fp);\r
+    printf("All done! \n");\r
+    exit(0);\r
+    }\r
+\1a
\ No newline at end of file
--- /dev/null
+genmap.obj; \r
--- /dev/null
+#include <stdio.h>\r
+#include <string.h>\r
+#include <memory.h>\r
+#include <stdlib.h>\r
+\r
+#define COLORS 256\r
+#define PALNAME "Diagonal"\r
+#define FILENAME "%s.PAL"\r
+char fn[100] = FILENAME;\r
+typedef unsigned char BYTE;\r
+typedef unsigned short int WORD;\r
+typedef struct COLOR {\r
+ BYTE r,g,b;\r
+ } COLOR, far *LPCOLOR;\r
+\r
+void main() /* int argc, char *argv[]) */\r
+ {\r
+ FILE *fp;\r
+ int i;\r
+ int colors = COLORS;\r
+ COLOR c;\r
+ int r,dr,g,dg,b,db;\r
+\r
+ sprintf(fn,FILENAME,PALNAME);\r
+ fp = fopen(fn,"wb");\r
+ if (!fp) {\r
+ printf("Couldn't open %s for write.\n",fn);\r
+ exit(1);\r
+ }\r
+\r
+ r=0; dr=2;\r
+ g=0; dg=3;\r
+ b=0; db=5;\r
+ for (i = 0; i < colors; i++) {\r
+ c.r = (BYTE)r; r+=dr;\r
+ if (r > 63) { r = 63; dr = -dr; }\r
+ else if (r < 0) { r = 0; dr = -dr; }\r
+ c.g = (BYTE)g; g+=dg;\r
+ if (g > 63) { g = 63; dg = -dg; }\r
+ else if (g < 0) { g = 0; dg = -dg; }\r
+ c.b = (BYTE)b; b+=db;\r
+ if (b > 63) { b = 63; db = -db; }\r
+ else if (b < 0) { b = 0; db = -db; }\r
+ fwrite(&c, sizeof(c),1, fp);\r
+ printf("Palette %d\r",i);\r
+ }\r
+ fclose(fp);\r
+ printf("All done! \n");\r
+ exit(0);\r
+ }\r
+\1a
\ No newline at end of file
--- /dev/null
+genpal.obj; \r
--- /dev/null
+#include <stdio.h>\r
+#include <string.h>\r
+#include <memory.h>\r
+#include <stdlib.h>\r
+\r
+#define WIDTH 256\r
+\r
+#define FILENAME "DIAGONAL.TIL"\r
+char fn[100] = FILENAME;\r
+typedef unsigned char BYTE;\r
+typedef BYTE ROW[16];\r
+typedef ROW BITMAP[16];\r
+\r
+BITMAP b;\r
+BITMAP c;\r
+\r
+BITMAP pattern={{1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0},\r
+ {1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0},\r
+ {1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0},\r
+ {1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0},\r
+ {1,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0},\r
+ {0,0,1,0,0,1,1,0,1,1,0,0,2,0,0,0},\r
+ {0,0,1,0,0,1,2,0,1,2,0,0,2,0,0,0},\r
+ {0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0},\r
+ {0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0},\r
+ {0,1,0,0,1,2,0,0,0,1,2,0,0,2,0,0},\r
+ {0,0,1,0,1,2,1,1,1,1,2,0,2,0,0,0},\r
+ {0,0,1,0,0,2,2,2,2,2,0,0,2,0,0,2},\r
+ {0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2},\r
+ {0,0,0,0,2,2,0,0,0,2,2,0,0,0,0,2},\r
+ {0,0,0,0,0,0,2,2,2,0,0,0,0,0,2,2},\r
+ {0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2}};\r
+\r
+/* Colours a 16x16 tile from a template: template code 0 becomes m, code 1\r
+   becomes l and code 2 becomes h. Cells holding any other code are left\r
+   as they were in b. */\r
+void copy_pattern(BITMAP b,BITMAP patt, BYTE l, BYTE m, BYTE h)\r
+    {\r
+    int row,col;\r
+    BYTE code;\r
+\r
+    for (row=0; row<16; row++)\r
+        for (col=0; col<16; col++) {\r
+            code = patt[row][col];\r
+            if (code == 0)\r
+                b[row][col] = m;\r
+            else if (code == 1)\r
+                b[row][col] = l;\r
+            else if (code == 2)\r
+                b[row][col] = h;\r
+            }\r
+    }\r
+\r
+/* Transforms linear to planar: plane p (0..3) receives, row by row, every\r
+   fourth pixel of b starting at column p. The four 64-byte planes are\r
+   stored back to back in c. */\r
+void transform(BITMAP b,BITMAP c)\r
+    {\r
+    BYTE *planes = (BYTE *)c;\r
+    int plane,row,k;\r
+\r
+    for (plane=0; plane<4; plane++)\r
+        for (row=0; row<16; row++)\r
+            for (k=0; k<4; k++)\r
+                planes[plane*64 + row*4 + k] = b[row][k*4 + plane];\r
+    }\r
+\r
+/* Writes FILENAME: one 16x16 planar tile per index 0..width-1. Each tile is\r
+   the template pattern coloured with the index itself and its two\r
+   neighbours (index-1 and index+1, wrapping modulo width).\r
+   Usage: gensq [width]; width defaults to WIDTH and is capped at WIDTH.\r
+   Exits 1 if the file cannot be opened. */\r
+void main(int argc,char *argv[])\r
+    {\r
+    FILE *out;\r
+    int tile;\r
+    int count = WIDTH;\r
+    BYTE prev,next;\r
+\r
+    out = fopen(fn,"wb");\r
+    if (out == NULL) {\r
+        printf("Couldn't open %s for write.\n",fn);\r
+        exit(1);\r
+        }\r
+    if (argc > 1) {\r
+        count = atoi(argv[1]);\r
+        if (count > WIDTH)\r
+            count = WIDTH;\r
+        printf("Width = %d\n",count);\r
+        }\r
+\r
+    tile = 0;\r
+    while (tile < count) {\r
+        prev = (BYTE)((tile + count - 1) % count);\r
+        next = (BYTE)((tile + 1) % count);\r
+        copy_pattern(b, pattern, prev, (BYTE)tile, next);\r
+        transform(b,c);\r
+        fwrite(c, 16,16, out);\r
+        printf("Square %d\r",tile);\r
+        tile++;\r
+        }\r
+    fclose(out);\r
+    printf("All done! \n");\r
+    exit(0);\r
+    }\r
+\1a
\ No newline at end of file
--- /dev/null
+gensq.obj; \r
--- /dev/null
+;; Error messages\r
+ERR_OK EQU 0\r
+msgErr0 db 'Later!',13,10,'$'\r
+ERR_MEM EQU 1\r
+msgErr1 db 'Error 001: Out of memory?',13,10,'$'\r
+ERR_CPU EQU 2\r
+msgErr2 db 'Error 002: CPU must be at least an 80386.',13,10,'$'\r
+ERR_FILE EQU 3\r
+msgErr3 db 'Error 003: File error.',13,10,'$'\r
+ERR_FILENOTFOUND EQU 4\r
+msgErr4 db 'Error 004: File not found.',13,10,'$'\r
+msgtblError dw offset msgErr0, offset msgErr1, offset msgErr2,\r
+ offset msgErr3, offset msgErr4\r
+nError db 0\r
+\r
+;; CPU name strings\r
+CPUName86 DB "8088/8086$"\r
+CPUName286 DB "80286DX/SX$"\r
+CPUName386 DB "80386DX/SX$"\r
+CPUName486 DB "80486DX/SX or better$"\r
+CPUNameTable DW CPUName86,CPUName286,CPUName386,CPUName486\r
+\r
+EVEN\r
+msgCPUTypeIs DB "Your CPU type: $"\r
+EVEN\r
+msgCPUTypeIsEnd DB 13,10,'$'\r
+nCPU DB 0\r
+\r
+EVEN\r
+msgPages DB 'Pages displayed: '\r
+strNumPages DB 6 dup (?),13,10,'$'\r
+\r
+EVEN\r
+bufText DW 80*50 DUP (?) ; Needs this much to hold\r
+ ; a 50-line screen...\r
+wCPos DW 0\r
+nDisplay DB 0\r
+\r
+EVEN\r
+fnMap1 db 'DIAGONAL.MAP',0\r
+fnTiles1 db 'DIAGONAL.TIL',0\r
+fnPalette db 'DIAGONAL.PAL',0 ; only one allowed, for now\r
+fnMap2 db 'SCROLL.MAP',0\r
+fnTiles2 db 'SCROLL.TIL',0\r
+\r
+fntblMap dw offset fnMap1,offset fnMap2\r
+fntblTiles dw offset fnTiles1,offset fnTiles2\r
+nMap dw 0\r
+\r
+;; CPUType: classifies the processor, stores CPU8086/CPU80286/CPU80386/CPU80486 in nCPU,\r
+;; then prints msgCPUTypeIs, the matching CPUNameTable entry and msgCPUTypeIsEnd via DOSPRINT.\r
+;; Routine snatched from Ray Duncan's _Power Programming MASM_ chapter 14, reformatted; it no\r
+;; longer saves registers: AX/EAX, BX, ECX and the flags are modified (plus whatever DOSPRINT uses).\r
+CPUType PROC near\r
+ pushf ; step 1: copy FLAGS into AX and try to clear bits 12-15\r
+ pop ax ; AX = current FLAGS\r
+ and ax,0fffh\r
+ push ax ; write the modified value back into FLAGS\r
+ popf\r
+ pushf\r
+ pop ax ; read FLAGS again to see what stuck\r
+ and ax,0f000h ; if bits 12-15 are still\r
+ cmp ax,0f000h ; set, this is 8086/88\r
+ jne cpu1 ; bits could be cleared: not 8086/88\r
+ mov nCPU,CPU8086 ; set nCPU = 86/88 CPU type\r
+ jmp cpux ; and go print the result\r
+\r
+cpu1: or ax,0f000h ; step 2: must be 286 or later,\r
+ push ax ; now try to set bits 12-15\r
+ popf ; of CPU flags\r
+ pushf\r
+ pop ax ; if bits 12-15 can't be\r
+ and ax,0f000h ; set, this is a 286\r
+ jnz cpu2 ; some bit stuck: not an 80286\r
+ mov nCPU,CPU80286 ; set nCPU = 286 CPU type\r
+ jmp cpux ; and go print the result\r
+\r
+cpu2: mov bx,sp ; step 3: 386 or later, save SP in BX\r
+ and sp,not 3 ; dword-align SP to avoid a stack alignment fault\r
+ pushfd ; get value of EFLAGS\r
+ pop eax\r
+ mov ecx,eax ; save copy of EFLAGS for the comparison below\r
+ xor eax,40000h ; flip AC bit (bit 18) in EFLAGS\r
+ push eax ; try and force EFLAGS\r
+ popfd\r
+ pushfd ; get back EFLAGS value\r
+ pop eax\r
+ mov sp,bx ; restore old stack pointer\r
+ xor eax,ecx ; nonzero if the AC bit actually changed\r
+ jnz cpu3 ; AC bit changed: not a 386, treat as 486\r
+ mov nCPU,CPU80386 ; set nCPU = 386 CPU type\r
+ jmp cpux ; and go print the result\r
+\r
+cpu3: mov nCPU,CPU80486 ; set nCPU = 486 CPU type\r
+\r
+cpux: mov bl,nCPU\r
+ xor bh,bh\r
+ shl bx,1\r
+ DOSPRINT <offset msgCPUTypeIs>\r
+ DOSPRINT CPUNameTable[bx]\r
+ DOSPRINT <offset msgCPUTypeIsEnd>\r
+ ret ; return with nCPU = CPU type\r
+CPUType ENDP\r
+\r
+;; Initialize: program driver. In order, it\r
+;;  - stores CS in segCode and the incoming DS in segPSP, then sets DS = CS;\r
+;;  - resizes the program's memory block to 64*CODE_SIZE paragraphs\r
+;;    (INT 21h/AH=4Ah);\r
+;;  - calls CPUType and refuses to continue below an 80386\r
+;;    (because that's what I assembled the code for);\r
+;;  - loads the palette file and the index, hooks the keyboard, and runs\r
+;;    MainLoop bracketed by Click/ZTimerOn and ZTimerOff/Click;\r
+;;  - prints the message selected by nError and the page count, reports the\r
+;;    timer and exits to DOS with nError as the return code.\r
+;; Never returns to its caller. Every failure path jumps to TerminateError;\r
+;; the memory and CPU checks set nError themselves, the loaders are\r
+;; expected to have set it before returning with carry.\r
+Initialize PROC near\r
+ ; Set DS = CS in this program, since data is local\r
+ mov ax,cs\r
+ mov segCode,ax ; Store the Code Segment\r
+ mov bx,ds\r
+ mov segPSP,bx ; Store the PSP Segment\r
+ mov ds,ax ; Set DS = CS\r
+\r
+ ; Resize code to 64K\r
+ CODE_SIZE EQU 64 ; <- this is arbitrary.\r
+ ; ES already -> allocated segment\r
+ mov ah,4ah\r
+ mov bx,64*CODE_SIZE ; new block size in paragraphs\r
+ int 21h\r
+ mov nError,ERR_MEM ; MOV leaves the carry from INT 21h intact\r
+ jc TerminateError\r
+\r
+;; I've chosen not to implement sprites yet so that I can get this out\r
+;; the door...\r
+;; ; 320x200 buffer for sprite drawing. To draw sprites, first draw them\r
+;; ; into this buffer, adding rectangles to the current rectangle list.\r
+;; ; Then, use BUFFER_COPY to put out the buffers with the current\r
+;; ; rectangle list to the screen. BUFFER_COPY will ensure minimal VGA\r
+;; ; writing.\r
+;; ; Create a buffer segment\r
+;; mov bx,(320 * 200) / 16\r
+;; mov ah,48h\r
+;; int 21h\r
+;; mov nError,ERR_MEM\r
+;; jc TerminateError\r
+;; mov segBuffer,ax\r
+\r
+ call CPUType\r
+ mov nError,ERR_CPU\r
+ cmp nCPU,CPU80386 ; named constant, same value CPUType stores\r
+ jl TerminateError ; anything below a 386 cannot run this code\r
+\r
+ mov ds,segCode\r
+ mov dx,offset fnPalette\r
+ call LoadPaletteFile\r
+ jc TerminateError\r
+\r
+ call LoadIndex\r
+ jc TerminateError\r
+\r
+ KEYB_START\r
+\r
+ call Beginning ; Can display an entry screen here\r
+\r
+ ; This is linked in from Michael Abrash's zen timer code.\r
+ ; (But I wrote the Click myself)\r
+ call Click\r
+ call ZTimerOn\r
+\r
+ call MainLoop\r
+\r
+ call ZTimerOff\r
+ call Click\r
+\r
+ call Ending ; Can display an exit screen here\r
+\r
+ KEYB_END\r
+\r
+Terminate: mov nError,ERR_OK\r
+TerminateError:\r
+ mov ax,cs ;DOS functions require that DS point\r
+ mov ds,ax ; to text to be displayed on the screen\r
+ mov bh,0\r
+ mov bl,nError\r
+ shl bx,1 ; word index into msgtblError\r
+ DOSPRINT msgtblError[bx]\r
+\r
+ mov ax,pages\r
+ mov ds,segCode\r
+ mov si,offset strNumPages\r
+ call Int2Ascii ; page count -> strNumPages, printed as part of msgPages\r
+ DOSPRINT <offset msgPages>\r
+\r
+ call ZTimerReport\r
+\r
+ mov al,nError ; exit code = error number\r
+ mov ah,4ch ; DOS Terminate\r
+ int 21h\r
+ ; Don't need to RET! We're outta here\r
+Initialize ENDP\r
+\r
+;; Click: clicks the internal speaker by setting bits 0 and 1 of port 61h, busy-waiting, then writing\r
+;; back the value originally read. I use this to indicate that page timing has started. Modifies AX, CX.\r
+Click PROC\r
+ in al,61h\r
+ mov ah,al\r
+ or al,3\r
+ out 61h,al\r
+\r
+ mov cx,5000 ; loop count for the busy-wait (this is an arbitrary delay!)\r
+spkr_on: loop spkr_on\r
+ mov al,ah\r
+ out 61h,al\r
+ ret\r
+Click ENDP\r
+\r
+;; Int2Ascii: formats the signed 16-bit value in AX as six characters at\r
+;; CS:SI: a sign column (' ' or '-') followed by five zero-padded decimal\r
+;; digits.\r
+;; In:  AX = value, SI = offset (within CS) of a 6-byte buffer.\r
+;; Out: buffer filled in. Modifies AX, CX, DX, SI and the flags.\r
+;; Copied from an old 8088 "Learn Assembly" book and changed a bit\r
+Int2Ascii PROC\r
+ mov byte ptr cs:[si],' ' ; sign column, blank unless negative\r
+ mov byte ptr cs:[si+1],'0' ; pre-fill the five digit columns so\r
+ mov byte ptr cs:[si+2],'0' ; short numbers come out zero-padded\r
+ mov byte ptr cs:[si+3],'0'\r
+ mov byte ptr cs:[si+4],'0'\r
+ mov byte ptr cs:[si+5],'0'\r
+ add si,6 ; SI -> one past the last digit column\r
+ mov cx,10 ; divisor (the old dead "mov cx,6" is gone)\r
+ or ax,ax\r
+ jns clear_divide\r
+ neg ax ; work on the magnitude\r
+ mov byte ptr cs:[si-6],'-'\r
+clear_divide: mov dx,0 ; DX:AX / 10 -> AX = quotient, DX = digit\r
+ div cx\r
+ add dx,'0'\r
+ dec si ; digits are stored right to left\r
+ mov cs:[si],dl\r
+ or ax,ax\r
+ jnz clear_divide ; until nothing is left to divide\r
+ ret\r
+Int2Ascii ENDP\r
+\r
+;; Given a filename at DS:DX, reads the file into memory and returns\r
+;; a pointer to it as DX:0000.\r
+;; Note that this routine obviously will only work correctly for\r
+;; a file < 640k in size, but you can bring in files bigger than 64k.\r
+;; This code comes from Future Crew's STMIK sampler "Mental Surgery"\r
+;; and I commented it up to make it fit in with my stuff a little better.\r
+;; Thank you, FC, for releasing that code! Several of the routines\r
+;; in this program were inspired or helped along by having that source...\r
+;; Most recently, added in error codes.\r
+;;\r
+;; In:  DS:DX -> ASCIIZ filename.\r
+;; Out: success: DX = segment of the loaded data, nError = ERR_OK.\r
+;;      failure: carry set, nError = ERR_FILENOTFOUND, ERR_FILE or ERR_MEM.\r
+;; Modifies AX, BX, CX, DX, SI, DI and DS (DS is left pointing past the\r
+;; data that was read; it is not restored).\r
+;; NOTE(review): nError is written after DS has been changed, so this relies\r
+;; on nError being addressed through CS (ASSUME) -- confirm.\r
+;; NOTE(review): the error paths return without closing the file handle or\r
+;; freeing the allocated block.\r
+EVEN\r
+LoadFile PROC NEAR\r
+ ;set: DX=offset to filename\r
+ ;return: DX=segment of file\r
+\r
+ ; Open the datafile at DS:DX.\r
+ mov ax,3D00h ; 3D,00 -> Open file, read only\r
+ ; DS:DX already points at filename\r
+ int 21h ; returns AX=file handle\r
+ mov cl,ERR_FILENOTFOUND\r
+ jc ferror\r
+ mov bx,ax ; Store file handle in BX\r
+ mov si,bx ; and keep a copy in SI\r
+\r
+ ; Get the length of the file so we know how much to allocate\r
+ mov ax,4202h ; 42,02 -> Seek, signed from end\r
+ mov cx,0 ; CX:DX is a long file offset,\r
+ ; BX is already set as file handle\r
+ mov dx,0 ; zero in this case = end of file\r
+ int 21h ; (returns long offset in DX:AX)\r
+ mov cl,ERR_FILE\r
+ jc ferror\r
+\r
+;;; shr dx,1 ; This is original FC code,\r
+;;; rcr ax,1 ; which I removed because the\r
+;;; shr dx,1 ; 386 has a nice instruction\r
+;;; rcr ax,1 ; to do this all!\r
+;;; shr dx,1 ; But 286 users will want to\r
+;;; rcr ax,1 ; return to this code, instead\r
+;;; shr dx,1 ; of SHRD ax,dx,4\r
+;;; rcr ax,1 ;\r
+\r
+ ; Now turn that long DX:AX into a number of paragraphs to allocate\r
+ ; for when we read the file.\r
+ shrd ax,dx,4 ; AX = low word of (DX:AX / 16), i.e. paragraphs\r
+ mov bx,ax ; and stores this in BX\r
+ inc bx ; one more paragraph covers the remainder bytes\r
+ mov ah,48h ; 48 -> Allocate memory\r
+ ; BX already = # of paragraphs\r
+ int 21h\r
+ mov cl,ERR_MEM\r
+ jc ferror\r
+ mov di,ax ; keep the block's segment in DI\r
+\r
+ ; Seek the file back to the beginning in order to read it into\r
+ ; the memory we just allocated.\r
+ mov ax,4200h ; 42,00 -> Seek, absolute offset\r
+ mov bx,si ; BX is the file handle.\r
+ mov cx,0 ; CX:DX is a long offset\r
+ mov dx,0\r
+ int 21h\r
+ mov cl,ERR_FILE ; CL was zeroed above; without this a failed\r
+ jc ferror ; seek would be reported as ERR_OK\r
+\r
+ ; Now read the file into memory\r
+ mov ds,di ; DS points at alloc'd memory\r
+ReadBlock: mov ah,3fh ; 3F -> Read file\r
+ mov cx,32768 ; read 32768 bytes at a time\r
+ mov dx,0 ; DS:DX points at beginning of\r
+ int 21h ; this block of memory.\r
+ mov cl,ERR_FILE\r
+ jc ferror\r
+ mov dx,ds ; Offset DS by (32768/16), which\r
+ add dx,800h ; is the number of paragraphs in\r
+ mov ds,dx ; each block of 32768 bytes.\r
+ cmp ax,32768 ; Did we actually read 32768 bytes?\r
+ je ReadBlock ; If so, there's more to read...\r
+ ; Otherwise, we've read all the\r
+ ; data in the file.\r
+\r
+ ; So now, close the file handle.\r
+ mov ah,3Eh ; 3E -> Close file\r
+ ; BX still is the file handle\r
+ int 21h\r
+\r
+ ; Everything went ok. Return the segment in DX.\r
+ mov dx,di\r
+ mov nError,ERR_OK\r
+ ret\r
+ferror: mov nError,cl ; carry is still set from the failed call\r
+ ret\r
+LoadFile ENDP\r
+\r
+;; LoadIndex: currently a stub that returns immediately without touching registers or flags.\r
+;; Eventually, this should load in an index of all data files to allow for filenames to be\r
+;; specified outside of the program. The goal is to make the program have no hardcoded filenames...\r
+;; Of course, the structure of this index and its entries will be\r
+;; hardcoded, as will the structures of all of the files it includes.\r
+LoadIndex PROC near\r
+ ret\r
+LoadIndex ENDP\r
+\r
+;; SaveVideo: stores the BIOS video mode in nDisplay and the page-0 cursor position in wCPos, then\r
+;; copies 80*50 words from segText:0000 into bufText (in segCode). Changes AX, BX, CX, DX, SI, DI, DS, ES.\r
+SaveVideo PROC near\r
+ mov ah,0Fh\r
+ int 10h ; INT 10h/AH=0Fh: get current display mode (returned in AL)\r
+ mov nDisplay,al\r
+ mov ah,03h\r
+ mov bh,0\r
+ int 10h\r
+ mov wCPos,dx\r
+\r
+ mov ds,segText\r
+ mov si,0\r
+ mov es,segCode\r
+ mov di,offset bufText\r
+ mov cx,80*50\r
+ rep movsw\r
+ ret\r
+SaveVideo ENDP\r
+\r
+;; RestoreVideo: sets the video mode saved in nDisplay and the cursor position saved in wCPos, runs\r
+;; PAL_UPDATE, then copies 80*50 words from bufText (in segCode) to segText:0000. Changes DS and ES.\r
+RestoreVideo PROC near\r
+ mov ah,00h\r
+ mov al,nDisplay\r
+ int 10h ; INT 10h/AH=00h: set the video mode to the saved nDisplay\r
+ mov ah,02h\r
+ mov bh,0\r
+ mov dx,wCPos\r
+ int 10h\r
+\r
+ PAL_UPDATE ; When flipping into text mode, re-do the\r
+ ; palette because the BIOS changes it.\r
+\r
+ mov es,segText\r
+ mov di,0\r
+ mov ds,segCode\r
+ mov si,offset bufText\r
+ mov cx,80*50\r
+ rep movsw\r
+ ret\r
+RestoreVideo ENDP\r
+\1a
\ No newline at end of file
--- /dev/null
+;; ====================================================================\r
+;; Macros\r
+;; ====================================================================\r
+;; JKEYP key,label: jump to label if the byte at _keyFlags[key+1] (addressed through CS) equals 1\r
+JKEYP MACRO key,label\r
+ cmp byte ptr cs:_keyFlags[key+1],1\r
+ je label\r
+ ENDM\r
+;; JKEYNP key,label: jump to label if the byte at _keyFlags[key+1] (addressed through CS) is not 1\r
+JKEYNP MACRO key,label\r
+ cmp byte ptr cs:_keyFlags[key+1],1\r
+ jne label\r
+ ENDM\r
+\r
+;; Note that JNKEY and JKEY both clear _flKeyChanged, so you cannot\r
+;; use one after the other! In other words,\r
+;; JNKEY no_key\r
+;; JKEY yes_key ;<-- this will fail\r
+;; will not work like you'd think it would. The second call (JKEY)\r
+;; will not know that a key has been pressed!\r
+;; JNKEY label: jump to label if _flKeyChanged is 0; otherwise reset it to 0 and fall through\r
+JNKEY MACRO label\r
+ cmp cs:_flKeyChanged,0\r
+ je label\r
+ mov cs:_flKeyChanged,0 ; <--- important! clears the flag on the fall-through path\r
+ ENDM\r
+;; JKEY label: reset _flKeyChanged to 0, then jump to label if it was nonzero (MOV keeps the CMP flags)\r
+JKEY MACRO label\r
+ cmp cs:_flKeyChanged,0\r
+ mov cs:_flKeyChanged,0\r
+ jne label\r
+ ENDM\r
+\r
+;; KEYB_START: start keyboard interrupts -- call SwapInt9, then reset _flKeyChanged to 0\r
+KEYB_START MACRO\r
+ call SwapInt9\r
+ mov cs:_flKeyChanged,0\r
+ ENDM\r
+\r
+;; KEYB_END: clear keyboard interrupts -- call SwapInt9 again to exchange the vectors back\r
+KEYB_END MACRO\r
+ call SwapInt9\r
+ ENDM\r
+\r
+;; Credit for these routines: Steve Dollins, Brown Computer Group.\r
+;; I didn't write any of the code below -- just heisted it from some\r
+;; stuff that he wrote and released! Very useful keyboard routines.\r
+;; Any comments prefixed SDE were added by me.\r
+_keyFlags dw 256 dup (0) ; SDE: since they only use 2 bits\r
+ ; per word, this is a tradeoff,\r
+ ; space for time\r
+\r
+oldint9_offset dw offset newint9\r
+oldint9_segment dw seg newint9\r
+\r
+_flKeyChanged dw 0\r
+\r
+;-----------------------------------------------------------------------\r
+; void SwapInt9( void )\r
+; Sets DS = CS; modifies AX, BX, DX and ES.\r
+; SwapInt9() exchanges the vector in oldint9_segment:oldint9_offset\r
+; with the vector in the interrupt table for INT 9h (a second call swaps them back).\r
+;-----------------------------------------------------------------------\r
+\r
+SwapInt9 PROC far\r
+ mov ax,cs\r
+ mov ds,ax\r
+\r
+ mov ax,03509h ; INT 21h/AH=35h: get the current INT 09h vector\r
+ int 21h ; return in ES:BX\r
+\r
+ mov ax,oldint9_segment\r
+ mov dx,oldint9_offset\r
+ push ds\r
+ mov ds,ax\r
+ mov ax,02509h ; INT 21h/AH=25h: set the INT 09h vector\r
+ int 21h ; to address in DS:DX\r
+ pop ds\r
+\r
+ mov oldint9_segment,es ; Save the vector that was installed before the call\r
+ mov oldint9_offset,bx\r
+ ret\r
+SwapInt9 ENDP\r
+\r
+\r
+;-----------------------------------------------------------------------\r
+; newint9 is the new keyboard interrupt (INT 9h). Preserves AX, BX and DS.\r
+;\r
+; Reads the scan code from the keyboard and modifies the key\r
+; flags table (_keyFlags, one word per scan code). The high byte is set to\r
+; the position of the key, pressed=1, release=0. The low byte is set to 1 when\r
+; the key is pressed and left unmodified when the key is released. Sets _flKeyChanged to 1.\r
+;-----------------------------------------------------------------------\r
+newint9 PROC far\r
+ push ax\r
+ push bx\r
+ push ds\r
+\r
+ mov ax,cs\r
+ mov ds,ax\r
+\r
+ JKEYNP kCTRL,not_ctrlaltdel ; SDE code: checks the flags recorded by earlier interrupts\r
+ JKEYNP kALT,not_ctrlaltdel ; To still allow ctrl-\r
+ JKEYNP kDELETE,not_ctrlaltdel ; alt-delete. Nothing\r
+ jmp ctrlaltdel ; worse than a total lockup!\r
+not_ctrlaltdel:\r
+\r
+ in ax,60h ; get scan code in AL, control byte in AH\r
+ mov bx,ax ; save a copy in BX\r
+ xchg ah,al ; swap to get control byte in AL (scan code now in AH)\r
+ or al,80h ; set bit 7 of port 61h ...\r
+ out 61h,al ; ... to clear the keyboard of the interrupt\r
+ and al,7Fh\r
+ out 61h,al\r
+ mov al,20h ; send generic EOI to\r
+ out 20h,al ; PIC\r
+\r
+ and bx,0007fh ; strip all but the scan code\r
+ shl bx,1 ; multiply by two to get our offset into the word table\r
+\r
+ ; if the key was released, the high bit is set in the scan code\r
+ bt ax,15 ; bit 15 of AX = high bit of the scan code in AH -> carry\r
+ setnc byte ptr [_keyFlags+bx+1] ; set "Is being pressed" flag (1 on press, 0 on release)\r
+ jc short int09done ; if the key was released, we're done\r
+ mov byte ptr [_keyFlags+bx],1 ; set "Has been pressed" flag\r
+ mov _flKeyChanged,1 ; NOTE(review): redundant, the same store follows at int09done\r
+int09done:\r
+ mov _flKeyChanged,1 ; state of keyboard has changed (reached for press and release)\r
+ pop ds\r
+ pop bx\r
+ pop ax\r
+ iret\r
+ctrlaltdel: int 19h ; SDE -- added this. NOTE(review): nothing follows if INT 19h returns\r
+ ; Allows a reboot.\r
+newint9 ENDP\r
+\r
+;; Defines the current key procedure (used as a jump-through)\r
+kprocCur dw KprocDirect\r
+\r
+;; KprocDirect: keyboard procedure for sprite-less scrolling. Normally a keyproc would\r
+;; control some sprite and the screen would follow it; this one directly sets\r
+;; ScrollDX and ScrollDY on every call (0 when no arrow key is down).\r
+;; Right/left give +/- scroll_speed_x, down/up give +/- scroll_speed_y.\r
+;; This keyproc is inertialess; grey + and grey - change the speed within 1..16\r
+;; (scroll_speed_y moves in steps of VIRTUAL_WIDTH alongside scroll_speed_x).\r
+;; Pressing K points kprocCur at KprocInertia; M sets bDoTransition to 1.\r
+;; P pauses the screen -- note that this is just for completely\r
+;; freezing the screen... it doesn't return until you let go! Modifies AX.\r
+\r
+EVEN\r
+scroll_speed_x dw SCROLL_SPEED ; (defaults) horizontal step\r
+scroll_speed_y dw SCROLL_SPEED * VIRTUAL_WIDTH ; (defaults) vertical step, in units of VIRTUAL_WIDTH\r
+KprocDirect PROC near\r
+chk_leftright: mov ax,0\r
+ JKEYNP kRIGHT,not_right\r
+ mov ax,scroll_speed_x\r
+ mov ScrollDX,ax\r
+ jmp chk_updown\r
+not_right: JKEYNP kLEFT,not_left\r
+ sub ax,scroll_speed_x\r
+ mov ScrollDX,ax\r
+ jmp chk_updown\r
+not_left: mov ScrollDX,ax\r
+\r
+chk_updown: mov ax,0\r
+ JKEYNP kUP,not_up\r
+ sub ax,scroll_speed_y\r
+ mov ScrollDY,ax\r
+ jmp chk_other\r
+not_up: JKEYNP kDOWN,not_down\r
+ mov ax,scroll_speed_y\r
+ mov ScrollDY,ax\r
+ jmp chk_other\r
+not_down: mov ScrollDY,ax\r
+\r
+chk_other: JKEYNP kK,not_k\r
+ mov kprocCur,KprocInertia\r
+not_k: JKEYNP kM,not_m\r
+ mov bDoTransition,1\r
+not_m: JKEYNP kGREY_MINUS,not_minus\r
+ cmp scroll_speed_x,1\r
+ jle not_minus\r
+ dec scroll_speed_x\r
+ sub scroll_speed_y,VIRTUAL_WIDTH\r
+not_minus: JKEYNP kGREY_PLUS,not_plus\r
+ cmp scroll_speed_x,16\r
+ jge not_plus\r
+ inc scroll_speed_x\r
+ add scroll_speed_y,VIRTUAL_WIDTH\r
+not_plus:\r
+\r
+pause_key: JKEYP kP,pause_key\r
+\r
+ ret\r
+KprocDirect ENDP\r
+\r
+;; KprocInertia: this keyproc has inertia, so + and - don't work.\r
+;; Each call with an arrow key down moves ScrollDX by 1 (stopping at +/-16) or ScrollDY by\r
+;; VIRTUAL_WIDTH (stopping at +/-VIRTUAL_WIDTH*16). K points kprocCur at KprocDirect; M sets bDoTransition.\r
+;; P pauses the screen -- note that this is just for completely\r
+;; freezing the screen... it doesn't return until you let go!\r
+KprocInertia PROC near\r
+chk2_leftright: JKEYNP kRIGHT,not2_right\r
+ cmp ScrollDX,16\r
+ je not2_right\r
+ inc ScrollDX\r
+ jmp chk2_updown\r
+not2_right: JKEYNP kLEFT,not2_left\r
+ cmp ScrollDX,-16\r
+ je not2_left\r
+ dec ScrollDX\r
+ jmp chk2_updown\r
+not2_left:\r
+\r
+chk2_updown: JKEYNP kUP,not2_up\r
+ cmp ScrollDY,-VIRTUAL_WIDTH * 16\r
+ je not2_up\r
+ add ScrollDY,-VIRTUAL_WIDTH\r
+ jmp chk2_other\r
+not2_up: JKEYNP kDOWN,not2_down\r
+ cmp ScrollDY,VIRTUAL_WIDTH * 16\r
+ je not2_down\r
+ add ScrollDY,VIRTUAL_WIDTH\r
+ jmp chk2_other\r
+not2_down:\r
+\r
+chk2_other: JKEYNP kK,not2_k\r
+ mov kprocCur,KprocDirect\r
+not2_k: JKEYNP kM,not2_m\r
+ mov bDoTransition,1\r
+not2_m:\r
+\r
+pause2_key: JKEYP kP,pause2_key\r
+\r
+ ret\r
+KprocInertia ENDP\r
+\1a
\ No newline at end of file
--- /dev/null
+;\r
+; *** Listing 2-5 ***\r
+;\r
+; The long-period Zen timer. (LZTIMER.ASM)\r
+; Uses the 8253 timer and the BIOS time-of-day count to time the\r
+; performance of code that takes less than an hour to execute.\r
+; Because interrupts are left on (in order to allow the timer\r
+; interrupt to be recognized), this is less accurate than the\r
+; precision Zen timer, so it is best used only to time code that takes\r
+; more than about 54 milliseconds to execute (code that the precision\r
+; Zen timer reports overflow on). Resolution is limited by the\r
+; occurrence of timer interrupts.\r
+;\r
+; By Michael Abrash 4/26/89\r
+;\r
+; Externally callable routines:\r
+;\r
+; ZTimerOn: Saves the BIOS time of day count and starts the\r
+; long-period Zen timer.\r
+;\r
+; ZTimerOff: Stops the long-period Zen timer and saves the timer\r
+; count and the BIOS time-of-day count.\r
+;\r
+; ZTimerReport: Prints the time that passed between starting and\r
+; stopping the timer.\r
+;\r
+; Note: If either more than an hour passes or midnight falls between\r
+; calls to ZTimerOn and ZTimerOff, an error is reported. For\r
+; timing code that takes more than a few minutes to execute,\r
+; either the DOS TIME command in a batch file before and after\r
+; execution of the code to time or the use of the DOS\r
+; time-of-day function in place of the long-period Zen timer is\r
+; more than adequate.\r
+;\r
+; Note: The PS/2 version is assembled by setting the symbol PS2 to 1.\r
+; PS2 must be set to 1 on PS/2 computers because the PS/2's\r
+; timers are not compatible with an undocumented timer-stopping\r
+; feature of the 8253; the alternative timing approach that\r
+; must be used on PS/2 computers leaves a short window\r
+; during which the timer 0 count and the BIOS timer count may\r
+; not be synchronized. You should also set the PS2 symbol to\r
+; 1 if you're getting erratic or obviously incorrect results.\r
+;\r
+; Note: When PS2 is 0, the code relies on an undocumented 8253\r
+; feature to get more reliable readings. It is possible that\r
+; the 8253 (or whatever chip is emulating the 8253) may be put\r
+; into an undefined or incorrect state when this feature is\r
+; used.\r
+;\r
+; ***************************************************************\r
+; * If your computer displays any hint of erratic behavior *\r
+; * after the long-period Zen timer is used, such as the floppy *\r
+; * drive failing to operate properly, reboot the system, set *\r
+; * PS2 to 1 and leave it that way! *\r
+; ***************************************************************\r
+;\r
+; Note: Each block of code being timed should ideally be run several\r
+; times, with at least two similar readings required to\r
+; establish a true measurement, in order to eliminate any\r
+; variability caused by interrupts.\r
+;\r
+; Note: Interrupts must not be disabled for more than 54 ms at a\r
+; stretch during the timing interval. Because interrupts\r
+; are enabled, keys, mice, and other devices that generate\r
+; interrupts should not be used during the timing interval.\r
+;\r
+; Note: Any extra code running off the timer interrupt (such as\r
+; some memory-resident utilities) will increase the time\r
+; measured by the Zen timer.\r
+;\r
+; Note: These routines can introduce inaccuracies of up to a few\r
+; tenths of a second into the system clock count for each\r
+; code section timed. Consequently, it's a good idea to\r
+; reboot at the conclusion of timing sessions. (The\r
+; battery-backed clock, if any, is not affected by the Zen\r
+; timer.)\r
+;\r
+; All registers and all flags are preserved by all routines.\r
+;\r
+ DOSSEG\r
+ .model small\r
+ .code\r
+ public ZTimerOn, ZTimerOff, ZTimerReport\r
+\r
+;\r
+; Set PS2 to 0 to assemble for use on a fully 8253-compatible\r
+; system; when PS2 is 0, the readings are more reliable if the\r
+; computer supports the undocumented timer-stopping feature,\r
+; but may be badly off if that feature is not supported. In\r
+; fact, timer-stopping may interfere with your computer's\r
+; overall operation by putting the 8253 into an undefined or\r
+; incorrect state. Use with caution!!!\r
+;\r
+; Set PS2 to 1 to assemble for use on non-8253-compatible\r
+; systems, including PS/2 computers; when PS2 is 1, readings\r
+; may occasionally be off by 54 ms, but the code will work\r
+; properly on all systems.\r
+;\r
+; A setting of 1 is safer and will work on more systems,\r
+; while a setting of 0 produces more reliable results in systems\r
+; which support the undocumented timer-stopping feature of the\r
+; 8253. The choice is yours.\r
+;\r
+PS2 equ 1\r
+;\r
+; Base address of the 8253 timer chip.\r
+;\r
+BASE_8253 equ 40h\r
+;\r
+; The address of the timer 0 count registers in the 8253.\r
+;\r
+TIMER_0_8253 equ BASE_8253 + 0\r
+;\r
+; The address of the mode register in the 8253.\r
+;\r
+MODE_8253 equ BASE_8253 + 3\r
+;\r
+; The address of the BIOS timer count variable in the BIOS\r
+; data segment.\r
+;\r
+TIMER_COUNT equ 46ch\r
+;\r
+; Macro to emulate a POPF instruction in order to fix the bug in some\r
+; 80286 chips which allows interrupts to occur during a POPF even when\r
+; interrupts remain disabled.\r
+;\r
+; Expects a flags word (from PUSHF) on top of the stack. PUSH CS plus\r
+; the near CALL stack CS and the offset of the instruction after the\r
+; macro above that flags word; the IRET at p1 then pops all three, so\r
+; execution resumes after the macro with the saved flags restored.\r
+;\r
+MPOPF macro\r
+ local p1, p2\r
+ jmp short p2 ;skip over the IRET on the way in\r
+p1: iret ;jump to pushed address & pop flags\r
+p2: push cs ;construct far return address to\r
+ call p1 ; the next instruction\r
+ endm\r
+\r
+;\r
+; Macro to delay briefly to ensure that enough time has elapsed\r
+; between successive I/O accesses so that the device being accessed\r
+; can respond to both accesses even on a very fast PC.\r
+; Expands to three jumps to the immediately following instruction;\r
+; no registers are used.\r
+;\r
+DELAY macro\r
+ jmp $+2\r
+ jmp $+2\r
+ jmp $+2\r
+ endm\r
+\r
+StartBIOSCountLow dw ? ;BIOS count low word at the\r
+ ; start of the timing period\r
+StartBIOSCountHigh dw ? ;BIOS count high word at the\r
+ ; start of the timing period\r
+EndBIOSCountLow dw ? ;BIOS count low word at the\r
+ ; end of the timing period\r
+EndBIOSCountHigh dw ? ;BIOS count high word at the\r
+ ; end of the timing period\r
+EndTimedCount dw ? ;timer 0 count at the end of\r
+ ; the timing period\r
+ReferenceCount dw ? ;number of counts required to\r
+ ; execute timer overhead code\r
+;\r
+; String printed to report results.\r
+;\r
+OutputStr label byte\r
+ db 0dh, 0ah, 'Timed count: '\r
+TimedCountStr db 10 dup (?)\r
+ db ' microseconds', 0dh, 0ah\r
+ db '$'\r
+;\r
+; Temporary storage for timed count as it's divided down by powers\r
+; of ten when converting from doubleword binary to ASCII.\r
+;\r
+CurrentCountLow dw ?\r
+CurrentCountHigh dw ?\r
+;\r
+; Powers of ten table used to perform division by 10 when doing\r
+; doubleword conversion from binary to ASCII.\r
+;\r
+PowersOfTen label word\r
+ dd 1\r
+ dd 10\r
+ dd 100\r
+ dd 1000\r
+ dd 10000\r
+ dd 100000\r
+ dd 1000000\r
+ dd 10000000\r
+ dd 100000000\r
+ dd 1000000000\r
+PowersOfTenEnd label word\r
+;\r
+; String printed to report that the high word of the BIOS count\r
+; changed while timing (an hour elapsed or midnight was crossed),\r
+; and so the count is invalid and the test needs to be rerun.\r
+;\r
+TurnOverStr label byte\r
+ db 0dh, 0ah\r
+ db '****************************************************'\r
+ db 0dh, 0ah\r
+ db '* Either midnight passed or an hour or more passed *'\r
+ db 0dh, 0ah\r
+ db '* while timing was in progress. If the former was *'\r
+ db 0dh, 0ah\r
+ db '* the case, please rerun the test; if the latter *'\r
+ db 0dh, 0ah\r
+ db '* was the case, the test code takes too long to *'\r
+ db 0dh, 0ah\r
+ db '* run to be timed by the long-period Zen timer. *'\r
+ db 0dh, 0ah\r
+ db '* Suggestions: use the DOS TIME command, the DOS *'\r
+ db 0dh, 0ah\r
+ db '* time function, or a watch. *'\r
+ db 0dh, 0ah\r
+ db '****************************************************'\r
+ db 0dh, 0ah\r
+ db '$'\r
+\r
+;********************************************************************\r
+;* Routine called to start timing. *\r
+;********************************************************************\r
+;\r
+; Near procedure, no arguments.\r
+; Out: StartBIOSCountHigh/StartBIOSCountLow (stored via CS:) hold the\r
+; BIOS tick count read at 0000:046Ch; timer 0 is reprogrammed to\r
+; mode 2 with an initial count of 0.\r
+; Preserves AX and the flags (both are pushed on entry and restored\r
+; on exit).\r
+;\r
+\r
+ZTimerOn proc near\r
+\r
+;\r
+; Save the context of the program being timed.\r
+;\r
+ push ax\r
+ pushf\r
+;\r
+; Set timer 0 of the 8253 to mode 2 (divide-by-N), to cause\r
+; linear counting rather than count-by-two counting. Also stops\r
+; timer 0 until the timer count is loaded, except on PS/2\r
+; computers.\r
+;\r
+ mov al,00110100b ;mode 2\r
+ out MODE_8253,al\r
+;\r
+; Set the timer count to 0, so we know we won't get another\r
+; timer interrupt right away.\r
+; Note: this introduces an inaccuracy of up to 54 ms in the system\r
+; clock count each time it is executed.\r
+;\r
+ DELAY\r
+ sub al,al\r
+ out TIMER_0_8253,al ;lsb\r
+ DELAY\r
+ out TIMER_0_8253,al ;msb\r
+;\r
+; In case interrupts are disabled, enable interrupts briefly to allow\r
+; the interrupt generated when switching from mode 3 to mode 2 to be\r
+; recognized. Interrupts must be enabled for at least 210 ns to allow\r
+; time for that interrupt to occur. Here, 10 jumps are used for the\r
+; delay to ensure that the delay time will be more than long enough\r
+; even on a very fast PC.\r
+;\r
+ pushf ;remember the caller's interrupt state\r
+ sti\r
+ rept 10\r
+ jmp $+2\r
+ endm\r
+ MPOPF ;put the interrupt state back\r
+;\r
+; Store the timing start BIOS count.\r
+; (Since the timer count was just set to 0, the BIOS count will\r
+; stay the same for the next 54 ms, so we don't need to disable\r
+; interrupts in order to avoid getting a half-changed count.)\r
+;\r
+ push ds\r
+ sub ax,ax\r
+ mov ds,ax ;DS = 0 to reach the BIOS data area\r
+ mov ax,ds:[TIMER_COUNT+2]\r
+ mov cs:[StartBIOSCountHigh],ax\r
+ mov ax,ds:[TIMER_COUNT]\r
+ mov cs:[StartBIOSCountLow],ax\r
+ pop ds\r
+;\r
+; Set the timer count to 0 again to start the timing interval.\r
+;\r
+ mov al,00110100b ;set up to load initial\r
+ out MODE_8253,al ; timer count\r
+ DELAY\r
+ sub al,al\r
+ out TIMER_0_8253,al ;load count lsb\r
+ DELAY\r
+ out TIMER_0_8253,al ;load count msb\r
+;\r
+; Restore the context of the program being timed and return to it.\r
+;\r
+ MPOPF\r
+ pop ax\r
+ ret\r
+\r
+ZTimerOn endp\r
+\r
+;********************************************************************\r
+;* Routine called to stop timing and get count. *\r
+;********************************************************************\r
+;\r
+; Near procedure, no arguments.\r
+; Out: EndBIOSCountHigh/EndBIOSCountLow = BIOS tick count at stop,\r
+; EndTimedCount = elapsed timer 0 counts (negated countdown),\r
+; ReferenceCount = rounded average overhead of 16 empty\r
+; ReferenceZTimerOn/ReferenceZTimerOff pairs.\r
+; All four are stored via CS:.\r
+; Preserves AX, CX and the flags (pushed on entry, restored on exit).\r
+;\r
+\r
+ZTimerOff proc near\r
+\r
+;\r
+; Save the context of the program being timed.\r
+;\r
+ pushf\r
+ push ax\r
+ push cx\r
+;\r
+; In case interrupts are disabled, enable interrupts briefly to allow\r
+; any pending timer interrupt to be handled. Interrupts must be\r
+; enabled for at least 210 ns to allow time for that interrupt to\r
+; occur. Here, 10 jumps are used for the delay to ensure that the\r
+; delay time will be more than long enough even on a very fast PC.\r
+;\r
+ sti\r
+ rept 10\r
+ jmp $+2\r
+ endm\r
+\r
+;\r
+; Latch the timer count.\r
+;\r
+\r
+if PS2\r
+\r
+ mov al,00000000b\r
+ out MODE_8253,al ;latch timer 0 count\r
+;\r
+; This is where a one-instruction-long window exists on the PS/2.\r
+; The timer count and the BIOS count can lose synchronization;\r
+; since the timer keeps counting after it's latched, it can turn\r
+; over right after it's latched and cause the BIOS count to turn\r
+; over before interrupts are disabled, leaving us with the timer\r
+; count from before the timer turned over coupled with the BIOS\r
+; count from after the timer turned over. The result is a count\r
+; that's 54 ms too long.\r
+;\r
+\r
+else\r
+\r
+;\r
+; Set timer 0 to mode 2 (divide-by-N), waiting for a 2-byte count\r
+; load, which stops timer 0 until the count is loaded. (Only works\r
+; on fully 8253-compatible chips.)\r
+;\r
+ mov al,00110100b ;mode 2\r
+ out MODE_8253,al\r
+ DELAY\r
+ mov al,00000000b ;latch timer 0 count\r
+ out MODE_8253,al\r
+\r
+endif\r
+\r
+ cli ;stop the BIOS count\r
+;\r
+; Read the BIOS count. (Since interrupts are disabled, the BIOS\r
+; count won't change.)\r
+;\r
+ push ds\r
+ sub ax,ax\r
+ mov ds,ax ;DS = 0 to reach the BIOS data area\r
+ mov ax,ds:[TIMER_COUNT+2]\r
+ mov cs:[EndBIOSCountHigh],ax\r
+ mov ax,ds:[TIMER_COUNT]\r
+ mov cs:[EndBIOSCountLow],ax\r
+ pop ds\r
+;\r
+; Read the timer count and save it.\r
+;\r
+ in al,TIMER_0_8253 ;lsb\r
+ DELAY\r
+ mov ah,al\r
+ in al,TIMER_0_8253 ;msb\r
+ xchg ah,al ;AX = msb:lsb\r
+ neg ax ;convert from countdown\r
+ ; remaining to elapsed\r
+ ; count\r
+ mov cs:[EndTimedCount],ax\r
+;\r
+; Restart timer 0, which is still waiting for an initial count\r
+; to be loaded.\r
+;\r
+\r
+ife PS2\r
+\r
+ DELAY\r
+ mov al,00110100b ;mode 2, waiting to load a\r
+ ; 2-byte count\r
+ out MODE_8253,al\r
+ DELAY\r
+ sub al,al\r
+ out TIMER_0_8253,al ;lsb\r
+ DELAY\r
+ mov al,ah\r
+ out TIMER_0_8253,al ;msb\r
+ DELAY\r
+\r
+endif\r
+\r
+ sti ;let the BIOS count continue\r
+;\r
+; Time a zero-length code fragment, to get a reference for how\r
+; much overhead this routine has. Time it 16 times and average it,\r
+; for accuracy, rounding the result.\r
+;\r
+ mov cs:[ReferenceCount],0\r
+ mov cx,16\r
+ cli ;interrupts off to allow a\r
+ ; precise reference count\r
+RefLoop:\r
+ call ReferenceZTimerOn\r
+ call ReferenceZTimerOff ;adds this pass to ReferenceCount\r
+ loop RefLoop\r
+ sti\r
+ add cs:[ReferenceCount],8 ;total + (0.5 * 16)\r
+ mov cl,4\r
+ shr cs:[ReferenceCount],cl ;(total) / 16 + 0.5\r
+;\r
+; Restore the context of the program being timed and return to it.\r
+;\r
+ pop cx\r
+ pop ax\r
+ MPOPF\r
+ ret\r
+\r
+ZTimerOff endp\r
+\r
+;\r
+; Called by ZTimerOff to start the timer for overhead measurements.\r
+; Performs the same timer 0 programming as the first part of ZTimerOn\r
+; (mode 2, count 0) but without the interrupt window and without\r
+; reading the BIOS count. Preserves AX and the flags.\r
+;\r
+\r
+ReferenceZTimerOn proc near\r
+;\r
+; Save the context of the program being timed.\r
+;\r
+ push ax\r
+ pushf\r
+;\r
+; Set timer 0 of the 8253 to mode 2 (divide-by-N), to cause\r
+; linear counting rather than count-by-two counting.\r
+;\r
+ mov al,00110100b ;mode 2\r
+ out MODE_8253,al\r
+;\r
+; Set the timer count to 0.\r
+;\r
+ DELAY\r
+ sub al,al\r
+ out TIMER_0_8253,al ;lsb\r
+ DELAY\r
+ out TIMER_0_8253,al ;msb\r
+;\r
+; Restore the context of the program being timed and return to it.\r
+;\r
+ MPOPF\r
+ pop ax\r
+ ret\r
+\r
+ReferenceZTimerOn endp\r
+\r
+;\r
+; Called by ZTimerOff to stop the timer and add the result to\r
+; ReferenceCount for overhead measurements. Doesn't need to look\r
+; at the BIOS count because timing a zero-length code fragment\r
+; isn't going to take anywhere near 54 ms.\r
+;\r
+; Out: ReferenceCount (via CS:) increased by the elapsed timer count.\r
+; Preserves AX, CX and the flags.\r
+;\r
+\r
+ReferenceZTimerOff proc near\r
+;\r
+; Save the context of the program being timed.\r
+;\r
+ pushf\r
+ push ax\r
+ push cx\r
+\r
+;\r
+; Match the interrupt-window delay in ZTimerOff.\r
+; Note that this STI turns interrupts on even though ZTimerOff's\r
+; reference loop runs after a CLI; the MPOPF at the end of this\r
+; routine restores the interrupt state that was pushed on entry.\r
+;\r
+ sti\r
+ rept 10\r
+ jmp $+2\r
+ endm\r
+\r
+ mov al,00000000b\r
+ out MODE_8253,al ;latch timer\r
+;\r
+; Read the count and save it.\r
+;\r
+ DELAY\r
+ in al,TIMER_0_8253 ;lsb\r
+ DELAY\r
+ mov ah,al\r
+ in al,TIMER_0_8253 ;msb\r
+ xchg ah,al ;AX = msb:lsb\r
+ neg ax ;convert from countdown\r
+ ; remaining to elapsed\r
+ ; count\r
+ add cs:[ReferenceCount],ax\r
+;\r
+; Restore the context and return.\r
+;\r
+ pop cx\r
+ pop ax\r
+ MPOPF\r
+ ret\r
+\r
+ReferenceZTimerOff endp\r
+\r
+;********************************************************************\r
+;* Routine called to report timing results. *\r
+;********************************************************************\r
+;\r
+; Near procedure, no arguments. Uses the values stored by ZTimerOn and\r
+; ZTimerOff and prints, through DOS function 09h, either the elapsed\r
+; time as a 10-digit microsecond count (leading zeros included) or the\r
+; TurnOverStr warning when the BIOS high word changed in a way that\r
+; invalidates the reading.\r
+; Preserves AX, BX, CX, DX, SI, DI, DS and the flags.\r
+;\r
+\r
+ZTimerReport proc near\r
+\r
+ pushf\r
+ push ax\r
+ push bx\r
+ push cx\r
+ push dx\r
+ push si\r
+ push di\r
+ push ds\r
+;\r
+ push cs ;DOS functions require that DS point\r
+ pop ds ; to text to be displayed on the screen\r
+ assume ds:_TEXT\r
+;\r
+; See if midnight or more than an hour passed during timing. If so,\r
+; notify the user.\r
+;\r
+ mov ax,[StartBIOSCountHigh]\r
+ cmp ax,[EndBIOSCountHigh]\r
+ jz CalcBIOSTime ;hour count didn't change,\r
+ ; so everything's fine\r
+ inc ax\r
+ cmp ax,[EndBIOSCountHigh]\r
+ jnz TestTooLong ;midnight or two hour\r
+ ; boundaries passed, so the\r
+ ; results are no good\r
+ mov ax,[EndBIOSCountLow]\r
+ cmp ax,[StartBIOSCountLow]\r
+ jb CalcBIOSTime ;a single hour boundary\r
+ ; passed-that's OK, so long as\r
+ ; the total time wasn't more\r
+ ; than an hour\r
+\r
+;\r
+; Over an hour elapsed or midnight passed during timing, which\r
+; renders the results invalid. Notify the user. This misses the\r
+; case where a multiple of 24 hours has passed, but we'll rely\r
+; on the perspicacity of the user to detect that case.\r
+;\r
+TestTooLong:\r
+ mov ah,9\r
+ mov dx,offset TurnOverStr\r
+ int 21h\r
+ jmp short ZTimerReportDone\r
+;\r
+; Convert the BIOS time to microseconds.\r
+; Only the low words are subtracted; the 16-bit difference times\r
+; 54925 gives a 32-bit result in DX:AX.\r
+;\r
+CalcBIOSTime:\r
+ mov ax,[EndBIOSCountLow]\r
+ sub ax,[StartBIOSCountLow]\r
+ mov dx,54925 ;number of microseconds each\r
+ ; BIOS count represents\r
+ mul dx\r
+ mov bx,ax ;set aside BIOS count in\r
+ mov cx,dx ; microseconds\r
+;\r
+; Convert timer count to microseconds.\r
+;\r
+ mov ax,[EndTimedCount]\r
+ mov si,8381\r
+ mul si\r
+ mov si,10000\r
+ div si ;* .8381 = * 8381 / 10000\r
+;\r
+; Add timer and BIOS counts together to get an overall time in\r
+; microseconds.\r
+;\r
+ add bx,ax\r
+ adc cx,0\r
+;\r
+; Subtract the timer overhead and save the result.\r
+;\r
+ mov ax,[ReferenceCount]\r
+ mov si,8381 ;convert the reference count\r
+ mul si ; to microseconds\r
+ mov si,10000\r
+ div si ;* .8381 = * 8381 / 10000\r
+ sub bx,ax\r
+ sbb cx,0\r
+ mov [CurrentCountLow],bx\r
+ mov [CurrentCountHigh],cx\r
+;\r
+; Convert the result to an ASCII string by trial subtractions of\r
+; powers of 10.\r
+; DI indexes PowersOfTen from its last (largest) entry downwards,\r
+; SI walks TimedCountStr, and BL holds the digit being built.\r
+;\r
+ mov di,offset PowersOfTenEnd - offset PowersOfTen - 4\r
+ mov si,offset TimedCountStr\r
+CTSNextDigit:\r
+ mov bl,'0'\r
+CTSLoop:\r
+ mov ax,[CurrentCountLow]\r
+ mov dx,[CurrentCountHigh]\r
+ sub ax,PowersOfTen[di]\r
+ sbb dx,PowersOfTen[di+2]\r
+ jc CTSNextPowerDown ;went negative: digit is complete\r
+ inc bl\r
+ mov [CurrentCountLow],ax ;keep the reduced remainder\r
+ mov [CurrentCountHigh],dx\r
+ jmp CTSLoop\r
+CTSNextPowerDown:\r
+ mov [si],bl\r
+ inc si\r
+ sub di,4 ;step to the next smaller power of ten\r
+ jns CTSNextDigit ;until the index goes below zero\r
+;\r
+;\r
+; Print the results.\r
+;\r
+ mov ah,9\r
+ mov dx,offset OutputStr\r
+ int 21h\r
+;\r
+ZTimerReportDone:\r
+ pop ds\r
+ pop di\r
+ pop si\r
+ pop dx\r
+ pop cx\r
+ pop bx\r
+ pop ax\r
+ MPOPF\r
+ ret\r
+\r
+ZTimerReport endp\r
+\r
+ end\r
+\1a
\ No newline at end of file
--- /dev/null
+;;=======================================================================;;\r
+;; ;;\r
+;; Scrolling Routines -- main program ;;\r
+;; ;;\r
+;; All other INC files are included here. The main routines for the ;;\r
+;; frame-by-frame execution loop are also here. Finally I tried to keep ;;\r
+;; global variables stored in this file as well. ;;\r
+;; ;;\r
+;;=======================================================================;;\r
+ dosseg\r
+ .model small\r
+ .386\r
+\r
+ .code\r
+;; The Zen timer routines are defined in LZTIMER.ASM as "proc near" in\r
+;; the same small-model code segment and return with a near RET, so\r
+;; they have to be imported as near. Declaring them far would make\r
+;; every call push CS as well, and the near RET would leave that word\r
+;; on the stack.\r
+ extrn ZTimerOn:near, ZTimerOff:near, ZTimerReport:near\r
+\r
+INCLUDE constant.inc\r
+\r
+\r
+;; DW_TABLE inc,num -- emits num words at assembly time holding\r
+;; 0, inc, 2*inc, ... (num-1)*inc, i.e. a multiplication lookup table.\r
+;; Redefines the assembly-time symbols "count" and "number".\r
+DW_TABLE MACRO inc,num\r
+ count = 0\r
+ number = 0\r
+ WHILE (count LT num)\r
+ DW number\r
+ count = count + 1\r
+ number = number + inc\r
+ ENDM\r
+ ENDM\r
+\r
+;; DOSPRINT st -- prints a string through INT 21h function 09h.\r
+;; st is loaded into DX as-is, so it must be something MOV DX accepts.\r
+;; Function 09h prints the '$'-terminated string at DS:DX, so DS must\r
+;; address the string. Changes AH and DX.\r
+DOSPRINT MACRO st\r
+ mov ah,9\r
+ mov dx,st\r
+ int 21h\r
+ ENDM\r
+\r
+EVEN\r
+Mult320 label WORD\r
+MultBufWidth label WORD\r
+ DW_TABLE 320,200\r
+MultVirtWidth label WORD\r
+ DW_TABLE (VIRTUAL_WIDTH/4),200\r
+\r
+INCLUDE palette.inc\r
+INCLUDE keyb.inc\r
+INCLUDE modex.inc\r
+INCLUDE page.inc\r
+INCLUDE init.inc\r
+INCLUDE map.inc\r
+;INCLUDE sprite.inc NOT FOR NOW\r
+INCLUDE scroll.inc\r
+\r
+;; Various segments that need to be filled in later...\r
+EVEN\r
+segVideo dw 0A000h ; videoram segment\r
+segText dw 0B800h ; text segment\r
+segMap dw -1 ; Map info segment\r
+segTiles dw -1 ; Tile bitmap segment\r
+segBuffer dw -1 ; Local 320x200 buffer segment\r
+segCode dw -1 ; Code segment\r
+segPSP dw -1 ; PSP segment\r
+segPalette dw -1 ; Palette segment\r
+segTextPal dw -1 ; Saved text palette\r
+\r
+EVEN\r
+bDoTransition db 0\r
+\r
+;; This routine is called for each frame.\r
+;; Right now it just scrolls, but later all sprite animation would\r
+;; occur here too.\r
+;; Scroll and FlipPage are defined outside this section. FlipPage is\r
+;; entered with JMP rather than CALL, so its RET returns directly to\r
+;; whoever called OneFrame.\r
+EVEN\r
+OneFrame PROC near\r
+ call Scroll ; Scrolls the screen\r
+; call AnimateSprites ; prepares sprites on drawpage\r
+ jmp FlipPage ; shows drawpage...\r
+ ; no RET necessary\r
+OneFrame ENDP\r
+\r
+;; Each frame -- call the frame motion code, then check for keyhit.\r
+;; Runs until ESC is seen. When the current key procedure has set\r
+;; bDoTransition, the palette is flashed off, nMap is toggled between\r
+;; 0 and 1, the data for that map is loaded and the screen is redrawn\r
+;; before the palette is flashed back on.\r
+;; JNKEY, JKEYP, FLASH_OFF and FLASH_ON are macros defined outside this\r
+;; section (presumably keyb.inc / palette.inc -- confirm).\r
+;; NOTE(review): the carry flag that LoadData documents as its error\r
+;; indicator is not tested here.\r
+EVEN\r
+MainLoop PROC NEAR\r
+next_frame: call OneFrame\r
+ JNKEY next_frame\r
+ JKEYP kESC,all_done ; ESC -> quit, always\r
+ call kprocCur ; indirect call through the current keyproc pointer\r
+ mov al,bDoTransition\r
+ cmp al,0\r
+ je next_frame ; no transition requested\r
+transition: FLASH_OFF 16,segPalette\r
+ mov bDoTransition,0 ; request has been taken\r
+ mov ax,1\r
+ sub ax,nMap\r
+ mov nMap,ax ; Flip maps\r
+\r
+ call LoadData\r
+ call update_full ;<<<<\r
+ call OneFrame\r
+ FLASH_ON 16,segPalette\r
+ jmp next_frame\r
+all_done: ret\r
+MainLoop ENDP\r
+\r
+;; Beginning code -- Leaves text mode (saving the text screen) via\r
+;; a fade. It loads the map data and draws one\r
+;; frame before it fades on.\r
+;; NEW_PAL, PAL_SAVE, FADE_OFF, PAL_BLACK and FADE_ON are macros\r
+;; defined outside this section; SaveVideo is likewise external.\r
+;; NOTE(review): the carry flag that LoadData documents as its error\r
+;; indicator is not tested here.\r
+Beginning PROC near\r
+ NEW_PAL segTextPal\r
+ PAL_SAVE segTextPal\r
+ FADE_OFF 1,segTextPal\r
+ call SaveVideo\r
+ MODEX_START ; 320x200 Mode X graphics mode\r
+ PAL_BLACK\r
+\r
+ call LoadData ; This call will change...\r
+\r
+ call update_full ;<<<<\r
+ call OneFrame\r
+ FADE_ON 1,segPalette\r
+ ret\r
+Beginning ENDP\r
+\r
+;; Ending code -- restore to text mode via a flash\r
+;; Flashes the graphics palette off, calls RestoreVideo, then flashes\r
+;; on using the palette held in segTextPal. FLASH_OFF/FLASH_ON and\r
+;; RestoreVideo are defined outside this section.\r
+Ending PROC near\r
+ FLASH_OFF 8,segPalette\r
+ call RestoreVideo\r
+ FLASH_ON 8,segTextPal\r
+ ret\r
+Ending ENDP\r
+\r
+ .data\r
+\r
+ .stack 2048\r
+\r
+ END Initialize\r
+\1a
\ No newline at end of file
--- /dev/null
+# Build description for the scroller (scroll.exe) and the three C\r
+# generator tools (gensq, genmap, genpal). The macro substitution\r
+# $(OBJS: =+) and the use of $* look like Microsoft NMAKE syntax --\r
+# confirm against the make tool actually used.\r
+\r
+# Object files linked into scroll.exe.\r
+OBJS=main.obj lztimer.obj\r
+# Include files pulled in by main.asm.\r
+INCLUDES=modex.inc keyb.inc palette.inc page.inc scroll.inc map.inc \\r
+ constant.inc init.inc\r
+PROGRAM=scroll\r
+\r
+# Inference rules: C and assembly sources to objects, objects to\r
+# executables through a linker response file named after the target.\r
+.c.obj:\r
+ cl -c -Zi -Od -W4 $*.c\r
+\r
+.asm.obj:\r
+ masm -ml -zi $*.asm\r
+\r
+.obj.exe:\r
+ link /CO @$*.lnk\r
+\r
+# Default target: everything.\r
+project: scroll.exe gensq.exe genmap.exe genpal.exe\r
+\r
+# NOTE(review): this makes main.asm (not main.obj) depend on the\r
+# include files and has no commands; check that editing an .inc file\r
+# really causes main.obj to be reassembled with your make tool.\r
+main.asm: $(INCLUDES)\r
+\r
+# Linker response file: object list joined with '+', then the\r
+# program name.\r
+scroll.lnk: makefile\r
+ echo $(OBJS: =+) > $*.lnk\r
+ echo $(PROGRAM); >> $*.lnk\r
+\r
+scroll.exe: $(OBJS) scroll.lnk\r
+ link /CO @$*.lnk\r
+\r
+# The three generator tools follow the same pattern: object from the\r
+# C source, a one-line response file, and the executable.\r
+gensq.obj: $*.c\r
+\r
+gensq.lnk: makefile\r
+ echo $*.obj; > $*.lnk\r
+\r
+gensq.exe: $*.obj $*.lnk\r
+\r
+genmap.obj: $*.c\r
+\r
+genmap.lnk: makefile\r
+ echo $*.obj; > $*.lnk\r
+\r
+genmap.exe: $*.obj $*.lnk\r
+\r
+genpal.obj: $*.c\r
+\r
+genpal.lnk: makefile\r
+ echo $*.obj; > $*.lnk\r
+\r
+genpal.exe: $*.obj $*.lnk\r
+\r
+\1a
\ No newline at end of file
--- /dev/null
+;; MAP in own segment allows map of tiles to be up to 65536 tiles in area\r
+;; which translates to about 16.8 million pixels of virtual screen. This\r
+;; can be represented in almost any rectangle -- just set MAP_WIDTH.\r
+\r
+;; Sorry this code isn't commented -- I was working on it right up until\r
+;; the point that I released this. You have any questions? Ask away\r
+;; (my internet address is in the DOC file).\r
+\r
+MAPHEADER STRUCT, NONUNIQUE\r
+ MapName BYTE "\ 1\ 1\ 1\ 1\ 1\ 1\ 1\ 1\ 1\ 1\ 1\ 1"\r
+ Wid WORD 2\r
+ Ht WORD 3\r
+ Extent WORD 4\r
+ OffX1 WORD 5\r
+ OffY1 WORD 6\r
+ OffX2 WORD 7\r
+ OffY2 WORD 8\r
+ WrapX WORD 9\r
+ WrapY WORD 10\r
+ Magic WORD 11\r
+MAPHEADER ENDS\r
+MapInfo MAPHEADER <>\r
+\r
+\r
+; Loads a map file, releasing any previously loaded map first.\r
+; In: DS:DX = offset of filename\r
+; Out: CF set if freeing the old map or LoadFile failed. On success\r
+; MapInfo holds a copy of the file's MAPHEADER, segMap is the\r
+; segment just past that header, all three pages are marked\r
+; invalid and the scroll position and speed are reset to 0.\r
+; Uses: AX, CX, DX, SI, DI, ES; DS is left pointing at the loaded file\r
+; on the success path.\r
+; LoadFile is defined outside this section; from its use here it\r
+; presumably returns CF on error and the data segment in DX -- confirm.\r
+; NOTE(review): rep movsd relies on the direction flag being clear;\r
+; nothing in this routine sets it.\r
+LoadMapFile PROC near\r
+ mov ax,segMap\r
+ cmp ax,-1 ; -1 = no map loaded yet\r
+ je map_not_loaded\r
+ sub ax,(SIZEOF MAPHEADER) / 16 ; back up to the start of the block (header paragraphs)\r
+ mov es,ax\r
+ mov ah,49h ; DOS: free memory block at ES\r
+ int 21h\r
+ mov nError,ERR_MEM ; MOV leaves CF from INT 21h intact; nError is written either way\r
+ jc lm_err\r
+ mov segMap,-1\r
+\r
+map_not_loaded: call LoadFile\r
+ jc lm_err\r
+\r
+ mov ds,dx ; DS:SI -> start of the loaded file (the header)\r
+ mov si,0\r
+ mov ax,cs\r
+ mov es,ax ; ES:DI -> MapInfo in the code segment\r
+ lea di,MapInfo\r
+ mov cx,(SIZEOF MAPHEADER) / 4 ; header size in dwords\r
+ rep movsd\r
+\r
+ add dx,(SIZEOF MAPHEADER) / 16 ; skip the header, in paragraphs\r
+ mov cs:segMap,dx\r
+\r
+ mov BlankPage.Valid,0\r
+ mov ShowPage.Valid,0\r
+ mov DrawPage.Valid,0\r
+\r
+ mov upper_left,0\r
+ mov ScrollPosX,0\r
+ mov ScrollPosY,0\r
+ mov ScrollDX,0\r
+ mov ScrollDY,0\r
+\r
+lm_err: ret\r
+LoadMapFile ENDP\r
+\r
+; Loads a tile-bitmap file, releasing any previously loaded tiles first.\r
+; In: DS:DX = offset of filename (handed on unchanged to LoadFile,\r
+; same convention as LoadMapFile)\r
+; Out: CF set if freeing the old tiles or LoadFile failed. On success\r
+; segTiles is the segment LoadFile returned in DX and all three\r
+; pages are marked invalid.\r
+; Uses: AX, ES, plus whatever LoadFile changes.\r
+;\r
+; Only segTiles is touched here. The map segment belongs to\r
+; LoadMapFile, which needs to see its real value in segMap in order to\r
+; free the previous map.\r
+LoadTilesFile PROC near\r
+ mov ax,segTiles\r
+ cmp ax,-1 ; -1 = no tiles loaded yet\r
+ je tiles_not_loaded\r
+ mov es,ax\r
+ mov ah,49h ; DOS: free memory block at ES\r
+ int 21h\r
+ mov nError,ERR_MEM ; MOV leaves CF from INT 21h intact; nError is written either way\r
+ jc lt_err\r
+ mov segTiles,-1 ; old tile block is gone\r
+\r
+tiles_not_loaded: call LoadFile\r
+ jc lt_err ; leave through this procedure's own exit\r
+ mov segTiles,dx\r
+\r
+ mov BlankPage.Valid,0\r
+ mov ShowPage.Valid,0\r
+ mov DrawPage.Valid,0\r
+\r
+lt_err: ret\r
+LoadTilesFile ENDP\r
+\r
+; Loads the tile file and then the map file selected by nMap.\r
+; nMap is doubled to index the word tables fntblTiles and fntblMap,\r
+; whose entries are passed in DX as the filename offset, with DS set\r
+; to segCode.\r
+; Out: CF set if either load reported an error (the map is not loaded\r
+; when the tile load fails).\r
+EVEN\r
+LoadData PROC near\r
+ ; Load squares from data file\r
+ mov bx,nMap\r
+ shl bx,1 ; word index into the filename table\r
+ mov dx,fntblTiles[bx]\r
+ mov ds,segCode\r
+ call LoadTilesFile\r
+ ; returns Carry if error\r
+ jc load_error\r
+\r
+ ; Load map from data file\r
+ mov ds,segCode\r
+ mov bx,nMap\r
+ shl bx,1 ; word index into the filename table\r
+ mov dx,fntblMap[bx]\r
+ call LoadMapFile\r
+ ; returns Carry if error\r
+\r
+load_error: ret\r
+LoadData ENDP\r
+\r
+; Redraws the whole virtual screen on the draw page, one tile column\r
+; at a time and one plane at a time within each column, then copies\r
+; the result to the blank page and the show page.\r
+; Register use in the drawing loop:\r
+; DS = segTiles, ES = segVideo, FS = segMap\r
+; DI = destination offset (draw page address + upper_left)\r
+; BP = map offset of the column being drawn\r
+; DX = columns left before the map wraps horizontally (saved on\r
+; the stack while draw_col runs)\r
+; CH = tile columns left to draw\r
+; AL = map-mask value, 11h shifted left once per plane until the\r
+; shift carries out\r
+; SI = tile-data offset for the current plane\r
+; DS, ES, FS and the general registers are not restored.\r
+; NOTE(review): the two rep movsb copies rely on the direction flag\r
+; being clear; nothing in this routine sets it.\r
+EVEN\r
+update_full PROC\r
+ mov ds,segTiles\r
+ mov es,segVideo\r
+ mov fs,segMap\r
+\r
+ mov dx,SC_INDEX\r
+ mov al,MAP_MASK\r
+ out dx,al ; select the map-mask register; data port is written per plane\r
+\r
+ mov di,DrawPage.Address\r
+ add di,upper_left\r
+ mov bp,MapInfo.OffX1\r
+ add bp,MapInfo.OffY1\r
+\r
+ mov dx,MapInfo.WrapX\r
+\r
+ mov ch,(VIRTUAL_WIDTH/SQUARE_WIDTH)\r
+draw_full_loop: push cx\r
+ push si\r
+ push dx\r
+\r
+ mov al,11h\r
+ mov si,0\r
+\r
+update_f_loop: mov dx,SC_INDEX + 1\r
+ out dx,al ; enable the plane selected by AL\r
+ push bp\r
+ call draw_col\r
+ pop bp\r
+ sub di,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 ; back to the top of the column\r
+ add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 ; next plane's tile data\r
+ shl al,1\r
+ jnc update_f_loop ; carry out after the fourth plane\r
+\r
+ pop dx\r
+ dec dx\r
+ jnz update_f_go_on\r
+ mov dx,MapInfo.Wid ; wrapped: restart the counter and\r
+ sub bp,dx ; step back one map width\r
+update_f_go_on: inc bp\r
+ pop si\r
+ add di,(SQUARE_WIDTH/ 4) ; next tile column on screen\r
+ pop cx\r
+ dec ch\r
+ jnz draw_full_loop\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+\r
+ mov dx,GC_INDEX\r
+ mov ax,ALL_COPY_BITS\r
+ out dx,ax\r
+\r
+ mov dx,SC_INDEX\r
+ mov ax,0F02h ; map mask = 0Fh: all four planes\r
+ out dx,ax\r
+\r
+ mov ds,segVideo\r
+ mov si,DrawPage.Address\r
+ add si,upper_left\r
+ mov es,segVideo\r
+ mov di,BlankPage.Address\r
+ add di,upper_left\r
+ mov cx,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4\r
+ rep movsb ; draw page -> blank page\r
+ mov si,DrawPage.Address\r
+ add si,upper_left\r
+ mov di,ShowPage.Address\r
+ add di,upper_left\r
+ mov cx,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4\r
+ rep movsb ; draw page -> show page\r
+\r
+ mov dx,GC_INDEX\r
+ mov ax,ALL_DRAW_BITS\r
+ out dx,ax\r
+\r
+ ret\r
+update_full ENDP\r
+\r
+; Draws the leftmost tile column of the draw page, all four planes,\r
+; from the map column at MapInfo.OffX1 + MapInfo.OffY1.\r
+; Sets DS = segTiles, ES = segVideo, FS = segMap and does not restore\r
+; them; AX, BX, CX, DX, SI, DI and BP are changed as well.\r
+EVEN\r
+update_left PROC\r
+ mov ds,cs:segTiles\r
+ mov es,cs:segVideo\r
+ mov fs,cs:segMap\r
+\r
+ mov dx,SC_INDEX\r
+ mov al,MAP_MASK\r
+ out dx,al ; select the map-mask register\r
+\r
+ mov al,011h ; plane mask, shifted left once per plane\r
+ mov si,0 ; tile-data offset for the first plane\r
+ mov di,cs:DrawPage.Address\r
+ add di,cs:upper_left ; becomes DI later\r
+ mov bp,MapInfo.OffX1\r
+ add bp,MapInfo.OffY1\r
+\r
+update_l_loop: mov dx,SC_INDEX + 1\r
+ out dx,al ; enable the plane selected by AL\r
+ push bp\r
+ call draw_col\r
+ pop bp\r
+ sub di,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 ; back to the top of the column\r
+ add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 ; next plane's tile data\r
+ shl al,1\r
+ jnc update_l_loop ; carry out after the fourth plane\r
+\r
+ ret\r
+update_left ENDP\r
+\r
+; Draws the rightmost tile column of the draw page, all four planes,\r
+; from the map column at MapInfo.OffX2 + MapInfo.OffY1.\r
+; Sets DS = segTiles, ES = segVideo, FS = segMap and does not restore\r
+; them; AX, BX, CX, DX, SI, DI and BP are changed as well.\r
+EVEN\r
+update_right PROC near\r
+ mov ds,cs:segTiles\r
+ mov es,cs:segVideo\r
+ mov fs,cs:segMap\r
+\r
+ mov dx,SC_INDEX\r
+ mov al,MAP_MASK\r
+ out dx,al ; select the map-mask register\r
+\r
+ mov bp,MapInfo.OffX2\r
+ add bp,MapInfo.OffY1\r
+\r
+ mov al,011h ; plane mask, shifted left once per plane\r
+ mov si,0 ; tile-data offset for the first plane\r
+\r
+ mov di,cs:DrawPage.Address ; becomes DI\r
+ add di,cs:upper_left\r
+ add di,(VIRTUAL_WIDTH - SQUARE_WIDTH) / 4 ; last tile column of the row\r
+\r
+update_r_loop: mov dx,SC_INDEX + 1\r
+ out dx,al ; enable the plane selected by AL\r
+\r
+ push bp\r
+ call draw_col\r
+ pop bp\r
+ sub di,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 ; back to the top of the column\r
+ add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 ; next plane's tile data\r
+ shl al,1\r
+ jnc update_r_loop ; carry out after the fourth plane\r
+\r
+ ret\r
+update_right ENDP\r
+\r
+; Draws the top tile row of the draw page, all four planes, from the\r
+; map row at MapInfo.OffX1 + MapInfo.OffY1.\r
+; Sets DS = segTiles, ES = segVideo, FS = segMap and does not restore\r
+; them; AX, BX, CX, DX, SI, DI and BP are changed as well.\r
+EVEN\r
+update_top PROC\r
+ mov ds,cs:segTiles\r
+ mov es,cs:segVideo\r
+ mov fs,cs:segMap\r
+\r
+ mov dx,SC_INDEX\r
+ mov al,MAP_MASK\r
+ out dx,al ; select the map-mask register\r
+\r
+ mov di,cs:DrawPage.Address\r
+ add di,cs:upper_left\r
+ mov bp,MapInfo.OffX1\r
+ add bp,MapInfo.OffY1\r
+\r
+ mov al,011h ; plane mask, shifted left once per plane\r
+ mov si,0 ; tile-data offset for the first plane\r
+\r
+update_top_loop:\r
+ mov dx,SC_INDEX + 1\r
+ out dx,al ; enable the plane selected by AL\r
+ push bp\r
+ call draw_row\r
+ pop bp\r
+ sub di,VIRTUAL_WIDTH / 4 ; back to the left edge of the row\r
+ add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 ; next plane's tile data\r
+ shl al,1\r
+ jnc update_top_loop ; carry out after the fourth plane\r
+\r
+ ret\r
+update_top ENDP\r
+\r
+; Draws the bottom tile row of the draw page, all four planes, from\r
+; the map row at MapInfo.OffX1 + MapInfo.OffY2.\r
+; Sets DS = segTiles, ES = segVideo, FS = segMap and does not restore\r
+; them; AX, BX, CX, DX, SI, DI and BP are changed as well.\r
+EVEN\r
+update_bottom PROC\r
+ mov ds,cs:segTiles\r
+ mov es,cs:segVideo\r
+ mov fs,cs:segMap\r
+\r
+ mov dx,SC_INDEX\r
+ mov al,MAP_MASK\r
+ out dx,al ; select the map-mask register\r
+\r
+ mov di,cs:DrawPage.Address\r
+ add di,cs:upper_left\r
+ add di,(VIRTUAL_WIDTH * (VIRTUAL_HEIGHT - SQUARE_HEIGHT)) / 4 ; first line of the last tile row\r
+ mov bp,MapInfo.OffX1\r
+ add bp,MapInfo.OffY2\r
+\r
+ mov al,011h ; plane mask, shifted left once per plane\r
+ mov si,0 ; tile-data offset for the first plane\r
+\r
+update_bottom_loop:\r
+ mov dx,SC_INDEX + 1\r
+ out dx,al ; enable the plane selected by AL\r
+ push bp\r
+ call draw_row\r
+ pop bp\r
+ sub di,VIRTUAL_WIDTH / 4 ; back to the left edge of the row\r
+ add si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4 ; next plane's tile data\r
+ shl al,1\r
+ jnc update_bottom_loop ; carry out after the fourth plane\r
+\r
+ ret\r
+update_bottom ENDP\r
+\r
+; Draws ONE plane of a single col\r
+; In: DS = tile segment, ES = video segment, FS = map segment\r
+; (set up by the update_* callers)\r
+; DI -> upper left corner of col to draw\r
+; BP -> col of map to draw\r
+; SI = tile-data offset for the plane being drawn\r
+; Out: DI advanced by one screen line per tile line drawn; BP moved\r
+; down the map column (callers save and restore BP).\r
+; AX is preserved by parking it in the upper half of EAX, so the\r
+; upper half of EAX is lost. BX, CX and EDX are changed.\r
+EVEN\r
+draw_col PROC near\r
+ ; DI->upper left corner of col to draw\r
+ ; BP->col of map to draw\r
+ ; SI used to point at tiles\r
+ ; AX,CX used\r
+ ; BX = SI with the tile number in BH\r
+ ; EDX is the 4-byte pixel scratch register\r
+ shl eax,16 ; save it\r
+ mov ax,MapInfo.WrapY ; AX = rows left before the map wraps\r
+\r
+ mov cl,(VIRTUAL_HEIGHT / SQUARE_HEIGHT) ; tiles per column\r
+do_col_loop: mov bx,si\r
+ mov bh,byte ptr fs:[bp] ; change tile #\r
+\r
+ mov ch,SQUARE_HEIGHT ; lines per tile\r
+do_col_sq_loop: mov dl,byte ptr ds:[bx+2]\r
+ mov dh,byte ptr ds:[bx+3]\r
+ shl edx,16 ; bytes 2,3 into the upper half\r
+ mov dl,byte ptr ds:[bx+0]\r
+ mov dh,byte ptr ds:[bx+1]\r
+ mov es:[di],edx ; 32-bit write\r
+ add di,VIRTUAL_WIDTH / 4 ; next screen line\r
+ add bx,4 ; next tile line\r
+ dec ch\r
+ jnz do_col_sq_loop\r
+\r
+ add bp,MapInfo.Wid ; next map row\r
+ dec ax\r
+ jnz yayaya\r
+ mov ax,MapInfo.Ht ; wrapped: restart the counter and\r
+ sub bp,MapInfo.Extent ; step back by MapInfo.Extent\r
+yayaya:\r
+\r
+ dec cl\r
+ jnz do_col_loop\r
+\r
+ shr eax,16 ; restore it\r
+\r
+ ret\r
+draw_col ENDP\r
+\r
+; Draws ONE plane of a single row\r
+; In: DS = tile segment, ES = video segment, FS = map segment\r
+; (set up by the update_* callers)\r
+; DI -> upper left corner of row to draw\r
+; BP -> row of map to draw\r
+; SI = tile-data offset for the plane being drawn\r
+; Out: DI advanced by SQUARE_WIDTH/4 per tile drawn; BP moved along\r
+; the map row (callers save and restore BP).\r
+; AX is preserved on the stack. BX, CX and EDX are changed.\r
+EVEN\r
+draw_row PROC near\r
+ push ax\r
+; shl eax,16 ; save ax\r
+\r
+ mov ax,MapInfo.WrapX ; AX = columns left before the map wraps\r
+\r
+ ; DI->upper left corner of row to draw\r
+ ; BP->row of map to draw\r
+ ; SI used to point at tiles\r
+ ; AX,CX used\r
+ ; BX = SI with the tile number in BH\r
+ ; EDX is the 4-byte pixel scratch register\r
+\r
+ mov cl,(VIRTUAL_WIDTH / SQUARE_WIDTH) ; tiles per row\r
+do_row_loop: mov bx,si\r
+ mov bh,byte ptr fs:[bp] ; change tile #\r
+\r
+ mov ch,SQUARE_HEIGHT ; lines per tile\r
+do_row_sq_loop: mov dl,byte ptr ds:[bx+2]\r
+ mov dh,byte ptr ds:[bx+3]\r
+ shl edx,16 ; bytes 2,3 into the upper half\r
+ mov dl,byte ptr ds:[bx+0]\r
+ mov dh,byte ptr ds:[bx+1]\r
+ mov es:[di],edx ; 32-bit write\r
+ add di,(VIRTUAL_WIDTH / 4) ; next screen line\r
+ add bx,4 ; next tile line\r
+ dec ch\r
+ jnz do_row_sq_loop\r
+\r
+ add di,(-VIRTUAL_WIDTH*SQUARE_HEIGHT + SQUARE_WIDTH) / 4 ; up to the tile's first line, one tile to the right\r
+ inc bp ; next map column\r
+ dec ax\r
+ jnz yayaya2\r
+ mov ax,MapInfo.Wid ; wrapped: restart the counter and\r
+ sub bp,ax ; step back one map width\r
+yayaya2:\r
+ dec cl\r
+ jnz do_row_loop\r
+\r
+; shr eax,16 ; restore it\r
+ pop ax\r
+ ret\r
+draw_row ENDP\r
+\1a
\ No newline at end of file
--- /dev/null
+; ====================================================================\r
+; Entry points:\r
+; ====================================================================\r
+; MODEX_START -- has the BIOS set mode 13h, then calls ModifyForX to\r
+; reprogram the VGA. Changes AX, plus whatever INT 10h and ModifyForX\r
+; change.\r
+MODEX_START MACRO\r
+ mov ax,13h ;let the BIOS set standard 256-color\r
+ int 10h ; mode (320x200 linear)\r
+; PALETTE_BLACK\r
+ call ModifyForX\r
+ ENDM\r
+\r
+; ====================================================================\r
+; This is MODE-X code from Dr. Dobb's Journal, by Michael Abrash.\r
+; I modified it from 320x240 back to 320x200, and then to 512 virtual\r
+; width, for scrolling purposes.\r
+; ====================================================================\r
+\r
+; Mode X (320x240, 256 colors) mode set routine. Works on all VGAs.\r
+; ****************************************************************\r
+; * Revised 6/19/91 to select correct clock; fixes vertical roll *\r
+; * problems on fixed-frequency (IBM 851X-type) monitors. *\r
+; ****************************************************************\r
+; Modified from public-domain mode set code by John Bridges.\r
+\r
+; Index/data pairs for CRT Controller registers that differ between\r
+; mode 13h and mode X.\r
+; Each word is written with one 16-bit OUT to CRTC_INDEX, so the low\r
+; byte is the register index and the high byte is the data value.\r
+; Only the three uncommented entries are sent; the commented-out ones\r
+; are the 320x240 settings this table was derived from.\r
+CRTParms label word\r
+; dw 00d06h ;vertical total\r
+; dw 03e07h ;overflow (bit 8 of vertical counts)\r
+; dw 04109h ;cell height (2 to double-scan)\r
+; dw 0ea10h ;v sync start\r
+; dw 0ac11h ;v sync end and protect cr0-cr7\r
+; dw 0df12h ;vertical displayed = 480\r
+ dw 00014h ;turn off dword mode *\r
+; dw 0e715h ;v blank start\r
+; dw 00616h ;v blank end\r
+ dw 0e317h ;turn on byte mode *\r
+\r
+ ; Index 13h with data = VIRTUAL_WIDTH/8, because\r
+ ; VIRTUAL_WIDTH*32 = (VIRTUAL_WIDTH/8)*256 puts that value in the\r
+ ; high byte. NOTE(review): assumes VIRTUAL_WIDTH is a multiple of 8\r
+ ; and below 2048 so the result fits in one byte -- confirm.\r
+ dw (VIRTUAL_WIDTH*32)+13h ; width of screen = VWid NEW\r
+; dw 09012h ;vertical displayed = 400 (already like this)\r
+CRT_PARM_LENGTH equ (($-CRTParms)/2) ; number of word entries above\r
+\r
+; ModifyForX: converts the current mode 13h into the unchained,\r
+; planar variant used by this program, sends the CRTParms table to the\r
+; CRT Controller and clears all 64K of display memory.\r
+;\r
+; In: nothing (expects mode 13h to be set already).\r
+; Out: nothing.\r
+; Modifies: AX, CX, DX, SI, DI, flags (direction flag cleared),\r
+; DS (left equal to CS -- it is NOT restored) and\r
+; ES (left equal to SCREEN_SEG).\r
+ModifyForX PROC near\r
+ mov dx,SC_INDEX\r
+ mov ax,0604h ;index 04h, data 06h\r
+ out dx,ax ;disable chain4 mode\r
+ mov ax,0100h ;index 00h, data 01h\r
+ out dx,ax ;synchronous reset while setting Misc\r
+ ; Output for safety, even though clock\r
+ ; unchanged\r
+ mov dx,MISC_OUTPUT\r
+ ; NOTE(review): this value and its comment come from the 320x240\r
+ ; original; confirm it is the intended setting for the 200-line mode.\r
+ mov al,0e3h\r
+ out dx,al ;select 25 MHz dot clock & 60 Hz scanning rate\r
+\r
+ mov dx,SC_INDEX\r
+ mov ax,0300h ;index 00h, data 03h\r
+ out dx,ax ;undo reset (restart sequencer)\r
+\r
+ mov dx,CRTC_INDEX ;reprogram the CRT Controller\r
+ mov al,11h ;VSync End reg contains register write\r
+ out dx,al ; protect bit\r
+ inc dx ;CRT Controller Data register\r
+ in al,dx ;get current VSync End register setting\r
+ and al,7fh ;remove write protect on various\r
+ out dx,al ; CRTC registers\r
+ dec dx ;CRT Controller Index\r
+ cld ;LODSW below must walk forward\r
+ push cs ;the table lives in the code segment, so\r
+ pop ds ; DS = CS (caller's DS is lost)\r
+ mov si,offset CRTParms ;point to CRT parameter table\r
+ mov cx,CRT_PARM_LENGTH ;# of table entries\r
+SetCRTParmsLoop:\r
+ lodsw ;get the next CRT Index/Data pair\r
+ out dx,ax ;set the next CRT Index/Data pair\r
+ loop SetCRTParmsLoop\r
+\r
+ mov dx,SC_INDEX\r
+ mov ax,0f02h ;index 02h, data 0Fh\r
+ out dx,ax ;enable writes to all four planes\r
+ mov ax,SCREEN_SEG ;now clear all display memory, 8 pixels\r
+ mov es,ax ; at a time\r
+ sub di,di ;point ES:DI to display memory\r
+ sub ax,ax ;clear to zero-value pixels\r
+ mov cx,8000h ;# of words in display memory\r
+ rep stosw ;clear all of display memory\r
+\r
+ ret\r
+ModifyForX ENDP\r
+\1a
\ No newline at end of file
--- /dev/null
+;; ====================================================================\r
+;; (Code follows)\r
+;; ====================================================================\r
+\r
+EVEN\r
+upper_left dw 0 ; Stores upper left corner offset\r
+ ; relative to page offset.\r
+pages dw 0 ; for counting frame-per-sec\r
+ ; (FlipPage increments it once per flip)\r
+\r
+; Per-page bookkeeping. FlipPage rotates these fields between the three\r
+; instances below, one field at a time, so any field added here must\r
+; also get its own ROTATE3 line in FlipPage.\r
+PAGE_INFO STRUCT 2,NONUNIQUE\r
+ Address dw 0 ; initialised to PAGE_n below\r
+ UpperLeftAddress dw 0 ; FlipPage adds ScrollOffset to this to\r
+ ; form the CRTC start address\r
+ MapPosX dw 0 ; presumably the page's map X position --\r
+ MapPosY dw 0 ; and Y; only rotated here, TODO confirm\r
+ Alignment db 0 ; low 2 bits of the scroll offset; FlipPage\r
+ ; turns it into the pixel-pan value\r
+ AlignmentMask db 0 ; not rotated or written by the visible\r
+ ; code (those lines are commented out)\r
+ ScrollOffset dw 0 ; added to UpperLeftAddress in FlipPage\r
+ Rectangles dw 0 ; only rotated here; meaning not visible\r
+ Valid db 0 ; only rotated here; meaning not visible\r
+PAGE_INFO ENDS\r
+\r
+; The two initialisers fill Address and UpperLeftAddress.\r
+DrawPage PAGE_INFO <PAGE_0,PAGE_0>\r
+ShowPage PAGE_INFO <PAGE_1,PAGE_1>\r
+BlankPage PAGE_INFO <PAGE_2,PAGE_2>\r
+\r
+;; ROTATE3 reg,item: rotates one PAGE_INFO field between the three page\r
+;; records: old DrawPage.item -> ShowPage, old ShowPage.item -> BlankPage,\r
+;; old BlankPage.item -> DrawPage.\r
+;; reg = scratch register of the same size as the field (AX for dw\r
+;; fields, AL for db fields); item = field name.\r
+;; On exit reg holds the NEW ShowPage.item.\r
+ROTATE3 MACRO reg,item\r
+ mov reg,cs:ShowPage.item ; reg = old Show\r
+ xchg reg,cs:BlankPage.item ; Blank = old Show, reg = old Blank\r
+ xchg reg,cs:DrawPage.item ; Draw = old Blank, reg = old Draw\r
+ mov cs:ShowPage.item,reg ; Show = old Draw\r
+ ENDM ; Leaves ShowPage.item in reg!\r
+\r
+;; This procedure is used to flip between the three available pages.\r
+;; Originally from Dr. Dobb's Journal's Graphics Programming column by\r
+;; Michael Abrash, I've reworked the code to be more specific to my\r
+;; own purposes, and commented it more.\r
+;;\r
+;; Rotates every PAGE_INFO field except AlignmentMask (Draw->Show,\r
+;; Show->Blank, Blank->Draw), programs the CRTC start address with the\r
+;; new ShowPage.UpperLeftAddress + ShowPage.ScrollOffset, waits for\r
+;; vertical sync, sets the pixel-pan register from the new\r
+;; ShowPage.Alignment and increments 'pages'.\r
+;; In: nothing. Modifies AX, BX, CX, DX, DI and flags.\r
+;; Busy-waits on INPUT_STATUS_1, so it does not return until the next\r
+;; vertical sync has started.\r
+EVEN\r
+FlipPage PROC near\r
+ ; This series of instructions circles the show_page, blank_page,\r
+ ; and draw page appropriately and leaves the current page to show\r
+ ; in AX. Note that it's a lot more instructions than it looks like,\r
+ ; but I unrolled the copy loop for speed. So-so good idea, because\r
+ ; if you add a field and forget to rotate it, it could mean trouble!\r
+ ROTATE3 ax,Rectangles\r
+ ROTATE3 ax,ScrollOffset\r
+ ROTATE3 ax,MapPosX\r
+ ROTATE3 ax,MapPosY\r
+; ROTATE3 al,AlignmentMask SPRITES ...\r
+ ROTATE3 al,Alignment\r
+ mov di,ax ; AL = new ShowPage.Alignment; AH is\r
+ ; left over from the MapPosY rotate\r
+ and di,3 ; DI = pixel pan, 0 to 3.\r
+ shl di,1 ; Mode X requires 0 2 4 or 6.\r
+ ROTATE3 ax,Address\r
+ ROTATE3 al,Valid\r
+ ROTATE3 ax,UpperLeftAddress ; Leaves AX=ShowPage.ULAddr\r
+\r
+ add ax,cs:ShowPage.ScrollOffset ; AX = address to display\r
+\r
+ ; AX is set up to be the current show page already.\r
+ ; By pre-loading BX with the low-address set code, and CX with\r
+ ; the high-address set code, we can more quickly flip the page\r
+ ; after the vertical retrace period.\r
+ mov bl,START_ADDRESS_LOW ;preload for fastest\r
+ mov bh,al ; flipping once display\r
+ mov cl,START_ADDRESS_HIGH ; enable is detected\r
+ mov ch,ah\r
+\r
+ ; Wait for display enable to be active (status is active low), to be\r
+ ; sure both halves of the start address will take in the same frame.\r
+ mov dx,INPUT_STATUS_1\r
+WaitDE: in al,dx\r
+ test al,01h\r
+ jnz WaitDE ;display enable is active low (0 = active)\r
+\r
+ ; Set the start offset in display memory of the page to display.\r
+ mov dx,CRTC_INDEX\r
+ mov ax,bx\r
+ out dx,ax ;start address low\r
+ mov ax,cx\r
+ out dx,ax ;start address high\r
+\r
+ ; Now wait for vertical sync, so the other page will be invisible when\r
+ ; we start drawing to it.\r
+ mov dx,INPUT_STATUS_1\r
+WaitVS: in al,dx\r
+ test al,08h\r
+ jz WaitVS ;vertical sync is active high (1 = active)\r
+\r
+ ; Finally, have to adjust the pixel panning register in order\r
+ ; to fine-tune the starting address on a pixel level.\r
+ ; This pixel pan value is the scroll offset mod 4 -- but since\r
+ ; Mode X's pixel pan works by values of 2 (0, 2, 4 or 6) we\r
+ ; have to shift appropriately.\r
+ ; The three OUTs below go to the same port in the order\r
+ ; index (13h), data (pan value), index (32).\r
+ mov dx,ATC_INDEX\r
+ mov al,13h ; 13h = set pixel pan\r
+ out dx,al\r
+ mov ax,di ; DI = pixel pan calculated above\r
+ out dx,al\r
+ mov dx,ATC_INDEX ; (DX already holds ATC_INDEX here)\r
+ mov al,32 ; Allows the computer to use this register\r
+ out dx,al ; again. Without this OUT, the screen will\r
+ ; remain blank!\r
+\r
+ ; Increment the page counter now!\r
+ inc cs:pages\r
+ ret\r
+FlipPage ENDP\r
+\1a
\ No newline at end of file
--- /dev/null
+;; Palette operations\r
+;; Note that where needed in the macros, a "palette" refers to\r
+;; the segment handle to a 768-byte piece of memory. So palettes\r
+;; can be loaded and freed, they're not permanent, but if you want\r
+;; to use a fixed (not allocated) palette you'd better make sure\r
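+;; it's segment aligned or else you can't use these macros. If it\r
+;; is, load "seg myPalette" into a general register (or a memory word)\r
+;; and pass that as the 'palette' argument: the macros do\r
+;; "mov ds,palette" / "mov es,palette", and an immediate cannot be\r
+;; moved directly into a segment register.\r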
+\r
+;; Fade from a palette to black\r
+;; fade = brightness step per frame; it is used in "64/fade", so it\r
+;; must be a non-zero assemble-time constant.\r
+;; palette = register or memory word holding the palette's segment; the\r
+;; palette data is read from offset 0 of that segment.\r
+;; Modifies SI, DS, BX, CX plus whatever FadePalette modifies.\r
+FADE_OFF MACRO fade,palette\r
+ mov si,0\r
+ mov ds,palette\r
+ mov bh,fade ; positive -> Gets dimmer...\r
+ mov bl,0 ; Starts exact\r
+ mov cx,64/fade+1 ; Total number of loops required\r
+ call FadePalette\r
+ ENDM\r
+\r
+;; Fade from black to a palette\r
+;; fade = brightness step per frame (non-zero assemble-time constant);\r
+;; palette = register or memory word holding the palette's segment,\r
+;; data at offset 0.\r
+;; Modifies SI, DS, BX, CX plus whatever FadePalette modifies.\r
+FADE_ON MACRO fade,palette\r
+ mov si,0\r
+ mov ds,palette\r
+ mov bh,-fade ; negative -> Gets brighter...\r
+ mov bl,64 ; Starts totally dimmed\r
+ mov cx,64/fade+1 ; Total number of loops required\r
+ call FadePalette\r
+ ENDM\r
+\r
+;; Flash from a palette to white\r
+;; fade = brightness step per frame (non-zero assemble-time constant);\r
+;; palette = register or memory word holding the palette's segment,\r
+;; data at offset 0.\r
+;; The subtracted amount (BL) starts at 0 and goes negative, so every\r
+;; colour byte rises until FadePalette clips it at 63.\r
+;; Modifies SI, DS, BX, CX plus whatever FadePalette modifies.\r
+FLASH_OFF MACRO fade,palette\r
+ mov si,0\r
+ mov ds,palette\r
+ mov bh,-fade ; negative -> gets brighter\r
+ mov bl,0 ; Starts exact\r
+ mov cx,64/fade+1 ; Total number of loops required\r
+ call FadePalette\r
+ ENDM\r
+\r
+;; Flash from white to a palette\r
+;; fade = brightness step per frame (non-zero assemble-time constant);\r
+;; palette = register or memory word holding the palette's segment,\r
+;; data at offset 0.\r
+;; The subtracted amount (BL) starts at -64 and rises towards 0.\r
+;; Modifies SI, DS, BX, CX plus whatever FadePalette modifies.\r
+FLASH_ON MACRO fade,palette\r
+ mov si,0\r
+ mov ds,palette\r
+ mov bh,fade ; positive -> Gets dimmer...\r
+ mov bl,-64 ; Starts totally bright\r
+ mov cx,64/fade+1 ; Total number of loops required\r
+ call FadePalette\r
+ ENDM\r
+\r
+;; Save a palette into a palette-sized piece of memory\r
+;; palette = register or memory word holding the destination segment;\r
+;; SavePalette writes 768 bytes starting at offset 0 of that segment.\r
+;; Modifies ES, DI plus whatever SavePalette modifies.\r
+PAL_SAVE MACRO palette\r
+ mov es,palette\r
+ mov di,0\r
+ call SavePalette\r
+ ENDM\r
+\r
+; Returns AX = a new segment for a palette\r
+; Requests (256*3)/16 = 48 paragraphs (768 bytes) through INT 21h,\r
+; AH=48h and stores the returned AX into 'palette' (a register or\r
+; memory word). Modifies AX and BX.\r
+; NOTE(review): the result is not checked here. If the call fails,\r
+; 'palette' receives whatever AX came back as. The final MOV leaves\r
+; the flags alone, so the caller can still test the carry flag.\r
+NEW_PAL MACRO palette\r
+ mov bx,(256 * 3) / 16\r
+ mov ah,48h\r
+ int 21h\r
+ mov palette,ax\r
+ ENDM\r
+\r
+;; Black the entire palette temporarily. Used to blank the screen while\r
+;; drawing a frame before fading in.\r
+;; The source is tmppal itself, which is also the buffer FadePalette\r
+;; writes its result to, so tmppal is overwritten. With BL=64 every\r
+;; source byte in the range 0..63 comes out as 0.\r
+;; Modifies AX, DS, SI, BX, CX plus whatever FadePalette modifies.\r
+PAL_BLACK MACRO\r
+ mov ax,seg tmppal\r
+ mov ds,ax\r
+ mov si,OFFSET tmppal\r
+ mov bh,-1 ; Doesn't really matter...\r
+ mov bl,64 ; Starts totally dimmed\r
+ mov cx,1 ; Just one time -- to leave it black\r
+ call FadePalette\r
+ ENDM\r
+\r
+;; White the entire palette temporarily (the counterpart of PAL_BLACK).\r
+;; The source is tmppal itself, which is also the buffer FadePalette\r
+;; writes its result to, so tmppal is overwritten. With BL=-64 every\r
+;; source byte in the range 0..63 is clipped to 63.\r
+;; Modifies AX, DS, SI, BX, CX plus whatever FadePalette modifies.\r
+PAL_WHITE MACRO\r
+ mov ax,seg tmppal\r
+ mov ds,ax\r
+ mov si,OFFSET tmppal\r
+ mov bh,-1 ; Doesn't really matter...\r
+ mov bl,-64 ; Starts totally bright\r
+ mov cx,1 ; Just one time -- to leave it white\r
+ call FadePalette\r
+ ENDM\r
+\r
+;; Re-send the current contents of tmppal to the video card without\r
+;; recalculating them: with CX=0 FadePalette skips its fade computation,\r
+;; writes tmppal out once and returns.\r
+;; Modifies CX plus whatever FadePalette modifies.\r
+PAL_UPDATE MACRO\r
+ mov cx,0 ; 0 times = update\r
+ call FadePalette\r
+ ENDM\r
+\r
+;; WAITBORDER: busy-waits on bit 3 of INPUT_STATUS_1. It first waits\r
+;; for the bit to be clear, then for it to become set, so it returns\r
+;; right after a 0 -> 1 transition (FlipPage's comments identify bit 3\r
+;; as vertical sync, 1 = active).\r
+;; Modifies AL, DX and flags.\r
+WAITBORDER MACRO\r
+ LOCAL wbr1,wbr2\r
+ mov dx,INPUT_STATUS_1\r
+wbr1: in al,dx ; wait while the bit is still set\r
+ test al,8\r
+ jnz wbr1\r
+wbr2: in al,dx ; wait until the bit becomes set\r
+ test al,8\r
+ jz wbr2\r
+ ENDM\r
+\r
+;; Fade Palette:\r
+;; The following code is modified greatly from the Future Crew's palette\r
+;; fading code. Works on blocks of 256 colors only, so far, but I might\r
+;; change it later. Also, it theoretically could "anti-fade" -- fade to\r
+;; white -- which I call flashing, so I added that ability, which was\r
+;; missing from FC's code.\r
+;;\r
+;; In: DS:SI -> source palette, 768 bytes (not read when CX = 0)\r
+;; BL = amount subtracted from every colour byte on the first\r
+;; pass (signed; a negative value brightens)\r
+;; BH = signed amount added to BL after each pass\r
+;; CX = number of passes; 0 = send tmppal as it is, once\r
+;; Each pass writes the adjusted bytes, clipped to 0..63, into tmppal\r
+;; and then sends tmppal to the video card in two halves, each one\r
+;; after a WAITBORDER.\r
+;; Out: SI preserved; BL advanced by BH once per pass except the last;\r
+;; interrupts are enabled on return.\r
+;; Modifies AX, CX, DX, DI, ES and flags.\r
+EVEN\r
+tmppal DB 768 dup (?) ; Stores old palette\r
+FadePalette PROC NEAR\r
+ mov ax,seg tmppal\r
+ mov es,ax\r
+\r
+FadeLoop: push cx\r
+ push si\r
+\r
+ cmp cx,0 ; CX = 0: skip the recalculation\r
+ je JustUpdate\r
+\r
+ ; Load in the colors in the palette\r
+ mov di,OFFSET tmppal ; ES:DI -> temp palette\r
+ mov cx,768 ; Reads 256*3 bytes at a time.\r
+loadpal_loop: mov al,ds:[si] ; Load one color byte\r
+ inc si\r
+ sub al,bl ; Subtract the fade amount\r
+ jge pal_more ; Limit the range by clipping\r
+ xor al,al ; to between 0 and 63\r
+ jmp pal_ok ; (there's probably a faster\r
+pal_more: cmp al,63 ; way to do it than this,\r
+ jle pal_ok ; but I don't know it)\r
+ mov al,63 ; (both tests are signed compares)\r
+pal_ok: mov es:[di],al ; Store that byte in the new\r
+ inc di\r
+ dec cx ; temp palette and loop.\r
+ jnz loadpal_loop\r
+\r
+ ; Get ready to move this block of palette values\r
+JustUpdate: sti ; Let interrupts happen now,\r
+ WAITBORDER ; while waiting for a retrace,\r
+ cli ; instead of more critical times\r
+\r
+ mov dx,PEL_WRITE_REG; Set up to write to color register,\r
+ xor al,al ; starting at palette entry 0.\r
+ out dx,al\r
+ mov dx,PEL_DATA_REG ; Point at color port\r
+\r
+ ; Quickly put out the first half of the color palette\r
+ mov di,OFFSET tmppal\r
+ mov cl,(768/6)/2 ; 64 passes x 6 bytes = 384 bytes,\r
+ ; i.e. 128 colors per half.\r
+ cli ; Waits a retrace inbetween...\r
+FirstHalfLoop: REPEAT 6 ; Steps of 6 -- reduces the\r
+ mov al,es:[di] ; number of LOOP instructions\r
+ inc di\r
+ out dx,al\r
+ ENDM\r
+ dec cl\r
+ jnz FirstHalfLoop\r
+ sti\r
+\r
+ WAITBORDER ; Waits one retrace -- less flicker\r
+ mov dx,PEL_DATA_REG ; Reset DX\r
+\r
+ ; Now, quickly put out the other half of the colors.\r
+ ; DI carries on from where the first half stopped.\r
+ mov cl,(768/6)/2\r
+ cli ; stays off until the next STI: at\r
+ ; JustUpdate or at JustUpdated\r
+SecondHalfLoop: REPEAT 6 ; Steps of 6 -- reduces the\r
+ mov al,es:[di] ; number of LOOP instructions\r
+ inc di\r
+ out dx,al\r
+ ENDM\r
+ dec cl\r
+ jnz SecondHalfLoop\r
+\r
+ ; For the next iteration, restore everything and loop\r
+ pop si\r
+ pop cx\r
+\r
+ cmp cx,0 ; update-only call: done\r
+ je JustUpdated\r
+\r
+ add bl,bh ; Change brightness by BH\r
+\r
+ dec cx\r
+ jnz FadeLoop\r
+\r
+ ; All done, re-enable interrupts and return\r
+JustUpdated: sti\r
+ ret\r
+FadePalette ENDP\r
+\r
+;; Saves the palette into the memory pointed at by ES:DI. That memory\r
+;; must be at least 768 bytes long...\r
+;; In: ES:DI -> destination buffer.\r
+;; Out: DI advanced by 768; interrupts are enabled on return.\r
+;; Modifies AL, CL, DX, DI and flags.\r
+SavePalette PROC NEAR\r
+ mov dx,PEL_READ_REG ; Set up to read from color register,\r
+ xor al,al ; starting at palette entry 0.\r
+ out dx,al\r
+ mov dx,PEL_DATA_REG\r
+\r
+ ; Quickly read in the whole color palette:\r
+ ; 128 passes x 6 bytes = 768 bytes.\r
+ mov cl,(768/6)\r
+ cli\r
+ReadPalLoop: REPEAT 6 ; Steps of 6 -- reduces the\r
+ in al,dx ; number of LOOP instructions\r
+ mov es:[di],al\r
+ inc di\r
+ ENDM\r
+ dec cl\r
+ jnz ReadPalLoop\r
+ ; All done, re-enable interrupts and return\r
+ sti\r
+ ret\r
+SavePalette ENDP\r
+\r
+;; Load a palette from a file. Opens the file and reads it into\r
+;; memory (standard LoadFile) and then points the palette at that\r
+;; newly allocated memory. Also, frees old memory before it does\r
+;; any loading ...\r
+;;\r
+;; In: whatever LoadFile expects (its registers are passed through\r
+;; untouched when no palette is currently loaded; the free\r
+;; path modifies AX and ES first).\r
+;; Out: carry clear -> segPalette = segment returned by LoadFile in DX\r
+;; carry set -> failure. If freeing the old block failed,\r
+;; nError = ERR_MEM and segPalette still holds the\r
+;; old segment; if LoadFile failed, segPalette = -1.\r
+;; segPalette = -1 means "no palette loaded".\r
+;; nError is written only when the free really fails, so a successful\r
+;; reload no longer leaves a stale ERR_MEM behind.\r
+LoadPaletteFile PROC near\r
+ mov ax,segPalette\r
+ cmp ax,-1 ; anything to free?\r
+ je pal_not_loaded\r
+ mov es,ax\r
+ mov ah,49h ; free the old palette block\r
+ int 21h\r
+ jnc pal_freed\r
+ mov nError,ERR_MEM ; MOV leaves the carry flag set\r
+ ret ; for the caller\r
+pal_freed: mov segPalette,-1\r
+\r
+pal_not_loaded: call LoadFile\r
+ jc lp_err\r
+\r
+ mov segPalette,dx\r
+lp_err: ret\r
+LoadPaletteFile ENDP\r
+\1a
\ No newline at end of file
--- /dev/null
+ ________________________________________________\r
+ |+----------------------------------------------+|\r
+ || I N T R O D U C I N G : ||\r
+ |+----------------------------------------------+|\r
+ || Steve's 4-Way ||\r
+ || ___ ___ ____ ___ _ _ ||\r
+ || / __| / __| | _ \ / _ \ | | | | ||\r
+ || | <_ | | | |_> | | | | | | | | | ||\r
+ || \_ \ | | | / | | | | | | | | ||\r
+ || __> | | |__ | |\ \ | |_| | | |__ | |__ ||\r
+ || |___/ \___| |_| \_| \___/ |____| |____| ||\r
+ |+______________________________________________+|\r
+ +------------------------------------------------+\r
+\r
+ There, now that I have the hype outta the way, let me explain what\r
+this program is. I'm releasing the source code to my 4-way scrolling\r
+code so that others can learn from it. There aren't enough really\r
+good resources out there for someone learning to program games, so I'm\r
+trying to do my part to help.\r
+\r
+WHAT IT IS:\r
+\r
+ The code is 100% assembly, for which I use MASM 6.0, so there may\r
+be a few problems converting to Turbo Assembler. I also use the ".386"\r
+directive, meaning that you can't run this code with a 286 or earlier.\r
+But most of the code should be easily convertible. I haven't been\r
+programming for 386's much so I really don't make the use of the 386\r
+registers like I could have. Mostly I just did it for some extra 386\r
+instructions.\r
+\r
+ You'll need a VGA which can support mode 13h, the MCGA mode. This\r
+code runs in "tweaked" MCGA mode, or what is called "Mode X". For more\r
+information on Mode X, check out the 1991 - 1992 issues of Dr. Dobb's\r
+Journal, wherein you will find Michael Abrash's excellent Graphics\r
+Programming column. This is where I (and many others) found out about\r
+Mode X, which is an excellent graphics mode for fast 256-color graphics.\r
+Also, you can take a look at XLIB, YakIcons, FastGraph, etc which are all\r
+graphics libraries (public domain or otherwise) which support Mode X\r
+graphics and probably have some good documentation on programming the mode.\r
+Additionally, check out _The Programmer's Guide to the EGA and VGA Cards_,\r
+by Richard Ferraro, and _Power Graphics Programming_ (out of print, but\r
+available directly from Que Books) by Michael Abrash. Finally, you can\r
+ask about graphics programming on many newsgroups such as\r
+"rec.games.programmer"...\r
+\r
+WHAT IT DOES:\r
+\r
+ The code will allow you to create "tiled" background patterns and then\r
+to omnidirectionally scroll over them. You could implement sprite routines\r
+and then animate them over the background, but I haven't gotten this far\r
+yet. The scrolling is always relative -- i.e. no "jump-to"s, just "scroll\r
+left", "scroll up", etc. Jump to would be very easy to implement, I just\r
+haven't done it yet.\r
+\r
+ It runs at about 60-70 fps on a 386/20, which means that it is operating\r
+in under the time of one vertical refresh (_just_ under, according to some\r
+timing I've done). This could probably be reduced, but the best way to\r
+reduce it is to limit the speed at which it scrolls -- if you stick to\r
+scrolling at most 8 pixels at a time in two axes or 16 pixels at a time\r
+in one axis, it is very fast. More than that, and it occasionally takes\r
+more than one refresh period even on my 486. Still, that should be\r
+fast enough for just about any game.\r
+\r
+ I also included some routines to generate maps, tiles, and palettes\r
+so you can see the file formats. These are in C, and the executables\r
+are around in case you don't care to recompile. None of the utilities\r
+are exactly production quality. You'll have to look at the code to\r
+figure out the arguments! Luckily you can just run them with no args\r
+and they perform default behavior.\r
+\r
+ Lastly, the program SCROLL.EXE is a demo of what it can do. In this\r
+demo you can use one of two sets of keyboard controls to scroll around.\r
+One, the default set of commands, lets you press up/down/left/right and\r
+scroll in that direction. The other has "inertia" -- pressing up/down/\r
+left/right will accelerate you in that direction. You'll see what I\r
+mean, just experiment. You can switch keyhandlers by pressing K.\r
+You can also switch between the diagonal pattern map and a logo map\r
+by pressing M. (By the way, it will eventually run out of memory loading\r
+the maps and the diagonal map will screw up... don't worry about it,\r
+it'd be fixed if I had more time). Try it out.\r
+\r
+CREDIT WHERE CREDIT IS DUE:\r
+\r
+ People who (unknowingly) helped me out:\r
+\r
+ Keyboard by Steven Dollins, Brown Computer Group. From his\r
+ KEYINT routines, which is an INT 9 handler to let you\r
+ keep track of many keys being pressed at the same time.\r
+ Graphics, basically, by Michael Abrash, whose Mode X columns\r
+ influenced me greatly.\r
+ Palette fades and file I/O by the Future Crew. Thanks for\r
+ letting out the Mental Surgery code!\r
+ CPU detection by Ray Duncan, taken from one of his books.\r
+\r
+ Obviously I haven't just pirated the code, it's all from publicly\r
+released source code and I modified it a bit. But I wouldn't have come\r
+up with this whole thing without those helping hands. Thanks.\r
+\r
+HOW IT WORKS:\r
+\r
+ Here's how the scrolling works. I'll explain it from a single-page\r
+point of view, although it actually uses several pages of video memory.\r
+The video memory is laid out like this:\r
+ÉÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÑÍÍÍÍÍÍÍÍÍÍÍÍ» ÄÄÄ\r
+º ³ / / / / º ³\r
+º ³/ / / / º ³\r
+º ³ / / / /º ³\r
+º Visible page ³ / Not / / º ³\r
+º ³/ visible/ º ³\r
+º ³ / / / /º 64K\r
+º ³ / / / / º ³\r
+ÇÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÙ/ / / / º ³\r
+º / / / / / / / / / / / / / / /º ³\r
+º / / / / / / / / / / / / / / / º ³\r
+º/ / / / / / / / / / / / / / / º ³\r
+ÈÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍͼ ÄÄÄ\r
+In other words, it has a virtual width greater than the actual screen\r
+width, and a virtual height higher than the actual screen height. The\r
+VGA hardware allows hardware panning around within the virtual area, so\r
+that makes panning much easier: you only have to draw the information\r
+that is coming on to the screen with each pan.\r
+\r
+What is Happening: What the user sees:\r
+ÉÍÍÍÍÍÍÍÍÑÍÍÍÍ» ÚÄÄÄÄÄÄÄÄ¿\r
+º hel³////º ³ hel³ The picture that is\r
+ÇÄÄÄÄÄÄÄÄÙ////º ÀÄÄÄÄÄÄÄÄÙ coming on to the screen\r
+º/////////////º ("hello") appears to\r
+ÈÍÍÍÍÍÍÍÍÍÍÍÍͼ the user to be scrolling\r
+ÉÍÑÍÍÍÍÍÍÍÑÍÍÍ» ÚÄÄÄÄÄÄÄÄ¿ left, although it is\r
+º/³ hell³///º ³ hell³ actually at a stationary\r
+º/ÀÄÄÄÄÄÄÄÙ///º ÀÄÄÄÄÄÄÄÄÙ location in memory...\r
+º/////////////º Each time the frame moves,\r
+ÈÍÍÍÍÍÍÍÍÍÍÍÍͼ it is not necessary to\r
+ÉÍÍÑÍÍÍÍÍÍÍÑÍÍ» ÚÄÄÄÄÄÄÄÄ¿ redraw the parts that stay\r
+º//³ hello³//º ³ hello³ on the screen, just the\r
+º//ÀÄÄÄÄÄÄÄÙ//º ÀÄÄÄÄÄÄÄÄÙ parts that become visible.\r
+º/////////////º\r
+ÈÍÍÍÍÍÍÍÍÍÍÍÍͼ\r
+\r
+ The same works up&down too, or even left/right and up/down at the same\r
+time. The problem occurs when you scroll enough to hit the edge of the\r
+virtual space. Luckily, video memory increases and wraps at the right\r
+edge to one line down on the left edge. So you end up with a situation\r
+like this after scrolling too far right:\r
+ÉÍÍÍÍÍÍÍÍÍÍÍÍÍÍÑÍÍÍÍ» ÄÄÄ\r
+ÇÄÄÄÄÄÄÄ¿//////³ º ³ User sees:\r
+º ³//////³ Thº ³ ÚÄÄÄÄÄÄÄÄÄÄÄ¿\r
+ºe quick³//////³ º 64K ³ ³\r
+º ³//////ÀÄÄÄĶ ³ ³ The quick³\r
+ÇÄÄÄÄÄÄÄÙ///////////º ³ ³ ³\r
+º///////////////////º ³ ÀÄÄÄÄÄÄÄÄÄÄÄÙ\r
+ÈÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍͼ ÄÄÄ\r
+The wrapping is transparent to the user. So, it appears that you can\r
+scroll left & right infinitely, by simply always updating the amount of\r
+memory that has scrolled into view.\r
+\r
+ But what happens when you scroll too far down? Now Intel segments come\r
+to the rescue! Because the video memory is 64K, and that is also the\r
+largest amount of memory you can access in a segment, the segment arithmetic\r
+performs the top-to-bottom wrapping for me. It results in a similar\r
+situation as is pictured above, but with the screen split horizontally\r
+instead of vertically. Again, it's completely transparent to the user.\r
+\r
+ One performance optimization that I've done is to organize the background\r
+picture that is being scrolled into quantized "tiles" -- 16x16 pixels in\r
+area. This means that you can store a large amount of picture data if that\r
+data is repetitive -- as the backgrounds of many video games are. This also\r
+helps when figuring out how much new stuff to draw on the screen. I can wait\r
+until the panning crosses a 16-pixel border, then draw another 16-pixel\r
+strip, and then wait for another tile crossing, etc. You can see this in\r
+the MAP.INC and SCROLL.INC code. 16x16 pixels also leads to 256-pixel-square\r
+tiles, which is always a convenient number in assembly... it helps out in\r
+several places in the code.\r
+\r
+ So, the display page is "wandering" around the video memory, only drawing\r
+what is necessary at any time. Meanwhile you can animate sprites over the\r
+background, etc. The only problem is that with one page, information is\r
+constantly being drawn to that page and you can never guarantee that it is in\r
+a correct state at the time of a vertical refresh period. Instead, I actually\r
+use several pages, so that one can be shown while the other is worked on.\r
+This guarantees a perfect picture at any time. So for now, let's ignore the\r
+scrolling for a second, and talk about the paging, because it's easier to\r
+understand the paging if scrolling isn't happening.\r
+\r
+ Here's a basic explanation of how the paging works. I use three separate\r
+pages, a Draw page, a Show page, and a Blank page. The Show page refers to\r
+the page that is currently showing, the Draw page to the page that is\r
+under construction (to be shown next frame), and the Blank page should always\r
+be maintained as an up-to-date blank background page. (The Blank page is\r
+useful for sprite programming which I am going to be doing next.) Each\r
+of the pages is 352x240, although the screen resolution is only 320x200.\r
+\r
+ Each frame, the pages rotate DrawPage->ShowPage->BlankPage->DrawPage.\r
+This means that at the beginning of the frame, the Draw Page is already\r
+blank, so all that is necessary is to draw on a bunch of sprites. The\r
+BlankPage, though, is no longer blank (it's still got stuff from what\r
+was the ShowPage) so we have to erase it, by blanking out the sprites\r
+(luckily the new DrawPage _is_ empty, so we can use a Mode X 32-bit video-\r
+to-video copy to blank it). Hope you're still with me.\r
+\r
+ So, this loop continues with each frame, and the loop invariants are\r
+maintained: Show Page is always a "good" frame -- don't touch it. Blank\r
+Page is always blank. Draw Page can look like anything. Now to include\r
+the scrolling again:\r
+\r
+ The way I do scrolling with several pages is that the pages ALL wander\r
+around video memory, only they're smaller (1/3 of the size that they could\r
+have been, to be exact!). Here's a picture of the situation at its worst:\r
+ÉÍÍÍÍÍÍÍÍÍÍÍÍÍÍ» ÄÄÄ\r
+º ³//ÀÄÄÄÄÄÄÄĺ ³\r
+ºÄÄÙ///////////º ³\r
+º/////ÚÄÄÄÄÄÄÄĺ ³\r
+ºÄÄ¿//³PAGE 0 º ³\r
+º ³//³ (Draw) º ³\r
+º ³//ÀÄÄÄÄÄÄÄĺ ³\r
+ºÄÄÙ///////////º 64K\r
+º/////ÚÄÄÄÄÄÄÄĺ (21K each page)\r
+ºÄÄ¿//³PAGE 1 º ³\r
+º ³//³ (Show) º ³\r
+º ³//ÀÄÄÄÄÄÄÄĺ ³\r
+ºÄÄÙ///////////º ³\r
+º/////ÚÄÄÄÄÄÄÄĺ ³\r
+ºÄÄ¿//³PAGE 2 º ³\r
+º ³//³ (Blank)º ³\r
+ÈÍÍÍÍÍÍÍÍÍÍÍÍÍͼ ÄÄÄ\r
+The pages always maintain an equal distance apart as they wander. Since\r
+all pages move in parallel, the way it used to work is that as soon as the\r
+pages scrolled, I would draw the newly-visible picture information on\r
+all three of the pages. This worked great, except that it resulted in\r
+a slight pause every time the screen scrolled because it was doing hardware\r
+pan most of the time (which is very fast) and the drawing routines were\r
+slow. Now, I've spread the copying over successive frames to allow a\r
+smoother scrolling rate. This is possible because it's not really necessary\r
+to draw the new information to a page before that page becomes the show\r
+page...\r
+\r
+ I hope that this has made some sense. It's pretty complicated stuff.\r
+Take a look at the code and maybe that will help. Or, write me mail\r
+(my email address is below). Or, design your own way and ignore this\r
+whole thing.\r
+\r
+COMING SOON:\r
+\r
+ Next up are Sprite routines. I threw in what I started as SPRITE.INC,\r
+although it's not included in the project right now.\r
+ Sound support\r
+\r
+ Who knows what else? Depends on what people send me!\r
+\r
+-------------------------------------------------------------------------\r
+ R E A D T H I S\r
+-------------------------------------------------------------------------\r
+ R E A D T H I S\r
+-------------------------------------------------------------------------\r
+\r
+ This code is being released as "SwapWare". That means that if you wanted\r
+to go ahead and use my code directly, I really wouldn't care. But I ask\r
+that you send me some of your code that you think is neat. Especially if\r
+it's modifications that you make to this code, such as quick sprite drawing\r
+or optimizations.\r
+\r
+ I'm not going to brag and say that I "threw this together in a few hours".\r
+I didn't, it took me many days of work to get it working properly. But\r
+I'm also not looking for money as compensation for my labor. I make\r
+great money at my real day job and you probably have a better use for your\r
+donations, such as legitimizing your unregistered shareware and pirated\r
+games. I'm in this for the knowledge ... so my best payback would be to\r
+get lots of code from people out there, stuff to really help make a great\r
+game. In particular, these would be great:\r
+ * 32-bit code\r
+ * Tricky optimizations\r
+ * Fast BitBlt/masked BitBlt code\r
+ * Useful File I/O functions\r
+ * 3D polygon and texture mapping code\r
+ * Maintenance routines -- like numeric conversions, etc.\r
+ * Hardware access code like timing routines and interrupt\r
+ handlers\r
+Any of those would be very helpful when writing a fast scrolling game.\r
+\r
+You can contact me (for the rest of this term only) at\r
+ seisner@athena.mit.edu\r
+Feel free to ask any questions you want! I check my mail about once or\r
+twice a week so don't expect instant turnaround... If you're desperate\r
+to talk to me, say if you work at Origin and want to give me the source\r
+code to Strike Commander or whatnot, you can also reach me at:\r
+ Steve Eisner\r
+ (617) 247-8691\r
+and leave a message. But I'd rather you wrote e-mail.\r
+\r
+ Thanks,\r
+ Steve Eisner\r
+\r
+* Read rec.games.programmer! And for those who already do:\r
+ I dream of a world where no one argues over why Wolfenstein\r
+ 3-D sucks or why it doesn't. Would people just give it a\r
+ break?\r
+\1a
\ No newline at end of file
--- /dev/null
+;; Global variables used here ...\r
+;; Note on units: Scroll compares and adjusts ScrollPosY in multiples\r
+;; of VIRTUAL_WIDTH, so ScrollPosY holds the row offset already\r
+;; multiplied by VIRTUAL_WIDTH, and Scroll adds ScrollPosX to it\r
+;; directly to form the scroll offset.\r
+;; ScrollDY is added to ScrollPosY unchanged, so presumably it must be\r
+;; supplied pre-multiplied as well -- TODO confirm against the callers.\r
+EVEN\r
+ScrollPosX dw 0 ; Scroll origin, upper-left X\r
+ScrollPosY dw 0 ; Scroll origin, upper-left Y\r
+ScrollDX dw 0 ; Amount to change scroll origin, X\r
+ScrollDY dw 0 ; Amount to change scroll origin, Y\r
+\r
+;; SCROLL:\r
+;; This routine takes care of all of the scrolling, however it calls\r
+;; outside drawing routines to update the screen. Scrollx and\r
+;; Scrolly determine the amount to scroll by.\r
+;; Note that this does only RELATIVE scrolling, not absolute scrolling.\r
+;; Scroll saves time by updating only up to the one row or column of\r
+;; tiles which have come into view due to a change in scroll offset.\r
+;; In other words, it's not good for "jumping" to a particular point,\r
+;; although this effect can be accomplished in other ways -- the draw_full\r
+;; routine is available to draw a full screen again.\r
+;; Sometimes this means that you will have to calculate values ahead of\r
+;; time, for instance if you wish the scrolling to keep a certain sprite\r
+;; in the center of the screen. In this case, just set ScrollDX and\r
+;; ScrollDY to the delta-x and delta-y of the sprite.\r
+;; * Newly added:\r
+;; Since there are three pages, it is necessary to keep each one of them\r
+;; up to date with each scroll. Recently, I was doing some fast (8+\r
+;; pixels per frame) scrolling and noticed that there was a significant\r
+;; pause when the screen snapped to a new origin. (The origin is always\r
+;; at a square's corner, even though it may not look like it because it\r
+;; disguises things by smooth-panning the hardware.) Every time it\r
+;; scrolled, it was drawing the new information and copying it to the\r
+;; two other pages. I've now distributed the load over successive\r
+;; pages, in other words it doesn't copy the new info all at once, but\r
+;; over several frames. This really smoothed out the scrolling so that\r
+;; while there are still some jumps, they only occur very infrequently\r
+;; and then only at 15 or 16 pixel/frame scroll rates...) That's the\r
+;; "catchup" code at the bottom, and that's why it's more complex than\r
+;; it maybe could be...\r
+EVEN\r
+Scroll PROC near\r
+ ; Using the ScrollDX variable as delta-x, move the scroll-origin\r
+ ; in the x direction. Then, if the visible screen is now\r
+ ; viewing invalid data, snap the origin to a new point and\r
+ ; draw any new columns that are necessary.\r
+do_x_scroll: mov ax,cs:ScrollPosX\r
+ add ax,cs:ScrollDX ; ScrollDX is a delta-x\r
+ jl wrap_l ; wrap left if negative\r
+ cmp ax,VIRTUAL_WIDTH - SCREEN_WIDTH ; too far right?\r
+ jge wrap_r ; wrap right if too big\r
+ mov cs:ScrollPosX,ax ; Stores new scroll-x\r
+ ; (just like above, for y:)\r
+ ; Using the ScrollDY variable as delta-y, move the scroll-origin\r
+ ; in the y direction. Then, if the visible screen is now\r
+ ; viewing invalid data, snap the origin to a new point and\r
+ ; draw any new rows that are necessary.\r
+do_y_scroll: mov ax,cs:ScrollPosY\r
+ add ax,cs:ScrollDY ; ScrollDY is a delta-y\r
+ jl wrap_t ; wrap top if negative\r
+ cmp ax,(VIRTUAL_HEIGHT - SCREEN_HEIGHT) * VIRTUAL_WIDTH\r
+ jge wrap_b ; wrap bottom if too big\r
+ mov cs:ScrollPosY,ax ; Store the new scroll-y\r
+ jmp calculate\r
+\r
+ ; To wrap to the right:\r
+ ; Add a square's width to the origin's upper left corner, and\r
+ ; subtract the same amount from the scroll origin's upper left\r
+ ; corner. This makes no difference on the screen but allows\r
+ ; us to forget about the leftmost column on the screen (it's\r
+ ; offscreen now...) so we can take over the right column.\r
+ ; See any documentation I included for an explanation of the\r
+EVEN ; scrolling...\r
+wrap_r: add cs:upper_left,SQUARE_WIDTH / 4\r
+ sub ax,SQUARE_WIDTH\r
+ mov cs:ScrollPosX,ax\r
+\r
+ mov dx,MapInfo.Wid\r
+ mov bp,MapInfo.OffX1\r
+ inc bp\r
+ cmp bp,dx\r
+ jb wrap_r1_ok\r
+ sub bp,dx\r
+wrap_r1_ok: mov MapInfo.OffX1,bp\r
+\r
+ mov bp,MapInfo.OffX2\r
+ inc bp\r
+ cmp bp,dx\r
+ jb wrap_r2_ok\r
+ sub bp,dx\r
+wrap_r2_ok: mov MapInfo.OffX2,bp\r
+\r
+ mov bp,MapInfo.WrapX\r
+ dec bp\r
+ jnz wrap_r3_ok\r
+ add bp,dx\r
+wrap_r3_ok: mov MapInfo.WrapX,bp\r
+\r
+ call update_right\r
+ jmp do_y_scroll ; Jump back to do Y\r
+\r
+EVEN ; Same for left side\r
+wrap_l: sub cs:upper_left,SQUARE_WIDTH / 4\r
+ add ax,SQUARE_WIDTH\r
+ mov cs:ScrollPosX,ax\r
+\r
+ mov dx,MapInfo.Wid\r
+ mov bp,MapInfo.OffX1\r
+ dec bp\r
+ cmp bp,dx\r
+ jb wrap_l1_ok\r
+ add bp,dx\r
+wrap_l1_ok: mov MapInfo.OffX1,bp\r
+\r
+ mov bp,MapInfo.OffX2\r
+ dec bp\r
+ cmp bp,dx\r
+ jb wrap_l2_ok\r
+ add bp,dx\r
+wrap_l2_ok: mov MapInfo.OffX2,bp\r
+\r
+ mov bp,MapInfo.WrapX\r
+ inc bp\r
+ cmp bp,dx\r
+ jbe wrap_l3_ok\r
+ sub bp,dx\r
+wrap_l3_ok: mov MapInfo.WrapX,bp\r
+\r
+ call update_left\r
+ jmp do_y_scroll ; Jump back to do Y\r
+\r
+EVEN ; Same for bottom\r
+wrap_b: add cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4\r
+ sub ax,SQUARE_HEIGHT * VIRTUAL_WIDTH\r
+ mov cs:ScrollPosY,ax\r
+\r
+ mov bp,MapInfo.OffY1\r
+ mov dx,MapInfo.Extent\r
+ add bp,MapInfo.Wid\r
+ cmp bp,dx\r
+ jb wrap_b1_ok\r
+ sub bp,dx\r
+wrap_b1_ok: mov MapInfo.OffY1,bp\r
+\r
+ mov bp,MapInfo.OffY2\r
+ add bp,MapInfo.Wid\r
+ cmp bp,dx\r
+ jb wrap_b2_ok\r
+ sub bp,dx\r
+wrap_b2_ok: mov MapInfo.OffY2,bp\r
+\r
+ mov dx,MapInfo.Ht\r
+ mov bp,MapInfo.WrapY\r
+ dec bp\r
+ jg wrap_b3_ok\r
+ add bp,dx\r
+wrap_b3_ok: mov MapInfo.WrapY,bp\r
+\r
+ call update_bottom\r
+ mov ax,cs:ScrollPosY\r
+ jmp calculate ; Jump down to calc new offsets\r
+\r
+EVEN ; Same for top\r
+wrap_t: sub cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4\r
+ add ax,SQUARE_HEIGHT * VIRTUAL_WIDTH\r
+ mov cs:ScrollPosY,ax\r
+\r
+ mov bp,MapInfo.OffY1\r
+ mov dx,MapInfo.Extent\r
+ sub bp,MapInfo.Wid\r
+ cmp bp,dx\r
+ jb wrap_t1_ok\r
+ add bp,dx\r
+wrap_t1_ok: mov MapInfo.OffY1,bp\r
+\r
+ mov bp,MapInfo.OffY2\r
+ sub bp,MapInfo.Wid\r
+ cmp bp,dx\r
+ jb wrap_t2_ok\r
+ add bp,dx\r
+wrap_t2_ok: mov MapInfo.OffY2,bp\r
+\r
+ mov bp,MapInfo.WrapY\r
+ mov dx,MapInfo.Ht\r
+ inc bp\r
+ cmp bp,dx\r
+ jbe wrap_t3_ok\r
+ sub bp,dx\r
+wrap_t3_ok: mov MapInfo.WrapY,bp\r
+\r
+ call update_top\r
+ mov ax,cs:ScrollPosY\r
+ jmp calculate ; Jump down to calc new offsets\r
+\r
+EVEN\r
+align_mask_table DB 11h,22h,44h,88h\r
+calculate:\r
+ ; Calculate the scroll offset\r
+ ; AX already = ScrollPosY\r
+ add ax,cs:ScrollPosX ;Now AX = scroll offset\r
+\r
+ ; Calculate the plane alignment\r
+ mov bl,al\r
+ and bx,0003h\r
+ mov cs:DrawPage.Alignment,bl\r
+; mov bl,cs:align_mask_table[bx]\r
+; mov cs:DrawPage.AlignmentMask,bl\r
+\r
+ ; Now we don't need Scroll Offset on a pixel level any more,\r
+ ; so shift it to a byte level (/4) and store it away.\r
+ shr ax,2\r
+ mov cs:DrawPage.ScrollOffset,ax\r
+\r
+ ; Calculate the actual upper left corner address\r
+ mov si,cs:DrawPage.Address\r
+ add si,cs:upper_left\r
+ mov cs:DrawPage.UpperLeftAddress,si\r
+\r
+ ; And the map offset:\r
+ mov bx,MapInfo.WrapX\r
+ mov cs:DrawPage.MapPosX,bx\r
+ mov di,MapInfo.WrapY\r
+ mov cs:DrawPage.MapPosY,di\r
+\r
+ mov cs:DrawPage.Valid,1\r
+ cmp cs:BlankPage.Valid,0\r
+ je no_catch_up\r
+\r
+ ; Lastly, update dirty area (if any) on blank page.\r
+ ; BX and DI still contain the draw page's map position (X, Y).\r
+ sub bx,cs:BlankPage.MapPosX\r
+ sub di,cs:BlankPage.MapPosY\r
+ jnz yes_catch_up\r
+ cmp bx,0\r
+ jnz yes_catch_up\r
+ ; No catchup necessary -- return.\r
+no_catch_up: ret\r
+\r
+;; Okay, this stuff is a mess. I've registerized everything except\r
+;; for the video data itself. I'll try to comment it best I can.\r
+EVEN\r
+yes_catch_up:\r
+ ; First, switch into full-copy mode. This means latching the\r
+ ; bit mask as coming entirely from the local 32-bit registers\r
+ ; and then setting the map mask to write to all 4 planes. This\r
+ ; is Mode X's greatest advantage, when you can do it! It\r
+ ; provides a 2x speedup or so...\r
+ mov dx,SC_INDEX ; Select Sequencer input\r
+ mov ax,0F02h\r
+ out dx,ax ; set map mask = all bits\r
+\r
+ mov dx,GC_INDEX\r
+ mov ax,ALL_COPY_BITS\r
+ out dx,ax\r
+\r
+ JKEYNP kB,isntbp\r
+isbp: nop\r
+isntbp:\r
+ ; Next, calculate the amount to catch up the top/bottom rows\r
+ ; If we just wrapped over the edge, it is possible that the\r
+ ; distance traveled will be as high as MapInfo.Ht - 1. So,\r
+ ; in the fashion of signed numbers, if the number is greater\r
+ ; than MapInfo.Ht / 2, we take it to mean negative. To convert\r
+ ; it to signed, we have to shift it into the proper range. But\r
+ ; if it's less than MapInfo.Ht / 2, then it's okay as it is.\r
+ mov ax,di\r
+ cmp ax,0\r
+ je y_mod\r
+\r
+ mov cx,MapInfo.Ht\r
+ cwd ; DX = -1 or 0 based on AX's sign.\r
+ and dx,cx ; DX = Ht or 0\r
+ add ax,dx ; AX = 0 ... Ht (unsigned)\r
+\r
+ mov di,ax\r
+ shl di,1\r
+ cmp di,cx\r
+ jb y_signed\r
+ sub ax,cx\r
+y_signed: neg ax\r
+\r
+ ; Find DI MOD MapInfo.Wid, and then convert it into virtual\r
+ ; coordinates from map offset coordinates.\r
+ ; This routine also calculates BP, which will be used as a loop\r
+ ; counter to determine how many rows to draw on the left/right\r
+ ; column copy.\r
+y_mod: mov bp,ax\r
+ cwd\r
+ add bp,dx\r
+ xor bp,dx\r
+ shl bp,3 ; BP = (SQUARE_HEIGHT / 2) * dX\r
+ mov di,cs:MultVirtWidth[bp] ; Use multiplication table\r
+ add di,dx ; to calculate new DI, then\r
+ xor di,dx ; restore the sign.\r
+ sub bp,VIRTUAL_HEIGHT / 2\r
+ ; Out: DI = # of pixels traveled,\r
+ ; BP = (VIRTUAL_HEIGHT - # of rows) / 2\r
+\r
+ ; Change BX (delta-x) to signed from unsigned, store in AX\r
+ mov ax,bx\r
+ mov cx,MapInfo.Wid\r
+ cwd\r
+ and dx,cx ; DX = Wid or 0\r
+ add ax,dx ; AX = 0 ... Wid\r
+\r
+ mov bx,ax\r
+ shl bx,1\r
+ cmp bx,cx\r
+ jb x_signed\r
+ sub ax,cx\r
+x_signed:\r
+\r
+ ; The following is an optimization which would slow down on\r
+ ; normal memory, but I believe it will be okay on VGA memory,\r
+ ; which is so incredibly slow. Basically, I've replaced all\r
+ ; "rep movsb"'s with a loop that first calculates "bx = di - si",\r
+ ; and then loops performing "mov ds:[si],es:[si+bx]". Why?\r
+ ; Because of several reasons, none of which I'm sure actually\r
+ ; help out, but they do make for smaller code. 1) It means that\r
+ ; I only have to maintain SI, and "DI" is maintained automatically\r
+ ; (because DI - SI should remain constant). 2) Don't have to\r
+ ; calculate DS. Not much gain here. 3) Because I'd already\r
+ ; unrolled the loops, and the "rep movsb"'s had become instead\r
+ ; "mov al, ds:[si] / mov es:[di], al / mov al, ds:[si + 1] /\r
+ ; mov es:[di + 1],al ... etc ... add si, 4 / add di, 4". In\r
+ ; other words, I wasn't using MOVSB anyway. The only advantage\r
+ ; I can see in MOVSB is that it doesn't have to store the answer\r
+ ; in AL so it could be slightly faster. By unrolling the loops,\r
+ ; I'd already made up for that, I think. 4) Normally, using\r
+ ; [SI + BX + 1] would incur a penalty of an additional clock\r
+ ; cycle (because it has to add two indexs + an offset). But\r
+ ; the VGA memory and the '86 CPU can multi-task, and the VGA\r
+ ; is very slow. So by the time the VGA is ready to write the\r
+ ; next byte, the one extra clock cycle has already passed.\r
+ ;\r
+ ; Am I right? Does this make things faster? I have no idea.\r
+ ; I haven't bothered to check both ways. Please let me know\r
+ ; if I've missed something important...\r
+ ;\r
+ ; Here's the calculation of BX. SI is already set.\r
+ ; si already = DrawPage.UpperLeftAddress\r
+ mov bx,cs:BlankPage.Address\r
+ sub bx,cs:DrawPage.Address\r
+\r
+ ; Now, converts SI into "1/4" units. I do all the calculations\r
+ ; in "1/4" scale and then scale back up, mostly because it saved\r
+ ; me some instructions elsewhere.\r
+ shr si,2\r
+ ; Stores this value of SI. This will be restored after doing\r
+ ; the top/bottom copying.\r
+ mov dx,si\r
+\r
+ ; Check if it's necessary to catch up the top or bottom.\r
+catchup_tb: cmp di,0\r
+ je catchup_tb_end\r
+ jl catchup_t\r
+catchup_b: ; COPY BOTTOM\r
+ ; Move SI to point at the bottom of the screen - # of rows\r
+ ; to update.\r
+ add si,((VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4) / 4\r
+ sub si,di\r
+ jmp copy_tb\r
+catchup_t: ; COPY_TOP\r
+ ; Leave SI, but add to the "pushed" value of SI the number of\r
+ ; rows that will be drawn. This prevents overlap between top\r
+ ; and right/left when moving diagonally. Also, DI = |DI|\r
+ neg di\r
+ add dx,di\r
+\r
+ ; Now do the actual copying. Shifts SI back into scale "1",\r
+ ; then performs an unrolled loop to copy the entire virtual\r
+ ; width * # of pixel rows. Since DI is already in "1/4" scale,\r
+ ; it is only decremented once for each four pixels drawn.\r
+copy_tb: shl si,2\r
+copy_tb_loop: mov cl,es:[si]\r
+ mov es:[si+bx],cl\r
+ mov cl,es:[si+1]\r
+ mov es:[si+bx+1],cl\r
+ mov cl,es:[si+2]\r
+ mov es:[si+bx+2],cl\r
+ mov cl,es:[si+3]\r
+ mov es:[si+bx+3],cl\r
+ add si,4\r
+ dec di\r
+ jnz copy_tb_loop\r
+catchup_tb_end:\r
+\r
+ ; Next, check to see if it's necessary to draw the right or\r
+ ; the left side.\r
+catchup_rl: cmp ax,0\r
+ je catchup_rl_end\r
+ jg catchup_l\r
+catchup_r: ; COPY RIGHT\r
+ ; Adds to the "pushed" SI the width of the screen, minus\r
+ ; the number of rows to be drawn.\r
+ neg ax\r
+ add dx,(VIRTUAL_WIDTH / 4) / 4\r
+ sub dx,ax\r
+catchup_l: ; COPY LEFT (or nothing)\r
+\r
+ ; Does the actual copying. First pops SI from its stored value\r
+ ; and shifts it back into scale "1"\r
+copy_rl: mov si,dx\r
+ shl si,2\r
+\r
+ ; This is a loop over BP -- which has already been set as\r
+ ; VIRTUAL_HEIGHT - (# of bytes drawn in vertical update)\r
+ ; Again, this loop is unrolled such that it does two rows @\r
+ ; 4 bytes each with every iteration.\r
+ ; This LEA instruction is just a quick MOV DI, SI + 2 *y\r
+ ; DI is used to push the next value of SI for each iteration\r
+ ; of the loop.\r
+copy_rl_loop: lea di,[si + 2*(VIRTUAL_WIDTH/4)]\r
+ mov cx,ax\r
+copy_rl_col: mov dl,es:[si]\r
+ mov es:[si+bx],dl\r
+ mov dl,es:[si+1]\r
+ mov es:[si+bx+1],dl\r
+ mov dl,es:[si+2]\r
+ mov es:[si+bx+2],dl\r
+ mov dl,es:[si+3]\r
+ mov es:[si+bx+3],dl\r
+ mov dl,es:[si+VIRTUAL_WIDTH/4]\r
+ mov es:[si+bx+VIRTUAL_WIDTH/4],dl\r
+ mov dl,es:[si+VIRTUAL_WIDTH/4+1]\r
+ mov es:[si+bx+VIRTUAL_WIDTH/4+1],dl\r
+ mov dl,es:[si+VIRTUAL_WIDTH/4+2]\r
+ mov es:[si+bx+VIRTUAL_WIDTH/4+2],dl\r
+ mov dl,es:[si+VIRTUAL_WIDTH/4+3]\r
+ mov es:[si+bx+VIRTUAL_WIDTH/4+3],dl\r
+ add si,4\r
+ dec cx\r
+ jnz copy_rl_col\r
+ mov si,di ; SI = pop (SI + VIRTUAL_WIDTH/4)\r
+ inc bp ; (BP is negative, so INC it)\r
+ jnz copy_rl_loop\r
+catchup_rl_end:\r
+\r
+ ; Switch back to all-draw mode.\r
+ mov dx,GC_INDEX\r
+ mov ax,ALL_DRAW_BITS\r
+ out dx,ax\r
+ ret\r
+Scroll ENDP\r
+\1a
\ No newline at end of file
--- /dev/null
+main.obj+lztimer.obj \r
+scroll; \r
--- /dev/null
+; SPRITE routines\r
+MAX_SPRITE EQU 100\r
+\r
+; RECTANGLE: one node of a singly linked list of filled rectangles.\r
+; The first seven fields are supplied by the initializer; the last four\r
+; are scratch values written by do_fill_buffer and read by the copy loops.\r
+RECTANGLE STRUCT 2,NONUNIQUE\r
+ X WORD 0 ; left edge, in pixels (divided by 4 for the video address)\r
+ Y WORD 0 ; top edge, in rows (index into the Mult*Width tables)\r
+ Wid4 BYTE 0 ; width divided by 4\r
+ Ht BYTE 0 ; height, in rows\r
+ Color BYTE 0 ; byte value the buffer is filled with\r
+ Next WORD 0 ; offset of the next rectangle, 0 ends the chain\r
+ ; DrawMe is used to not bother with sprites that you know\r
+ ; are contained totally within another, allowing animated\r
+ ; eyes, etc to be stored in separate sprites. These will be\r
+ ; drawn to the local buffer but skipped when copying to the\r
+ ; screen, so if they are not TOTALLY contained, they will\r
+ ; just get clipped away.\r
+ DrawMe BYTE 1 ; default, yes draw me.\r
+ ; (Storage from this point on ... NEVER provide anything but\r
+ ; default for these values!)\r
+ address_virt WORD 0 ; MultVirtWidth[Y] + X/4, relative to the page origin\r
+ address_buf WORD 0 ; MultBufWidth[Y] + X, offset into the buffer segment\r
+ next_line_virt WORD 0 ; (VIRTUAL_WIDTH / 4) - Wid4: row skip in video memory\r
+ next_line_buf WORD 0 ; ((SCREEN_WIDTH / 4) - Wid4) * 4: row skip in the buffer\r
+RECTANGLE ENDS\r
+\r
+; SPRITE: currently just a wrapper around one unnamed RECTANGLE, which is\r
+; why every SPRITE initializer carries a nested <...> for the rectangle.\r
+SPRITE STRUCT 2, NONUNIQUE\r
+ RECTANGLE <> ; Contains rectangle info\r
+SPRITE ENDS\r
+\r
+; Static sprite list. Initializer field order follows RECTANGLE:\r
+;   X, Y, Wid4, Ht, Color, Next, DrawMe\r
+; Starting at FIRST_SPRITE (rect10) the Next links give the chain\r
+;   rect10 -> rect9 -> rect8 -> rect7 -> rect6 -> rect1 -> rect2 ->\r
+;   rect3 -> rect4 -> rect5 -> 0\r
+; rect1..rect5 have DrawMe = 0, so they are filled into the buffer but\r
+; skipped by the loops that test DrawMe.\r
+EVEN\r
+rect5 SPRITE <<40 ,60 , 2,8, C_TRANSPARENT, 0 , 0>>\r
+rect4 SPRITE <<80 ,30 , 2,8, C_TRANSPARENT, offset rect5, 0>>\r
+rect3 SPRITE <<120,60 , 2,8, C_TRANSPARENT, offset rect4, 0>>\r
+rect2 SPRITE <<55 ,100, 2,8, C_TRANSPARENT, offset rect3, 0>>\r
+rect1 SPRITE <<105,100, 2,8, C_TRANSPARENT, offset rect2, 0>>\r
+\r
+rect6 SPRITE <<36 ,56 , 4,16, C_BLUE, offset rect1, 1>>\r
+rect7 SPRITE <<76 ,26 , 4,16, C_BLUE, offset rect6, 1>>\r
+rect8 SPRITE <<116,56 , 4,16, C_BLUE, offset rect7, 1>>\r
+rect9 SPRITE <<51 ,96 , 4,16, C_BLUE, offset rect8, 1>>\r
+rect10 SPRITE <<101,96 , 4,16, C_BLUE, offset rect9, 1>>\r
+\r
+;; Simply adding in these 5 rectangles (~20000 pixels for both\r
+;; drawing and erasing) really slows things down! That's why\r
+;; it's important to optimize the sprite drawing routines!\r
+; rect11..rect15 link down to rect10, but nothing links to them and\r
+; FIRST_SPRITE is rect10, so they are not part of the active chain.\r
+rect11 SPRITE <<35 ,55 ,14,36, C_GREEN, offset rect10, 1>>\r
+rect12 SPRITE <<75 ,25 ,14,36, C_GREEN, offset rect11, 1>>\r
+rect13 SPRITE <<115,55 ,14,36, C_GREEN, offset rect12, 1>>\r
+rect14 SPRITE <<50 ,95 ,14,36, C_GREEN, offset rect13, 1>>\r
+rect15 SPRITE <<100,95 ,14,36, C_GREEN, offset rect14, 1>>\r
+\r
+; Head of the list handed to do_fill_buffer / smart_rects.\r
+FIRST_SPRITE EQU rect10\r
+\r
+; AnimateSprites: restore the background under the previous sprites on the\r
+; draw page, then fill and draw the FIRST_SPRITE chain.\r
+; NOTE: the routine is currently disabled -- the RET directly after the\r
+; PROC line returns at once and everything below it is unreachable.\r
+; NOTE(review): step 3 reads DrawPage.AlignmentMask, but the store to that\r
+; field in Scroll is commented out -- confirm it is initialized elsewhere\r
+; before re-enabling this code.\r
+EVEN\r
+AnimateSprites PROC near\r
+ ret\r
+\r
+ ; Step 1: copy, from the blank page to the draw page, every rectangle\r
+ ; recorded in BlankPage.Rectangles. This only works while the blank\r
+ ; page really is blank.\r
+ ; SI = source origin (blank page), DI = destination origin (draw page)\r
+ mov bp,cs:BlankPage.Rectangles\r
+ mov si,cs:BlankPage.ScrollOffset\r
+ add si,cs:BlankPage.UpperLeftAddress\r
+ mov di,cs:DrawPage.ScrollOffset\r
+ add di,cs:DrawPage.UpperLeftAddress\r
+ call CopyRectangles\r
+\r
+ ; Step 2: remember which list is drawn on this page and paint it into\r
+ ; the temporary buffer. The buffer is optional (it costs memory) but\r
+ ; keeps the amount of drawing to the screen small.\r
+ mov bp,offset FIRST_SPRITE\r
+ mov cs:DrawPage.Rectangles,bp\r
+ call do_fill_buffer\r
+\r
+ ; Step 3: move the buffer contents to the draw page.\r
+ ; BP = list head, BH = plane mask, DI = draw page origin\r
+ mov bp,offset FIRST_SPRITE\r
+ mov bh,cs:DrawPage.AlignmentMask\r
+ mov di,cs:DrawPage.ScrollOffset\r
+ add di,cs:DrawPage.UpperLeftAddress\r
+ jmp smart_rects ; tail call: smart_rects' RET returns to our caller\r
+AnimateSprites ENDP\r
+\r
+; Scratch words used by smart_rects (kept in the code segment, accessed\r
+; with a CS: override).\r
+smart_dest DW 0 ; destination base offset (DI on entry + DrawPage.Address)\r
+out_di DW 0 ; DI at the start of the current plane pass\r
+out_si DW 0 ; SI at the start of the current plane pass\r
+\r
+; smart_rects: copy the rectangles of a list from the off-screen buffer\r
+; (segBuffer) to video memory (segVideo), one plane at a time, skipping\r
+; bytes equal to C_TRANSPARENT.\r
+;   In:  DI = destination base offset; DrawPage.Address is added to it here\r
+;        BH = map-mask value for the first plane (rotated left per plane)\r
+;        BP = offset (in CS) of the first RECTANGLE, or 0 for an empty list\r
+;   Out: DS = segBuffer, ES = segVideo (neither is restored)\r
+;   Clobbers: AX, BL, CX, DX, SI, DI, BP, flags; BH is rotated left four\r
+;             times for every rectangle that is drawn\r
+; Every byte copied to the screen is replaced by C_TRANSPARENT in the\r
+; buffer so that it is not drawn a second time.\r
+; NOTE(review): Scroll stores DrawPage.UpperLeftAddress as Address +\r
+; upper_left and AnimateSprites passes UpperLeftAddress + ScrollOffset in\r
+; DI, so Address looks like it is added twice here -- confirm.\r
+EVEN\r
+smart_rects PROC near\r
+ add di,cs:DrawPage.Address\r
+ mov ds,cs:segBuffer\r
+ mov es,cs:segVideo\r
+ mov dx,3c4h ; VGA sequencer index port\r
+ mov al,02h ; select sequencer register 2 (map mask)\r
+ out dx,al\r
+ inc dx ; DX = 3C5h, sequencer data port, for the plane loop\r
+ mov cs:smart_dest,di\r
+\r
+ ; An empty list (BP = 0) has nothing to draw; without this check the\r
+ ; loop below would treat offset 0 of the code segment as a RECTANGLE.\r
+ cmp bp,0\r
+ je sp_end\r
+\r
+ ; === Beginning of loop through rectangles! ===\r
+sp_nextrect:\r
+ cmp cs:[bp].RECTANGLE.DrawMe,1\r
+ jne sp_next\r
+ ; Draw this rectangle from the buffer to screen memory.\r
+ ; Calculate the output address.\r
+ mov si,cs:[bp].RECTANGLE.address_buf\r
+ mov di,cs:[bp].RECTANGLE.address_virt\r
+ add di,cs:smart_dest\r
+\r
+ ; Loop over 4 planes\r
+ mov bl,4\r
+sp_plane_loop: mov al,bh\r
+ out dx,al ; map mask = BH: write to this plane only\r
+\r
+ ; Remember where this plane pass started; the next pass begins one\r
+ ; buffer byte further on.\r
+ mov cs:out_di,di\r
+ mov cs:out_si,si\r
+\r
+ ; Loop over height\r
+ mov ch,cs:[bp].RECTANGLE.Ht\r
+sp_row_loop:\r
+\r
+ ; Loop over width of rectangle (Wid4 is actually width/4)\r
+ mov cl,cs:[bp].RECTANGLE.Wid4\r
+sp_col_loop:\r
+\r
+ ; Read a byte from the buffer\r
+ ; Is it transparent (no-modify)? If so, just jump over the draw\r
+ mov al,byte ptr ds:[si]\r
+ cmp al,C_TRANSPARENT\r
+ je sp_next_pixel\r
+ ; Otherwise, draw it on the screen, and mark it transparent\r
+ ; so that it won't be drawn again.\r
+ mov byte ptr es:[di],al\r
+ mov byte ptr ds:[si],C_TRANSPARENT\r
+\r
+ ; Skip to the next 4-byte group in the buffer (the next column that\r
+ ; belongs to this plane). The screen address only advances by 1,\r
+ ; because one Mode X byte address covers 4 pixels.\r
+sp_next_pixel:\r
+ add si,4\r
+ inc di\r
+\r
+ dec cl\r
+ jnz sp_col_loop\r
+\r
+ ; End of row. Skip ahead to the left edge of the next row down,\r
+ ; using the per-row skips stored in the rectangle.\r
+ add si,cs:[bp].RECTANGLE.next_line_buf\r
+ add di,cs:[bp].RECTANGLE.next_line_virt\r
+ dec ch\r
+ jnz sp_row_loop\r
+\r
+ ; Next plane: one byte further in the buffer, mask rotated left.\r
+ ; MOV and INC leave the carry flag alone, so ADC sees the bit that\r
+ ; ROL shifted out of BH and moves DI on by one when the mask wraps.\r
+ mov di,cs:out_di\r
+ mov si,cs:out_si\r
+ inc si\r
+ rol bh,1\r
+ adc di,0\r
+\r
+ dec bl\r
+ jnz sp_plane_loop\r
+\r
+ ; Follow chain to next rectangle\r
+sp_next: mov bp,cs:[bp].RECTANGLE.Next\r
+ cmp bp,0\r
+ jne sp_nextrect\r
+ ; All done\r
+sp_end: ret\r
+smart_rects ENDP\r
+\r
+; do_fill_buffer: paint every rectangle of a list into the off-screen\r
+; buffer as a solid block of its Color, and cache its addresses.\r
+;   In:  BP = offset (in CS) of the first RECTANGLE; the walk follows\r
+;             Next and stops when BP = 0 (an empty list is allowed)\r
+;   Out: ES = segBuffer; address_buf, address_virt, next_line_virt and\r
+;        next_line_buf of every visited rectangle are filled in\r
+;   Clobbers: AX, BX, CX, DX, DI, BP, flags\r
+EVEN\r
+do_fill_buffer PROC near\r
+ mov es,cs:segBuffer\r
+\r
+fill_rect: test bp,bp ; reached the end of the chain?\r
+ jz fill_done\r
+\r
+ ; Row skip in video memory: (VW / 4) - Wid4.\r
+ ; Only DL is adjusted, so this relies on the constant fitting in a\r
+ ; byte and on Wid4 not exceeding it.\r
+ mov dx,(VIRTUAL_WIDTH / 4)\r
+ sub dl,cs:[bp].RECTANGLE.Wid4\r
+ mov cs:[bp].RECTANGLE.next_line_virt,dx\r
+\r
+ ; Start addresses, both looked up through the row tables:\r
+ ;   video  = MultVirtWidth[y] + x / 4\r
+ ;   buffer = MultBufWidth[y] + x\r
+ mov bx,cs:[bp].RECTANGLE.Y\r
+ shl bx,1 ; tables hold words\r
+ mov cx,cs:[bp].RECTANGLE.X\r
+ mov ax,cx\r
+ shr ax,2\r
+ add ax,cs:MultVirtWidth[bx]\r
+ mov cs:[bp].RECTANGLE.address_virt,ax\r
+ mov di,cs:MultBufWidth[bx]\r
+ add di,cx\r
+ mov cs:[bp].RECTANGLE.address_buf,di ; DI stays live for the fill\r
+\r
+ ; Row skip in the buffer: ((SW / 4) - Wid4) * 4 = SW - width.\r
+ ; DX keeps this value for the fill loop below.\r
+ mov dx,(SCREEN_WIDTH / 4)\r
+ sub dl,cs:[bp].RECTANGLE.Wid4\r
+ shl dx,2\r
+ mov cs:[bp].RECTANGLE.next_line_buf,dx\r
+\r
+ ; AX = colour in both bytes, so each word store writes two pixels.\r
+ mov al,cs:[bp].RECTANGLE.Color\r
+ mov ah,al\r
+\r
+ ; Fill Ht rows of Wid4 four-byte groups.\r
+ mov ch,cs:[bp].RECTANGLE.Ht\r
+fill_row: mov cl,cs:[bp].RECTANGLE.Wid4\r
+fill_quad: mov es:[di],ax\r
+ mov es:[di+2],ax\r
+ add di,4\r
+ dec cl\r
+ jnz fill_quad\r
+ add di,dx ; on to the left edge of the next row\r
+ dec ch\r
+ jnz fill_row\r
+\r
+ mov bp,cs:[bp].RECTANGLE.Next\r
+ jmp fill_rect\r
+\r
+fill_done: ret\r
+do_fill_buffer ENDP\r
+\r
+; CopyRectangles: for every rectangle in a list, copy its area from one\r
+; video page to another inside segVideo.\r
+;   In:  DI = destination page origin (offset in video memory)\r
+;        SI = source page origin (offset in video memory)\r
+;        BP = offset (in CS) of the first RECTANGLE, or 0 for an empty list\r
+;   Out: DS = ES = segVideo (not restored); the graphics controller is\r
+;        switched back with ALL_DRAW_BITS; the map mask stays at 0Fh\r
+;   Clobbers: AX, BX, CX, DX, SI, DI, BP, flags\r
+;   Uses STOSB, so the direction flag is expected to be clear.\r
+EVEN\r
+CopyRectangles PROC near\r
+ mov ax,cs:segVideo\r
+ mov ds,ax\r
+ mov es,ax\r
+\r
+ ; Source and destination stay a constant distance apart, so keep only\r
+ ; the destination pointer (DI) and reach the source as [DI + BX].\r
+ ; For that to address the source page, BX must be source - destination.\r
+ mov bx,si\r
+ sub bx,di\r
+\r
+ ; Same register setup as the page catch-up code in Scroll: graphics\r
+ ; controller set with ALL_COPY_BITS, map mask = all four planes.\r
+ mov dx,GC_INDEX\r
+ mov ax,ALL_COPY_BITS\r
+ out dx,ax\r
+\r
+ mov dx,SC_INDEX\r
+ mov ax,0F02h\r
+ out dx,ax\r
+ mov si,di ; SI now holds the destination origin\r
+\r
+ ; An empty list (BP = 0) has nothing to copy; without this check the\r
+ ; loop below would treat offset 0 of the code segment as a RECTANGLE.\r
+ cmp bp,0\r
+ je cr_end\r
+\r
+ ; === Beginning of loop through rectangles! ===\r
+cr_nextrect: cmp cs:[bp].RECTANGLE.DrawMe,1\r
+ jne cr_next\r
+ ; Copy this rectangle from the source page to the destination page.\r
+ ; Calculate the output address.\r
+ mov di,cs:[bp].RECTANGLE.address_virt\r
+ mov dx,cs:[bp].RECTANGLE.next_line_virt\r
+ add di,si\r
+\r
+ ; Loop over height\r
+ mov ch,cs:[bp].RECTANGLE.Ht\r
+cr_row_loop:\r
+\r
+ ; Loop over width of rectangle (Wid4 is actually width/4)\r
+ mov cl,cs:[bp].RECTANGLE.Wid4\r
+cr_col_loop: mov al,ds:[di + bx] ; read from the source page\r
+ stosb ; write to the destination, DI = DI + 1\r
+ dec cl\r
+ jnz cr_col_loop\r
+ ; One more byte past Wid4, without advancing DI -- presumably for a\r
+ ; rectangle that does not start on a 4-pixel boundary (TODO confirm).\r
+ mov al,ds:[di + bx]\r
+ mov es:[di],al\r
+\r
+ ; End of row. Skip space to get to left edge of next row down\r
+ ; Only draw up to height of rectangle\r
+ add di,dx\r
+ dec ch\r
+ jnz cr_row_loop\r
+\r
+ ; Follow chain to next rectangle\r
+cr_next: mov bp,cs:[bp].RECTANGLE.Next\r
+ cmp bp,0\r
+ jne cr_nextrect\r
+ ; All done: put the graphics controller back into normal draw mode.\r
+cr_end:\r
+ mov dx,GC_INDEX\r
+ mov ax,ALL_DRAW_BITS\r
+ out dx,ax\r
+ ret\r
+CopyRectangles ENDP\r
+\r
+\1a
\ No newline at end of file