From: sparky4 <sparky4@4ch.maidlab.jp>
Date: Fri, 13 Jun 2014 17:06:51 +0000 (-0500)
Subject: 	modified:   16/DOS_GFX.EXE
X-Git-Url: http://4ch.mooo.com/gitweb/?a=commitdiff_plain;h=b50a0eb714c64dee65050539243e02ef2aa308b5;p=16.git

	modified:   16/DOS_GFX.EXE
	modified:   16/DOS_GFX.OBJ
	modified:   16/Project 16.bfproject
	modified:   16/dos_gfx.cpp
	modified:   16/dos_gfx.h
	new file:   16/lib/x/MODEX.BAK
	new file:   16/scrasm/80X86.ASC
	new file:   16/scrasm/80X86.TXT
	new file:   16/scrasm/CONSTANT.INC
	new file:   16/scrasm/DIAGONAL.MAP
	new file:   16/scrasm/DIAGONAL.PAL
	new file:   16/scrasm/DIAGONAL.TIL
	new file:   16/scrasm/GENMAP.C
	new file:   16/scrasm/GENMAP.EXE
	new file:   16/scrasm/GENMAP.LNK
	new file:   16/scrasm/GENMAP.OBJ
	new file:   16/scrasm/GENPAL.C
	new file:   16/scrasm/GENPAL.EXE
	new file:   16/scrasm/GENPAL.LNK
	new file:   16/scrasm/GENPAL.OBJ
	new file:   16/scrasm/GENSQ.C
	new file:   16/scrasm/GENSQ.EXE
	new file:   16/scrasm/GENSQ.LNK
	new file:   16/scrasm/GENSQ.OBJ
	new file:   16/scrasm/INIT.INC
	new file:   16/scrasm/KEYB.INC
	new file:   16/scrasm/LZTIMER.ASM
	new file:   16/scrasm/LZTIMER.OBJ
	new file:   16/scrasm/MAIN.ASM
	new file:   16/scrasm/MAIN.OBJ
	new file:   16/scrasm/MAKEFILE
	new file:   16/scrasm/MAP.INC
	new file:   16/scrasm/MODEX.INC
	new file:   16/scrasm/PAGE.INC
	new file:   16/scrasm/PALETTE.INC
	new file:   16/scrasm/SCROLL.DOC
	new file:   16/scrasm/SCROLL.EXE
	new file:   16/scrasm/SCROLL.INC
	new file:   16/scrasm/SCROLL.LNK
	new file:   16/scrasm/SCROLL.MAP
	new file:   16/scrasm/SCROLL.PAL
	new file:   16/scrasm/SCROLL.TIL
	new file:   16/scrasm/SPRITE.INC
---

diff --git a/16/DOS_GFX.EXE b/16/DOS_GFX.EXE
index 13b89317..d4df1da3 100644
Binary files a/16/DOS_GFX.EXE and b/16/DOS_GFX.EXE differ
diff --git a/16/DOS_GFX.OBJ b/16/DOS_GFX.OBJ
index 35e3f4ed..f17a0b16 100644
Binary files a/16/DOS_GFX.OBJ and b/16/DOS_GFX.OBJ differ
diff --git a/16/Project 16.bfproject b/16/Project 16.bfproject
index b5009ceb..378d9de2 100644
--- a/16/Project 16.bfproject	
+++ b/16/Project 16.bfproject	
@@ -1,7 +1,7 @@
 c2e.convert_special: 0
 e2c.convert_num: 0
-openfiles: /dos/z/16/16/dos_gfx.cpp:8135:7436:1:
-openfiles: /dos/z/16/16/dos_gfx.h:327:0:0:
+openfiles: /dos/z/16/16/dos_gfx.cpp:1620:647:1:
+openfiles: /dos/z/16/16/dos_gfx.h:665:373:0:
 openfiles: /dos/z/16/16/dos_kb.c:1039:46:0:
 openfiles: /dos/z/16/16/dos_kb.h:23:0:0:
 openfiles: /dos/z/16/16/lib/lib_com.cpp:0:0:0:
@@ -80,25 +80,25 @@ recent_files: file:///dos/z/16/16/xx.bat
 recent_files: file:///dos/z/16/16/lib/x/MXPN.ASM
 recent_files: file:///dos/z/4x4_16/!/c/TUT10.C
 recent_files: file:///dos/z/16/16/lib/x/MXVS.ASM
-recent_files: file:///dos/z/16/16/lib/x/MODEX.H
-recent_files: file:///dos/z/4x4_16/modex/DEMO01.PAS
-recent_files: file:///dos/z/4x4_16/modex/DEMO07.PAS
+recent_files: file:///dos/z/16/16/lib/x/MAKEFILE
+recent_files: file:///dos/z/16/16/lib/x/MODEX.DEF
 recent_files: file:///dos/z/16/16/dos_gfx.h
+recent_files: file:///dos/z/16/16/dos_gfx.cpp
 recent_files: file:///dos/z/16/16/dos_kb.c
 recent_files: file:///dos/z/16/16/dos_kb.h
 recent_files: file:///dos/z/16/16/lib/lib_com.cpp
-recent_files: file:///dos/z/16/16/lib/lib_com.h
 recent_files: file:///dos/z/16/16/16.txt
+recent_files: file:///dos/z/16/16/lib/lib_com.h
 recent_files: file:///dos/z/16/16/scroll.txt
 recent_files: file:///dos/z/16/16/project16.txt
-recent_files: file:///dos/z/16/16/lib/intro/lib.c
-recent_files: file:///dos/z/16/src/lib/dos_gfx.h
+recent_files: file:///dos/z/16/16/lib/x/MODEX.H
+recent_files: file:///dos/z/4x4_16/modex/DEMO07.PAS
 recent_files: file:///dos/z/16/16/lib/x/MXBB.ASM
-recent_files: file:///dos/z/16/src/lib/dos_gfx.cpp
+recent_files: file:///dos/z/16/src/lib/dos_gfx.h
+recent_files: file:///dos/z/4x4_16/modex/DEMO01.PAS
 recent_files: file:///dos/z/16/16/lib/x/MXCR.ASM
-recent_files: file:///dos/z/16/16/dos_gfx.cpp
-recent_files: file:///dos/z/16/16/lib/x/MAKEFILE
-recent_files: file:///dos/z/16/16/lib/x/MODEX.DEF
+recent_files: file:///dos/z/16/16/lib/intro/lib.c
+recent_files: file:///dos/z/16/src/lib/dos_gfx.cpp
 snr_replacetype: 0
 savedir: file:///dos/z/16/16
 spell_check_default: 1
diff --git a/16/dos_gfx.cpp b/16/dos_gfx.cpp
index 41b7d533..52e259db 100644
--- a/16/dos_gfx.cpp
+++ b/16/dos_gfx.cpp
@@ -48,10 +48,11 @@ void setvideo(/*byte mode, */short vq){
 				mxSetMode( MX_320x240 );
 //				mxSetVirtualScreen(SW+(SW/4), SH+(SH/4));
 //				mxSetVirtualScreen(SW*2, SH*2);
-				mxSetVirtualScreen(VW,(VH+(TILEWH*BUFFMX)));
+				mxSetVirtualScreen(VW,BH);
 //				mxSetVirtualScreen((640-(TILEWH*4)),(480-(TILEWH*4)));
 				mxSetClip(true);
-				mxSetClipRegion(0, 0, VW, (VH+(TILEWH*BUFFMX)));
+				mxSetClipRegion(0, 0, VW, BH);
+				mxPan(TILEWH*2,TILEWH*2);
 				//mxSetClipRegion(0, VH+1, VW, (TILEWH*BUFFMX));
 		}
 }
@@ -212,17 +213,17 @@ short ding(int q){
 						}
 				}
 				// fixer
-				if(q!=16){
+				if(q!=16){
 					#ifdef TILE
 						if(xx<0) xx=(VW-TILEWH);
 						if(yy<0) yy=(VH-TILEWH);
 						if(xx>(VW-TILEWH)) xx=0;
-						if(yy>(VH-TILEWH)/*+(TILEWH*BUFFMX)*/) yy=0;
-					#else
+						if(yy>(VH-TILEWH)/*+(TILEWH*BUFFMX)*/) yy=0;
+					#else
 						if(xx<0) xx=VW;
 						if(yy<0) yy=VH;
 						if(xx>VW) xx=0;
-						if(yy>VH) yy=0;
+						if(yy>VH) yy=0;
 					#endif
 				}
 
@@ -272,8 +273,8 @@ int main(void)
 		// main variables
 		d=4; // switch variable
 		key=4; // default screensaver number
-		xpos=0;
-		ypos=0;
+		xpos=TILEWH*2;
+		ypos=TILEWH*2;
 		xdir=1;
 		ydir=1;
 		setvideo(1);
@@ -319,21 +320,21 @@ int main(void)
 				mxPutPixel(VW-1, y, 15);
 			}
 			
-			getch();
+			getch();
 			//text box
-			mxSetTextColor(10, OP_TRANS); //set font
-			mxBitBlt(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, VH); //copy background
-			mxFillBox(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, OP_SET); // background for text box
-			//+(QUADWH*6)
-			mxOutText(xpos+1, ypos+SH-48, "========================================");
+			mxSetTextColor(10, OP_TRANS); //set font
+			mxBitBlt(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, BS); //copy background
+			mxFillBox(xpos, ypos+(TILEWH*12), 320, TILEWH*BUFFMX, 0, OP_SET); // background for text box
+			//+(QUADWH*6)
+			mxOutText(xpos+1, ypos+SH-48, "========================================");
 			mxOutText(xpos+1, ypos+SH-40, "|    |Chikyuu:$line1");
 			mxOutText(xpos+1, ypos+SH-32, "|    |$line2");
 			mxOutText(xpos+1, ypos+SH-24, "|    |$line3");
-			mxOutText(xpos+1, ypos+SH-16, "|    |$line4");
-			mxOutText(xpos+1, ypos+SH-8,  "========================================");
-			mxFillBox(xpos+QUADWH, ypos+QUADWH+(TILEWH*12), TILEWH*2, TILEWH*2, 9, OP_SET);
-			getch();
-			mxBitBlt(0, VH, 320, TILEWH*BUFFMX, xpos, ypos+(TILEWH*12)); //copy background
+			mxOutText(xpos+1, ypos+SH-16, "|    |$line4");
+			mxOutText(xpos+1, ypos+SH-8,  "========================================");
+			mxFillBox(xpos+QUADWH, ypos+QUADWH+(TILEWH*12), TILEWH*2, TILEWH*2, 9, OP_SET); //portriat~
+			getch();
+			mxBitBlt(0, BS, 320, TILEWH*BUFFMX, xpos, ypos+(TILEWH*12)); //copy background
 			//mxBitBlt(0, (TILEWH*12)+1, 320, TILEWH*3, 0, 0);
 			getch();
 		while(!kbhit()){
@@ -341,18 +342,19 @@ int main(void)
 //			scrolly(1);
 //			vScroll(1);
 //			delay(100);
-			//for(int i=0;i<TILEWH;i++){
+			//for(int i=0;i<TILEWH;i++){
 				
 				ding(key);
 				mxPan(xpos,ypos);
 				//for(short o = 0; o<TILEWH; o++){
-					//xpos+=xdir;
-					//ypos+=ydir;
-					if(ypos==1 || (ypos==((VH+(TILEWH*BUFFMX))-SH-1)))delay(1000);
-					//mxWaitRetrace();
+					xpos+=xdir;
+					ypos+=ydir;
+					//if(ypos==1 || (ypos==(BH-SH-1)))delay(500);
+					//if((xpos>(VW-SW-1)) || (xpos<1))delay(500);
+					mxWaitRetrace();
 				//}
 				if( (xpos>(VW-SW-1))  || (xpos<1)){xdir=-xdir;}
-				if( (ypos>((VH+(TILEWH*BUFFMX))-SH-1)) || (ypos<1)){ydir=-ydir;} // { Hit a boundry, change
+				if( (ypos>(BH-SH-1)) || (ypos<1)){ydir=-ydir;} // { Hit a boundry, change
 			//    direction! }
 			}
 			ch=getch();
@@ -360,7 +362,8 @@ int main(void)
 			if(ch==0x1b)break; // 'ESC'
 		}
 		setvideo(0);
-		printf("wwww\nVirtual Resolution: %dx%d\n", VW,VH);
+		printf("wwww\nFull Buffer Virtual Resolution: %dx%d\n", VW,BH);
+		printf("Virtual Resolution: %dx%d\n", VW,VH);
 		printf("Resolution: %dx%d\n", SW,SH);
 		printf("Mode X Library Version: %d\n", mxGetVersion());
 		printf("bakapi ver. 1.04.09.04\nis made by sparky4iÖj feel free to use it ^^\nLicence: GPL v2\n");
diff --git a/16/dos_gfx.h b/16/dos_gfx.h
index b9045057..231c1061 100644
--- a/16/dos_gfx.h
+++ b/16/dos_gfx.h
@@ -21,8 +21,12 @@
 
 #define SW	320
 #define SH	240
-#define VW	560
-#define VH	416
+//#define VW	560
+//#define VH	416
+#define VW	(SW+64)
+#define VH	(SH+64)
+#define BS (VH*2) // buffer space! not BULLSHIT
+#define BH	(BS+(TILEWH*BUFFMX)) // full buffer height; parenthesized so BH is safe inside larger expressions
 
 //void drawChar(int x, int y, int color, byte c);
 //void drawText(int x, int y, int color, byte string);
diff --git a/16/lib/x/MODEX.BAK b/16/lib/x/MODEX.BAK
new file mode 100644
index 00000000..560a1c68
Binary files /dev/null and b/16/lib/x/MODEX.BAK differ
diff --git a/16/scrasm/80X86.ASC b/16/scrasm/80X86.ASC
new file mode 100644
index 00000000..e16af972
--- /dev/null
+++ b/16/scrasm/80X86.ASC
@@ -0,0 +1,164 @@
+_80x86 OPTIMIZATION_
+by Michael Abrash
+
+
+[LISTING ONE]
+
+; Copies one string to another string, converting all characters to
+; uppercase in the process, using a loop containing LODSB and STOSB.
+; Adapted from Zen of Assembly Language, by Michael Abrash; not a
+; standalone program, but designed to be used with the Zen timer from
+; that book via the Zen timer's PZTIME.BAT batch file: ZTimerOn starts
+; the clock, ZTimerOff stops it, and the test-bed program linked in by
+; PZTIME.BAT starts the program, reports the results, and ends.
+
+	jmp	Skip		;skip over data in CS and subroutine
+
+SourceString	label	word		;sample string to copy
+	db	'This space intentionally left not blank',0
+DestString	db	100 dup (?)	;destination for copy
+
+; Copies one zero-terminated string to another string,
+; converting all characters to uppercase.
+; Input: DS:SI = start of source string; DS:DI = start of destination buffer
+; Output: none
+; Registers altered: AX, BX, SI, DI, ES
+; Direction flag cleared
+
+CopyStringUpper:
+	mov	ax,ds
+	mov	es,ax	;for STOS
+	mov	bl,'a'	;set up for fast register-register
+	mov	bh,'z'	; comparisons
+	cld
+StringUpperLoop:
+	lodsb		;get next character and point to following character
+	cmp	al,bl	;below 'a'?
+	jb	IsUpper	;yes, not lowercase
+	cmp	al,bh	;above 'z'?
+	ja	IsUpper	;yes, not lowercase
+	and	al,not 20h ;is lowercase-make uppercase
+IsUpper:
+	stosb		;put character into new string and point to 
+                        ; following location
+	and	al,al	;is this the zero that marks end of the string?
+	jnz	StringUpperLoop ;no, do the next character
+	ret
+
+; Calls CopyStringUpper to copy & convert SourceString->DestString.
+Skip:
+	call	ZTimerOn		;start timing
+	mov	si,offset SourceString	;point SI to the string to copy from
+	mov	di,offset DestString	;point DI to the string to copy to
+	call	CopyStringUpper		;copy & convert to uppercase
+	call	ZTimerOff		;stop timing
+
+
+[LISTING TWO]
+
+; Copies one string to another string, converting all characters to
+; uppercase in the process, using no string instructions.
+; Not a standalone program, but designed to be used with the Zen
+; timer, as described in Listing 1.
+
+	jmp	Skip		;skip over data in CS and subroutine
+
+SourceString	label	word		;sample string to copy
+	db	'This space intentionally left not blank',0
+DestString	db	100 dup (?)	;destination for copy
+
+; Copies one zero-terminated string to another string,
+; converting all characters to uppercase. 
+; Input: DS:SI = start of source string; DS:DI = start of destination string
+; Output: none
+; Registers altered: AL, BX, SI, DI
+
+CopyStringUpper:
+	mov	bl,'a'	;set up for fast register-register
+	mov	bh,'z'	; comparisons
+StringUpperLoop:
+	mov	al,[si]	;get the next character and
+	inc	si	; point to the following character
+	cmp	al,bl	;below 'a'?
+	jb	IsUpper	;yes, not lowercase
+	cmp	al,bh	;above 'z'?
+	ja	IsUpper	;yes, not lowercase
+	and	al,not 20h ;is lowercase-make uppercase
+IsUpper:
+	mov	[di],al	;put the character into the new string and
+	inc	di	; point to the following location
+	and	al,al	;is this the zero that marks the end of the string?
+	jnz	StringUpperLoop ;no, do the next character
+	ret
+
+; Calls CopyStringUpper to copy & convert SourceString->DestString.
+Skip:
+	call	ZTimerOn
+	mov	si,offset SourceString	;point SI to the string to copy from
+	mov	di,offset DestString	;point DI to the string to copy to
+	call	CopyStringUpper		;copy & convert to uppercase
+	call	ZTimerOff
+
+
+[LISTING THREE]
+
+; Clears a buffer using MOV/ADD in a loop.
+; Not a standalone program, but designed to be used with the Zen
+; timer, as described in Listing 1.
+
+	mov	dx,2		;repeat the test code twice, to make
+				; sure it's in the cache (if there is one)
+	mov	bx,dx		;distance from the start of one word
+				; to the start of the next
+	sub	ax,ax		;set buffer to zeroes
+TestTwiceLoop:
+	mov	cx,1024		;clear 1024 words starting at address
+	mov	di,8000h	; DS:8000h (this is just unused memory
+				; past the end of the program)
+	call	ZTimerOn	;start timing (resets timer to 0)
+StoreLoop:
+	mov	[di],ax		;clear the current word
+	add	di,bx		;point to the next word
+	dec	cx		;count off words to clear until none
+	jnz	StoreLoop	; remain
+	call	ZTimerOff	;stop timing
+	dec	dx		;count off passes through test code
+	jz	StoreDone	;that was the second pass; we're done
+	jmp	TestTwiceLoop	;that was first pass; do second pass with all 
+                                ; instructions and data in the cache
+StoreDone:
+
+
+[LISTING FOUR]
+
+; Clears a buffer using MOV/ADD in an unrolled loop.
+; Not a standalone program, but designed to be used with the Zen
+; timer, as described in Listing 1.
+
+	mov	dx,2		;repeat the test code twice, to make
+				; sure it's in the cache (if there is one)
+	mov	bx,dx		;distance from the start of one word
+				; to the start of the next
+	sub	ax,ax		;set buffer to zeroes
+TestTwiceLoop:
+	mov	si,1024		;clear 1024 words starting at address
+	mov	di,8000h	; DS:8000h (this is just unused memory
+				; past the end of the program)
+	call	ZTimerOn	;start timing (resets timer to 0)
+	mov	cl,4		;divide the count of words to clear by
+	shr	si,cl		; 16, because we'll clear 16 words
+				; each time through the loop
+StoreLoop:
+	REPT	16		;clear 16 words in a row without looping
+	mov	[di],ax		;clear the current word
+	add	di,bx		;point to the next word
+	ENDM
+	dec	si		;count off blocks of 16 words to clear
+	jnz	StoreLoop	; until none remain
+	call	ZTimerOff	;stop timing
+	dec	dx		;count off passes through test code
+	jz	StoreDone	;that was the second pass; we're done
+	jmp	TestTwiceLoop	;that was the first pass; do the second pass 
+                                ; with all instructions and data in the cache
+StoreDone:
+
diff --git a/16/scrasm/80X86.TXT b/16/scrasm/80X86.TXT
new file mode 100644
index 00000000..afba3707
--- /dev/null
+++ b/16/scrasm/80X86.TXT
@@ -0,0 +1,494 @@
+Journal:   Dr. Dobb's Journal  March 1991 v16 n3 p16(8)
+-----------------------------------------------------------------------------
+Title:     80x86 optimization: aim down the middle and pray. (80x86 family of
+           microprocessors) (tutorial)
+Author:    Abrash, Michael.
+AttFile:    Program:  80X86.ASC  Source code listing.
+
+Summary:   Optimizing code for 8088, 80286, 80386 and 80486 microprocessors
+           is difficult because the chips use significantly different memory
+           architectures and instruction execution times.  Code cannot be
+           optimized for the 80x86 family; rather, code must be designed to
+           produce good performance on a range of systems or optimized for
+           particular combinations of processors and memory.  Programmers
+           must avoid the unusual instructions supported by the 8088, which
+           have lost their performance edge in subsequent chips.  String
+           instructions should be used but not relied upon.  Registers should
+           be used rather than memory operations.  Branching is also slow for
+           all four processors.  Memory accesses should be aligned to improve
+           performance.  Generally, optimizing an 80486 requires exactly the
+           opposite steps as optimizing an 8088.
+-----------------------------------------------------------------------------
+Descriptors..
+Company:   Intel Corp. (Products).
+Ticker:    INTC.
+Product:   Intel 80286 (Microprocessor) (Programming)
+           Intel 80386 (Microprocessor) (Programming)
+           Intel 80486 (Microprocessor) (Programming)
+           Intel 8088 (Microprocessor) (Programming).
+Topic:     Microprocessors
+           Optimization
+           Programming
+           Tutorial
+           Assembly Language
+           Guidelines
+           Type-In Programs
+           Microcode
+           Processor Architecture.
+Feature:   illustration
+           graph.
+Caption:   Official and actual cycles per binary-to-hex ASCII conversion.
+           (graph)
+           Actual performance in microseconds of two solutions to a problem.
+           (graph)
+           Actual performance of three clearing approaches across the 80x86
+           family. (graph)
+
+-----------------------------------------------------------------------------
+Full Text:
+
+Optimization
+
+Picture this: You're an archer aiming at a target 100 feet away.  A strong
+wind comes up and pushes each arrow to the left as it flies.  Naturally, you
+compensate by aiming farther to the right.  That's what it's like optimizing
+for the 8088; once you learn to compensate for the strong but steady effects
+of the prefetch queue and the 8-bit bus, you can continue merrily on your
+programming way.
+
+Now the wind starts gusting unpredictably.  There's no way to compensate, so
+you just aim for the bull's-eye and hope for the best.  That's what it's like
+writing code for good performance across the entire 80x86 family, or even for
+the 286/386SX/386 heart of today's market.  You just aim down the middle and
+pray.
+
+The New World of the 80x86
+
+In the beginning, the 8088 was king, and that was good.  The optimization
+rules weren't obvious, but once you learned them, you could count on them
+serving you well on every computer out there.
+
+Not so these days.  There are four major processor types--8088, 80286, 80386,
+and 80486--with a bewildering array of memory architectures: cached (in
+several forms), page mode, static-column RAM, interleaved, and, of course,
+the 386SX, with its half-pint memory interface.  The processors offer wildly
+differing instruction execution times, and memory architectures warp those
+times further by affecting the speed of instruction fetching and access to
+memory operands.  Because actual performance is a complex interaction of
+instruction characteristics, instruction execution times, and memory access
+speed, the myriad processor-memory combinations out there make "exact
+performance" a meaningless term.  A specific instruction sequence may run at
+a certain speed on a certain processor in a certain system, but that often
+says little about the performance of the same instructions on a different
+processor, or even on the same processor with a different memory system.  The
+result: Precise optimization for the general PC market is a thing of the
+past.  (We're talking about optimizing for speed here; optimizing for size is
+the same for all processors so long as you stick to 8088-compatible code.)
+
+So there is no way to optimize performance ideally across the 80x86 family.
+An optimization that suits one processor beautifully is often a dog on
+another.  Any 8088 programmer would instinctively replace:
+
+DEC  CX / JNZ  LOOPTOP
+
+with:
+
+LOOP  LOOPTOP
+
+because LOOP is significantly faster on the 8088.  LOOP is also faster on the
+286.  On the 386, however, LOOP is actually two cycles slower than DEC/JNZ.
+The pendulum swings still further on the 486, where LOOP is about twice as
+slow as DEC/JNZ--and, mind you, we're talking about what was originally
+perhaps the most obvious optimization in the entire 80x86 instruction set.
+
+In short, there is no such thing as code that's truly optimized for the
+80x86.  Instead, code is either optimized for specific processor-memory
+combinations, or aimed down the middle, designed to produce good performance
+across a range of systems.  Optimizing for the 80x86 family by aiming down
+the middle is quite different from optimizing for the 8088, but many PC
+programmers are inappropriately still applying the optimization lore they've
+learned over the years on the PC (or AT).  The world has changed, and many of
+those old assumptions and tricks don't hold true anymore.
+
+You will not love the new world of 80x86 optimization, which is less precise
+and offers fewer clever tricks than optimizing for the 8088 alone.  Still,
+isn't it better to understand the forces affecting your code's performance
+out in the real world than to optimize for a single processor and hope for
+the best?
+
+Better, yes.  As much fun, no.  Optimizing for the 8088 was just about as
+good as it gets.  So it goes.
+
+Optimization Rules for a New World
+
+So, how do you go about writing fast code nowadays?  One way is to write
+different versions of critical code for various processors and memory access
+speeds, selecting the best version at runtime.  That's a great solution, but
+it requires an awful lot of knowledge and work.
+
+An alternative is to optimize for one particular processor and settle for
+whatever performance you get on the others.  This might make sense when the
+8088 is the target processor because it certainly needs the optimization more
+than any other processor.  However, 8088 optimization works poorly at the
+upper end of the 80x86 family.
+
+Nowadays, though, most of us want to optimize for the 286 and 386 systems
+that dominate the market, or across all 80x86 processors, and that's a tough
+nut to crack.  The 286 and 386 come in many configurations, and you can be
+sure, for example, that a 386SX, an interleaved 386, and a cached 386 have
+markedly different performance characteristics.  There are, alas, no hard and
+fast optimization rules that apply across all these environments.
+
+My own approach to 80x86 optimization has been to develop a set of general
+rules that serve reasonably well throughout the 80x86 line, especially the
+286 and 386, and to select a specific processor (in my case a cached 386, for
+which cycle times tend to be accurate) to serve as the tiebreaker when
+optimization details vary from one processor to another.  (Naturally, it's
+only worth bothering with these optimizations in critical code.)  The rules
+I've developed are:
+
+* Avoid accessing memory operands; use the registers to the max.
+
+* Don't branch.
+
+* Use string instructions, but don't go much out of your way to do so.
+
+* Keep memory accesses to a minimum by avoiding memory operands and keeping
+instructions short.
+
+* Align memory accesses.
+
+* Forget about many of those clever 8088 optimizations, using oddball
+instructions such as DAA and XLAT, that you spent years learning.
+
+Next I'll discuss each of these rules in turn in the context of
+8088-compatible real mode, which is still the focus of the 80x86 world.
+Later, I'll touch on protected mode.
+
+Let's start by looking at the last--and most surprising--rule.
+
+Kiss Those Tricks Goodbye
+
+To skilled assembly language programmers, the 8088 is perhaps the most
+wonderful processor ever created, largely because the instruction set is
+packed with odd instructions that are worthless to compilers but can work
+miracles in the hands of clever assembly programmers.  Unfortunately, each
+new generation of the 80x86 has rendered those odd instructions and marvelous
+tricks less desirable.  As the execution time for the commonly used
+instruction ADD BX, 4 has gone down from four cycles (8088) to three cycles
+(286) to two cycles (386) to one cycle (486), the time for the less
+frequently used instruction CBW has gone from two cycles (8088 and 286) up to
+three cycles (386 and 486)!
+
+Consider this ancient optimization for converting a binary digit to hex
+ASCII:
+
+ADD  AL,90H / DAA / ADC  AL,40H / DAA
+
+Now consider the standard alternative:
+
+ADD  AL,'0' / CMP  AL,'9' / JBE  HaveAscii / ADD  AL,'A'-('9'+1) / HaveAscii:
+
+As Figure 1 indicates, the standard code should be slower on an 8088 or 286,
+but faster on a 386 or a 486--and real-world tests confirm those results, as
+shown in Figure 2.  (All "actual performance" timings in this article were
+performed with the Zen timer from Zen of Assembly Language; see "References"
+for details.  The systems used for the tests were: 8088, standard 4.77 MHz PC
+XT; 80286, standard one-wait-state, 8 MHz PC AT; 386SX, 16 MHz noncached;
+80386, 20 MHz externally cached with all instructions and data in external
+cache for all tests except Listings One and Two; 80486, 25 MHz internally
+cached, with all instructions and data in internal cache for all tests except
+Listings One and Two.)
+
+In other words, this nifty, time-tested optimization is an anti-optimization
+on the 386 and 486.
+
+Why is this?  On the 386, DAA--a rarely used instruction--takes four cycles,
+and on the 486 it takes two cycles, in both cases twice as long as the more
+common instructions CMP and ADD; in contrast, on the 8088 all three
+instructions are equally fast at four cycles.  Also, the instruction-fetching
+advantage that the 1-byte DAA provides on the 8088 means nothing on a cached
+386.
+
+Nor is this an isolated example.  Most oddball instructions, from AAA to
+XCHG, have failed to keep pace with the core instructions--ADC, ADD, AND,
+CALL, CMP, DEC, INC, Jcc, JMP, LEA, MOV, OR, POP, PUSH, RET, SBB, SUB, TEST,
+and XOR--during the evolution from 8088 to 486.  As we saw earlier, even LOOP
+lags behind on the 386 and 486.  Check your favorite tricks for yourself;
+they might or might not hold up on the 386, but will most likely be
+liabilities on the 486.  Sorry, but I just report the news, and the news is:
+Kiss most of those tricks goodbye as the 386 and 486 come to dominate the
+market.  (This means that hand-optimization in assembly language yields less
+of a performance boost nowadays than it did when the 8088 was king; the
+improvement is certainly significant, but rarely in the 200-500 percent range
+anymore.  Sic transit gloria mundi.)  Most startling of all, string
+instructions lose much of their allure as we move away from the 8088, hitting
+bottom on the 486.
+
+The 486: All the Rules Change
+
+The 486 represents a fundamental break with 8088-style optimization.
+Virtually all the old rules fail on the 486, where, incredibly, a move to or
+from memory often takes just one cycle, but exchanging two registers takes
+three cycles.  The nonbranching core instructions mentioned earlier take only
+one cycle on the 486 when operating on registers; MOV can, under most
+conditions, access memory in one cycle; and CALL and JMP take only three
+cycles, given a cache hit.  However, noncore instructions take considerably
+longer.  XLAT takes four cycles; even STC and CLC take two cycles each.  The
+486's highly asymmetric execution times heavily favor core instructions and
+defeat most pre-486 optimizations.
+
+Core instructions do have a weakness on the 486.  While 486 MOVs involving
+memory are remarkably fast, accessing memory for an operand to OR, ADD, or
+the like costs cycles.  Even with the 8K internal cache, memory is not as
+fast as registers, except when MOV is used (and sometimes not even then), so
+registers are still preferred operands.  (AND [BX],1 is fast, at only three
+cycles, but AND BX,1 takes only one cycle--three times as fast.)
+
+OUT should be avoided whenever possible on the 486, and likewise for IN.  OUT
+takes anywhere from 10 to 31 cycles, depending on processor mode and
+privileges, more than an order of magnitude slower than MOV.  The lousy
+performance of OUT -- true on the 386 as well -- has important implications
+for graphics applications.
+
+String instructions are so slow on the 486 that you should check cycle times
+before using any string instruction other than the always superior REP MOVS.
+For example, LODSB takes five cycles on the 486, but MOV AL,[SI]/INC SI takes
+only two cycles; likewise for STOSB and MOV [DI],AL/INC DI.  Listing One
+(page 73) uses LODSB/STOSB to copy a string, converting lowercase to
+uppercase while copying; Listing Two (page 73) uses MOV/INC instead.  Figure
+3 summarizes the performance of the two routines on a variety of processors;
+note the diminishing effectiveness of string instructions on the newer
+processors.  Think long and hard before using string instructions other than
+REP MOVS on the 486.
+
+Optimization for the 486 is really a whole new ball game.  When optimizing
+across the 80x86 family, the 486 will generally be the least of your worries
+because it is so much faster than the rest of the family; anything that runs
+adequately on any other processor will look terrific on the 486.  Still, the
+future surely holds millions of 486s, so it wouldn't hurt to keep one eye on
+the 486 as you optimize.
+
+String Instructions: Fading Stars
+
+On the 8088, string instructions are so far superior to other instructions
+that it's worth going to great lengths to use them, but they lose much of
+that status on newer processors.  One of the best things about string
+instructions on the 8088 is that they require little instruction fetching,
+because they're 1-byte instructions and because of the REP prefix; however,
+instruction fetching is less of a bottleneck on newer processors.  String
+instructions also have superior cycle times on the 8088, but that advantage
+fades on the 286 and 386 as well.
+
+On the 286, string instructions (when they do exactly what you need) are
+still clearly better than the alternatives.  On the 386, however, some string
+instructions are, even under ideal circumstances, the best choice only by a
+whisker, if at all.  For example, since Day One, clearing a buffer has been
+done with REP STOS.  That's certainly faster than the looping MOV/ADD
+approach shown in Listing Three (page 73), but on the 386 and 486 it's no
+faster than the unrolled loop MOV/ADD approach of Listing Four (page 73), as
+shown in Figure 4.  (Actually, in my tests REP STOS was a fraction of a cycle
+slower on the 386, and fractionally faster on the 486.)  REP STOS is much
+easier to code and more compact, so it's still the approach of choice for
+buffer clearing--but it's not necessarily fastest on a 486 or fast-memory
+386.  This again demonstrates just how unreliable the old optimization rules
+are on the newer processors.
+
+The point is not that you shouldn't use string instructions on the 386.  REP
+MOVS is the best way to move data, and the other string instructions are
+compact and usually faster, especially on uncached systems.  However, on the
+386 it's no longer worth going to the trouble of juggling registers and
+reorganizing data structures to use string instructions.  Furthermore, when
+you truly need maximum performance on the 386, check out nonstring
+instructions in unrolled loops.  It goes against every lesson learned in a
+decade of 8088 programming, but avoiding string instructions sometimes pays
+on the 386.
+
+The Siren Song of Memory Accesses
+
+Finally, here's a rule that's constant from the 8088 to the 486: Use the
+registers.  Avoid memory.
+
+Don't be fooled by the much faster memory access times of the 286 and 386.
+The effective address calculation time of the 8088 is mostly gone, so MOV
+AX,[BX] takes only five cycles on the 286, and ADD [SI],DX takes only seven
+on the 386.  That's so much faster than the 17 and 29 cycles, respectively,
+that they take on the 8088 that you might start thinking that memory is
+pretty much interchangeable with registers.
+
+Think again.  MOV AX,BX is still more than twice as fast as MOV AX,[BX] on
+the 286, and ADD SI,DX is more than three times as fast as ADD [SI],DX on the
+386.  Memory operands can also reduce performance by slowing instruction
+fetching.  Memory is fast on the 286 and 386.  Registers are faster.  Use
+them as heavily as possible.
+
+Don't Branch
+
+Here's another rule that stays the same across the 80x86 family: Don't
+branch.  Branching suffers on the 8088 from lengthy cycle counts and emptying
+the prefetch queue.  Emptying the prefetch queue is a lesser but nonetheless
+real problem in the post-8088 world, and the cycle counts of branches are
+still killers.  As Figure 4 indicates, it pays to eliminate branches by
+unrolling loops or using repeated string instructions.
+
+Modern-Day Instruction Fetching
+
+Instruction fetching is the bugbear of 8088 performance; the 8088 simply
+can't fetch instruction bytes as quickly as it can execute them, thanks to
+its undersized bus.  Minimizing all memory accesses, including instruction
+fetches, is paramount on the 8088.
+
+Instruction fetching is less of a problem nowadays.  Figure 5 shows the
+maximum rates at which various processors can fetch instruction bytes;
+clearly, matters have improved considerably since the 8088, although
+instructions also execute in fewer cycles on the newer processors.  Fetching
+problems can occur on any 80x86 processor, even the 486, but the only
+processors other than the 8088 that face major instruction fetching problems
+are the one-wait-state 286 and the 386SX, although uncached 386s may also
+outrun memory.  However, the problems here are different from and less
+serious than with the 8088.
+
+Consider: An 8088 executes a register ADD in three cycles, but requires eight
+cycles to fetch that instruction, a fetch/execute ratio of 2.67.  A
+one-wait-state 286 requires three cycles to fetch a register ADD and executes
+it in two cycles, a ratio of 1.5.  A 386SX can fetch a register ADD in two
+cycles, matching the execution time nicely, and a cached 386 can fetch two
+register ADDs in the two cycles it takes to execute just one.  For
+register-only code--the sort of code critical loops should contain--the 386
+generally runs flat out, and the 286 and 386SX usually (not always, but
+usually) outrun memory by only a little at worst.  Greater fetching problems
+can arise when working with large instructions or instruction sequences that
+access memory nonstop, but those are uncommon in critical code.  This is a
+welcome change from the 8088, where small, register-only instructions tend to
+suffer most from inadequate instruction fetching.
+
+Also, uncached 386 systems often use memory architectures that provide
+zero-wait-state performance when memory is accessed sequentially.  In
+register-only code, instruction fetches are the only memory accesses, so
+fetching proceeds at full speed when the registers are used heavily.
+
+So, is instruction fetching a problem in the post-8088 world?  Should
+instructions be kept short?
+
+Yes.  Smaller instructions can help considerably on the one-wait-state 286
+and on the 386SX.  Not as much as on the 8088, but it's still worth the
+trouble.  Even a cached 386 can suffer from fetching problems, although
+that's fairly uncommon.  For example, when several MOV WORD PTR [MemVar],0
+instructions are executed in a row, as might happen when initializing memory
+variables, performance tends to fall far below rated speed, as shown in
+Figure 6.  The particular problem with MOV WORD PTR [MemVar],0 is that it
+executes in just two (386) or three (286) cycles, yet has both an addressing
+displacement field and a constant field.  This eats up memory bandwidth by
+requiring more instruction fetching.  It also accesses memory, eating up
+still more bandwidth.  We'll see this again, and worse, when we discuss
+protected mode.
+
+Generally, though, post-8088 processors with fast memory systems and
+full-width buses run most instructions at pretty near their official cycle
+times; for these systems, optimization consists mostly of counting cycles.
+Slower memory or constricted buses (as in the 386SX) require that memory
+accesses (both instruction fetches and operand accesses) be minimized as
+well.  Fortunately, the same sort of code--register only--meets both
+requirements.
+
+Use the registers.  Avoid constants.  Avoid displacements.  Don't branch.
+That's the big picture.  Don't sweat the details.
+
+Alignment: The Easy Optimization
+
+The 286, 386SX, and 386 take twice as long to access memory words at odd
+addresses as at even addresses.  The 386 takes twice as long to access memory
+dwords at addresses that aren't multiples of four as those that are.  You
+should use ALIGN 2 to word align all word-sized data, and ALIGN 4 to dword
+align all data that's accessed as a dword operand, as in:
+
+ALIGN  4 MemVar  dd  ? : MOV EAX,[MemVar]
+
+Alignment also applies to code; you may want to word or dword align the
+starts of procedures, labels that can only be reached by branching, and the
+tops of loops.  (Code alignment matters only at branch targets, because only
+the first instruction fetch after a branch can suffer from nonalignment.)
+Dword alignment of code is optimal, and will help on the 386 even in real
+mode, but word alignment will produce nearly as much improvement as dword
+alignment without wasting nearly as many bytes.
+
+Alignment improves performance on many 80x86 systems without hindering it on
+any.  Recommended.
+
+Protected Mode
+
+There are two sorts of protected mode, 16-bit and 32-bit.  The primary
+optimization characteristic of 16-bit protected mode (OS/2 1.X, Rational DOS
+Extender) is that it takes an ungodly long time to load a segment register
+(for example, MOV ES,AX takes 17 cycles on a 286) so load segment registers
+as infrequently as possible in 16-bit protected mode.
+
+Optimizing for 32-bit protected mode (OS/2 2.0, SCO Unix, Phar Lap DOS
+Extender) is another matter entirely.  Typically, no segment loads are needed
+because of the flat address space.  However, 32-bit protected mode code can
+be bulky, and that can slow instruction fetching.  Constants and addressing
+displacements can be as large as 4 bytes each, and an extra byte, the SIB
+byte, is required whenever two 32-bit registers are used to address an
+operand or scaled addressing is used.  So, for example, MOV DWORD PTR
+[MemVar],0 is a 10-byte instruction in 32-bit protected mode.  The
+instruction is supposed to execute in two cycles, but even a 386 needs four
+to six cycles to fetch it, plus another two cycles to access memory; a few
+such instructions in a row can empty the prefetch queue and slow performance
+considerably.  The slowdown occurs more quickly and is more acute on a 386SX,
+which needs 14 cycles to perform the memory accesses for this nominally
+2-cycle instruction.
+
+Code can get even larger when 32-bit instructions are executed in 16-bit
+segments, adding prefix bytes.  (Avoid prefix bytes if you can; they increase
+instruction size and can cost cycles.)  Figure 7 shows actual versus nominal
+cycle times of multiple MOV DWORD PTR [EBX*4+MemVar],0 instructions running
+in a 16-bit segment.  Although cache type (write-back, write-through) and
+main-memory write time also affect the performance of stores to memory, there
+is clearly a significant penalty for using several large (in this case,
+13-byte) instructions in a row.
+
+Fortunately, this is a worst case, easily avoided by keeping constants and
+displacements out of critical loops.  For example, you should replace:
+
+ADDLOOP: MOV  DWORD PTR BaseTable[EDX+EBX],0 ADD  EBX,4 DEC  ECX JNZ  ADDLOOP
+
+with:
+
+LEA  EBX,BaseTable[EDX+EBX] SUB  EAX,EAX ADDLOOP: MOV  [EBX],EAX ADD  EBX,4
+DEC  ECX JNZ  ADDLOOP
+
+Better yet, use REP STOSD or unroll the loop!
+
+Happily, register-only instructions are no larger in 32-bit protected mode
+than otherwise and run at or near their rated speed in 32-bit protected mode
+on all processors.  All in all, in protected mode it's more important than
+ever to avoid large constants and displacements and to use the registers as
+much as possible.
+
+Conclusion
+
+Optimization across the 80x86 family isn't as precise as 8088 optimization,
+and it's a lot less fun, with fewer nifty tricks and less spectacular
+speed-ups.  Still, familiarity with the basic 80x86 optimization rules can
+give you a decided advantage over programmers still laboring under the
+delusion that the 286, 386, and 486 are merely faster 8088s.
+
+References
+
+Abrash, Michael.  Zen of Assembly Language.  Glenview, Ill.: Scott, Foresman,
+1990.
+
+Barrenechea, Mark.  "Peak Performance: On to the 486."  Programmer's Journal,
+(November-December 1990).
+
+Paterson, Tim.  "Assembly Language Tricks of the Trade."  Dr. Dobb's Journal
+(March 1990).
+
+Turbo Assembler Quick Reference Guide.  Borland International, 1990.
+
+i486 Microprocessor Programmer's Reference Manual.  Intel Corporation, 1989.
+
+80386 Programmer's Reference Manual.  Intel Corporation, 1986.
+
+Microsystems Components Handbook: Microprocessors Volume I.  Intel
+Corporation, 1985.
diff --git a/16/scrasm/CONSTANT.INC b/16/scrasm/CONSTANT.INC
new file mode 100644
index 00000000..02ce404b
--- /dev/null
+++ b/16/scrasm/CONSTANT.INC
@@ -0,0 +1,127 @@
+PEL_READ_REG    EQU     03C7h   ;Color register, read address
+PEL_WRITE_REG   EQU     03C8h   ;Color register, write address
+PEL_DATA_REG    EQU     03C9h   ;Color register, data port
+SC_INDEX        equ     03C4h   ;Sequence Controller Index
+CRTC_INDEX      equ     03D4h   ;CRT Controller Index
+MISC_OUTPUT     equ     03C2h   ;Miscellaneous Output register
+SCREEN_SEG      equ     0a000h  ;segment of display memory in mode X
+INPUT_STATUS_1  equ     03DAh   ;Input Status 1 register
+ATC_INDEX       equ     03C0h   ;Attribute Controller
+START_ADDRESS_HIGH equ  0Ch     ;bitmap start address high byte
+START_ADDRESS_LOW equ   0Dh     ;bitmap start address low byte
+GC_INDEX        EQU     03CEh   ;Graphics Controller Index
+BIT_MASK        EQU     08h     ;Graphics Controller register index: Bit Mask
+MAP_MASK        EQU     02h     ;Sequence Controller register index: Map Mask
+
+ALL_COPY_BITS   EQU     00000h+BIT_MASK ;word: BIT_MASK index low, mask 00h high
+ALL_DRAW_BITS   EQU     0FF00h+BIT_MASK ;word: BIT_MASK index low, mask 0FFh high
+
+SQUARE_WIDTH    EQU     16              ;tiles are 16x16 (GENSQ.C writes 16x16-byte tiles)
+SQUARE_HEIGHT   EQU     16
+SCREEN_WIDTH    EQU     320
+SCREEN_HEIGHT   EQU     240             ;NOTE(review): GENMAP.C defines SCREEN_HEIGHT as 200
+VIRTUAL_WIDTH   EQU     352
+VIRTUAL_HEIGHT  EQU     240
+
+PAGE_0          EQU     0
+PAGE_1          EQU     05540h  ;05470h  ;5540h
+PAGE_2          EQU     0AA80h  ;0A8E0h  ;AA80h
+
+SCROLL_SPEED    EQU     1               ; Don't let it go above 8!
+MAGIC_NUM       EQU     100             ;same value GENMAP.C writes to MAPHEADER.magic
+
+CPU8086         EQU     0               ;CPU type codes; same order as CPUNameTable in INIT.INC
+CPU80286        EQU     1
+CPU80386        EQU     2
+CPU80486        EQU     3
+
+;======================================================================
+;        Key Assignments (each value is the make scan code times 2)
+;======================================================================
+kESC            EQU       2
+kONE            EQU       4
+kTWO            EQU       6
+kTHREE          EQU       8
+kFOUR           EQU      10
+kFIVE           EQU      12
+kSIX            EQU      14
+kSEVEN          EQU      16
+kEIGHT          EQU      18
+kNINE           EQU      20
+kZERO           EQU      22
+kMINUS          EQU      24
+kEQUAL          EQU      26
+kBACKSPACE      EQU      28
+kTAB            EQU      30
+kQ              EQU      32
+kW              EQU      34
+kE              EQU      36
+kR              EQU      38
+kT              EQU      40
+kY              EQU      42
+kU              EQU      44
+kI              EQU      46
+kO              EQU      48
+kP              EQU      50
+kL_BRACE        EQU      52
+kR_BRACE        EQU      54
+kENTER          EQU      56
+kCTRL           EQU      58
+kA              EQU      60
+kS              EQU      62
+kD              EQU      64
+kF              EQU      66
+kG              EQU      68
+kH              EQU      70
+kJ              EQU      72
+kK              EQU      74
+kL              EQU      76
+kSEMICOLON      EQU      78
+kQUOTE          EQU      80
+kBACKQUOTE      EQU      82
+kL_SHIFT        EQU      84
+kBACKSLASH      EQU      86
+kZ              EQU      88
+kX              EQU      90
+kC              EQU      92
+kV              EQU      94
+kB              EQU      96
+kN              EQU      98
+kM              EQU     100
+kCOMMA          EQU     102
+kPERIOD         EQU     104
+kSLASH          EQU     106
+kR_SHIFT        EQU     108
+kGREY_STAR      EQU     110
+kALT            EQU     112
+kSPACE          EQU     114
+kCAPSLOCK       EQU     116
+kF1             EQU     118
+kF2             EQU     120
+kF3             EQU     122
+kF4             EQU     124
+kF5             EQU     126
+kF6             EQU     128
+kF7             EQU     130
+kF8             EQU     132
+kF9             EQU     134
+kF10            EQU     136
+kNUMLOCK        EQU     138
+kSCRLLOCK       EQU     140
+kHOME           EQU     142
+kUP             EQU     144
+kPAGE_UP        EQU     146
+kGREY_MINUS     EQU     148
+kLEFT           EQU     150
+kPAD_FIVE       EQU     152
+kRIGHT          EQU     154
+kGREY_PLUS      EQU     156
+kEND            EQU     158
+kDOWN           EQU     160
+kPAGE_DOWN      EQU     162
+kINSERT         EQU     164
+kDELETE         EQU     166
+
+kF11            EQU     174
+kF12            EQU     176
+
\ No newline at end of file
diff --git a/16/scrasm/DIAGONAL.MAP b/16/scrasm/DIAGONAL.MAP
new file mode 100644
index 00000000..1fb5529e
Binary files /dev/null and b/16/scrasm/DIAGONAL.MAP differ
diff --git a/16/scrasm/DIAGONAL.PAL b/16/scrasm/DIAGONAL.PAL
new file mode 100644
index 00000000..5dee1969
Binary files /dev/null and b/16/scrasm/DIAGONAL.PAL differ
diff --git a/16/scrasm/DIAGONAL.TIL b/16/scrasm/DIAGONAL.TIL
new file mode 100644
index 00000000..95eee9f2
Binary files /dev/null and b/16/scrasm/DIAGONAL.TIL differ
diff --git a/16/scrasm/GENMAP.C b/16/scrasm/GENMAP.C
new file mode 100644
index 00000000..01652048
--- /dev/null
+++ b/16/scrasm/GENMAP.C
@@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <string.h>
+#include <memory.h>
+#include <stdlib.h>
+
+#define WIDTH   255
+
+#define MAPNAME "Diagonal"
+#define FILENAME "%s.MAP"
+char    fn[100] = FILENAME;
+typedef unsigned char BYTE;
+typedef unsigned short int WORD;
+typedef BYTE    ROW[WIDTH];
+
+ROW     r;
+
+#define MAGIC_NUM       100
+#define SQUARE_WIDTH    16
+#define SQUARE_HEIGHT   16
+#define SCREEN_WIDTH    320
+#define SCREEN_HEIGHT   200
+#define VIRTUAL_WIDTH   352
+#define VIRTUAL_HEIGHT  240
+typedef struct MAPHEADER {      /* header GENMAP's main() writes first to the .MAP file */
+        BYTE    name[12];       /* 12    Includes [n]=0 and [n+1]=26 */
+        WORD    width;          /*  2    map width: bytes per map row */
+        WORD    height;         /*  2    map height (defaults to width) */
+        WORD    extent;         /*  2    width * height               */
+        WORD    off_x1;         /*  2    0 as generated               */
+        WORD    off_y1;         /*  2    0 as generated               */
+        WORD    off_x2;         /*  2    (VIRTUAL_WIDTH/SQUARE_WIDTH - 1) % width */
+        WORD    off_y2;         /*  2    ((VIRTUAL_HEIGHT/SQUARE_HEIGHT - 1) % height) * width */
+        WORD    x_wrap;         /*  2    = width as generated         */
+        WORD    y_wrap;         /*  2    = height as generated        */
+        WORD    magic;          /*  2    MAGIC_NUM                    */
+        } MAPHEADER, far *LPMAPHEADER;
+MAPHEADER mh;
+
+/* Writes MAPNAME.MAP: a MAPHEADER followed by `height` rows of `width` bytes.
+ * Usage: GENMAP [width [height]]; both are clamped to 1..WIDTH and height
+ * defaults to width.  Exits 0 on success, 1 if the file cannot be opened. */
+void main(int argc, char *argv[])
+        {
+        FILE    *fp;
+        int     i,j;
+        BYTE    b;
+        int     width = WIDTH, height = WIDTH;
+
+        if (argc > 1) {
+                width = atoi(argv[1]);
+                if (width > WIDTH) width = WIDTH;
+                if (width < 1) width = 1;       /* 0 would divide by zero below */
+                printf("Width = %d\n",width);
+                height=width;
+                if (argc > 2) {
+                        height = atoi(argv[2]);
+                        if (height > WIDTH) height = WIDTH;
+                        if (height < 1) height = 1;     /* same guard for % height */
+                        printf("Height = %d\n",height);
+                        }
+                }
+
+        sprintf(fn,FILENAME,MAPNAME);
+        fp = fopen(fn,"wb");
+        if (!fp) {
+                printf("Couldn't open %s for write.\n",fn);
+                exit(1);
+                }
+
+        memset(&mh, 0xFF, sizeof(MAPHEADER));   /* Will reveal missing initializing */
+        strcpy((char *)mh.name,MAPNAME);
+        mh.name[8]=0;
+        mh.name[9]=26;  /* Ctrl-Z */
+        mh.width = (WORD)width;
+        mh.height = (WORD)height;
+        mh.extent = (WORD)((WORD)width * (WORD)height);
+        mh.off_x1 = mh.off_y1 = (WORD)0;
+        mh.off_x2 = (WORD)(((VIRTUAL_WIDTH / SQUARE_WIDTH) - 1) % width);
+        mh.off_y2 = (WORD)((((VIRTUAL_HEIGHT / SQUARE_HEIGHT) - 1) % height) * width);
+        mh.x_wrap = (WORD)width;
+        mh.y_wrap = (WORD)height;
+        mh.magic = MAGIC_NUM;
+        fwrite(&mh, 1, sizeof(MAPHEADER), fp);
+
+        for (i = 0; i<height; i++) {            /* row count must match mh.height */
+                b = (BYTE)(i%width);            /* each row starts one value further on */
+                for (j = 0; j<width; j++) {
+                        r[j] = b;
+                        b = (BYTE)(((int)b+1) % width);
+                        }
+                r[0] = (BYTE)(i != 0);          /* first column: 0 in row 0, 1 elsewhere */
+                fwrite(r, width,1, fp);
+                printf("Map row %d\r",i);
+                }
+        fclose(fp);
+        printf("All done!    \n");
+        exit(0);
+        }
+
\ No newline at end of file
diff --git a/16/scrasm/GENMAP.EXE b/16/scrasm/GENMAP.EXE
new file mode 100644
index 00000000..258ec9f1
Binary files /dev/null and b/16/scrasm/GENMAP.EXE differ
diff --git a/16/scrasm/GENMAP.LNK b/16/scrasm/GENMAP.LNK
new file mode 100644
index 00000000..95ee9882
--- /dev/null
+++ b/16/scrasm/GENMAP.LNK
@@ -0,0 +1 @@
+genmap.obj; 
diff --git a/16/scrasm/GENMAP.OBJ b/16/scrasm/GENMAP.OBJ
new file mode 100644
index 00000000..6f1e7a9d
Binary files /dev/null and b/16/scrasm/GENMAP.OBJ differ
diff --git a/16/scrasm/GENPAL.C b/16/scrasm/GENPAL.C
new file mode 100644
index 00000000..f913ae07
--- /dev/null
+++ b/16/scrasm/GENPAL.C
@@ -0,0 +1,51 @@
+#include <stdio.h>
+#include <string.h>
+#include <memory.h>
+#include <stdlib.h>
+
+#define COLORS  256
+#define PALNAME "Diagonal"
+#define FILENAME "%s.PAL"
+char    fn[100] = FILENAME;
+typedef unsigned char BYTE;
+typedef unsigned short int WORD;
+typedef struct COLOR {          /* one palette entry; main() writes sizeof(COLOR) bytes each */
+        BYTE    r,g,b;          /* main() keeps every channel within 0..63 */
+        } COLOR, far *LPCOLOR;
+
+/* Advances one colour channel by its step, reversing direction at 0 and 63. */
+static void bounce(int *v, int *step)
+        {
+        *v += *step;
+        if (*v > 63) { *v = 63; *step = -*step; }
+        else if (*v < 0) { *v = 0; *step = -*step; }
+        }
+
+/* Writes COLORS palette entries to PALNAME.PAL; exits 1 if it cannot be opened. */
+void main()             /* int argc, char *argv[]) */
+        {
+        FILE    *out;
+        COLOR   entry;
+        int     n;
+        int     red = 0, green = 0, blue = 0;
+        int     dred = 2, dgreen = 3, dblue = 5;
+
+        sprintf(fn,FILENAME,PALNAME);
+        out = fopen(fn,"wb");
+        if (out == NULL) {
+                printf("Couldn't open %s for write.\n",fn);
+                exit(1);
+                }
+
+        for (n = 0; n < COLORS; n++) {
+                entry.r = (BYTE)red;    bounce(&red, &dred);
+                entry.g = (BYTE)green;  bounce(&green, &dgreen);
+                entry.b = (BYTE)blue;   bounce(&blue, &dblue);
+                fwrite(&entry, sizeof(entry),1, out);
+                printf("Palette %d\r",n);
+                }
+        fclose(out);
+        printf("All done!    \n");
+        exit(0);
+        }
+
\ No newline at end of file
diff --git a/16/scrasm/GENPAL.EXE b/16/scrasm/GENPAL.EXE
new file mode 100644
index 00000000..c242a3d4
Binary files /dev/null and b/16/scrasm/GENPAL.EXE differ
diff --git a/16/scrasm/GENPAL.LNK b/16/scrasm/GENPAL.LNK
new file mode 100644
index 00000000..4b4ff212
--- /dev/null
+++ b/16/scrasm/GENPAL.LNK
@@ -0,0 +1 @@
+genpal.obj; 
diff --git a/16/scrasm/GENPAL.OBJ b/16/scrasm/GENPAL.OBJ
new file mode 100644
index 00000000..ae94092e
Binary files /dev/null and b/16/scrasm/GENPAL.OBJ differ
diff --git a/16/scrasm/GENSQ.C b/16/scrasm/GENSQ.C
new file mode 100644
index 00000000..402853f3
--- /dev/null
+++ b/16/scrasm/GENSQ.C
@@ -0,0 +1,102 @@
+#include <stdio.h>
+#include <string.h>
+#include <memory.h>
+#include <stdlib.h>
+
+#define WIDTH   256
+
+#define FILENAME "DIAGONAL.TIL"
+char    fn[100] = FILENAME;
+typedef unsigned char BYTE;
+typedef BYTE    ROW[16];
+typedef ROW     BITMAP[16];
+
+BITMAP  b;
+BITMAP  c;
+
+BITMAP  pattern={{1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0},     /* 16x16 template; copy_pattern() */
+                 {1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0},     /* maps 0 -> m, 1 -> l, 2 -> h     */
+                 {1,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0},
+                 {1,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0},
+                 {1,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0},
+                 {0,0,1,0,0,1,1,0,1,1,0,0,2,0,0,0},
+                 {0,0,1,0,0,1,2,0,1,2,0,0,2,0,0,0},
+                 {0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0},
+                 {0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0},
+                 {0,1,0,0,1,2,0,0,0,1,2,0,0,2,0,0},
+                 {0,0,1,0,1,2,1,1,1,1,2,0,2,0,0,0},
+                 {0,0,1,0,0,2,2,2,2,2,0,0,2,0,0,2},
+                 {0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2},
+                 {0,0,0,0,2,2,0,0,0,2,2,0,0,0,0,2},
+                 {0,0,0,0,0,0,2,2,2,0,0,0,0,0,2,2},
+                 {0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2}};
+
+/* Paints one tile from a 3-level template: for every cell of patt, the
+ * codes 0, 1 and 2 store m, l and h into the same cell of b.  Any other
+ * code leaves that cell of b untouched. */
+void copy_pattern(BITMAP b,BITMAP patt, BYTE l, BYTE m, BYTE h)
+        {
+        BYTE    shade[3];
+        int     row,col;
+
+        shade[0] = m;
+        shade[1] = l;
+        shade[2] = h;
+        for (row=0; row<16; row++) {
+                for (col=0; col<16; col++) {
+                        BYTE    code = patt[row][col];
+
+                        if (code < 3)
+                                b[row][col] = shade[code];
+                        }
+                }
+        }
+
+/* Transforms linear to planar */
+void transform(BITMAP b,BITMAP c)
+        {
+        BYTE    *dst = (BYTE *)c;
+        int     plane,row,quad,n = 0;
+
+        /* c is filled as four consecutive 64-byte planes: plane p holds, */
+        /* row by row, the bytes of b in columns p, p+4, p+8 and p+12.    */
+        for (plane=0; plane<4; plane++) {
+                for (row=0; row<16; row++)
+                        for (quad=0; quad<4; quad++)
+                                dst[n++] = b[row][quad*4 + plane];
+                }
+        }
+
+/* Writes `count` 16x16 tiles to FILENAME; count defaults to WIDTH and an
+ * optional first argument replaces it (capped at WIDTH).  Tile n is filled
+ * from `pattern` with the byte values n-1, n and n+1, wrapping modulo count. */
+void main(int argc,char *argv[])
+        {
+        FILE    *out = fopen(fn,"wb");
+        int     count = WIDTH;
+        int     tile;
+
+        if (out == NULL) {
+                printf("Couldn't open %s for write.\n",fn);
+                exit(1);
+                }
+        if (argc > 1) {
+                count = atoi(argv[1]);
+                count = (count > WIDTH) ? WIDTH : count;
+                printf("Width = %d\n",count);
+                }
+
+        for (tile = 0; tile < count; tile++) {
+                BYTE    prev = (BYTE)((tile + count - 1) % count);
+                BYTE    next = (BYTE)((tile + 1) % count);
+
+                copy_pattern(b, pattern, prev, (BYTE)tile, next);
+                transform(b,c);
+                fwrite(c, 16,16, out);
+                printf("Square %d\r",tile);
+                }
+        fclose(out);
+        printf("All done!     \n");
+        exit(0);
+        }
+
\ No newline at end of file
diff --git a/16/scrasm/GENSQ.EXE b/16/scrasm/GENSQ.EXE
new file mode 100644
index 00000000..71cf2657
Binary files /dev/null and b/16/scrasm/GENSQ.EXE differ
diff --git a/16/scrasm/GENSQ.LNK b/16/scrasm/GENSQ.LNK
new file mode 100644
index 00000000..a9d9b795
--- /dev/null
+++ b/16/scrasm/GENSQ.LNK
@@ -0,0 +1 @@
+gensq.obj; 
diff --git a/16/scrasm/GENSQ.OBJ b/16/scrasm/GENSQ.OBJ
new file mode 100644
index 00000000..7166ee65
Binary files /dev/null and b/16/scrasm/GENSQ.OBJ differ
diff --git a/16/scrasm/INIT.INC b/16/scrasm/INIT.INC
new file mode 100644
index 00000000..eddeeed4
--- /dev/null
+++ b/16/scrasm/INIT.INC
@@ -0,0 +1,375 @@
+;; Error messages: each ERR_* code is the index of its text in msgtblError
+ERR_OK          EQU     0
+msgErr0         db      'Later!',13,10,'$'
+ERR_MEM         EQU     1
+msgErr1         db      'Error 001:  Out of memory?',13,10,'$'
+ERR_CPU         EQU     2
+msgErr2         db      'Error 002:  CPU must be at least an 80386.',13,10,'$'
+ERR_FILE        EQU     3
+msgErr3         db      'Error 003:  File error.',13,10,'$'
+ERR_FILENOTFOUND EQU    4
+msgErr4         db      'Error 004:  File not found.',13,10,'$'
+msgtblError     dw      offset msgErr0, offset msgErr1, offset msgErr2,
+                        offset msgErr3, offset msgErr4
+nError          db      0               ; current ERR_* code; also the exit code
+
+;; CPU name strings; CPUNameTable is indexed by nCPU (as a word index)
+CPUName86       DB      "8088/8086$"
+CPUName286      DB      "80286DX/SX$"
+CPUName386      DB      "80386DX/SX$"
+CPUName486      DB      "80486DX/SX or better$"
+CPUNameTable    DW      CPUName86,CPUName286,CPUName386,CPUName486
+
+EVEN
+msgCPUTypeIs    DB      "Your CPU type:  $"
+EVEN
+msgCPUTypeIsEnd DB      13,10,'$'
+nCPU            DB      0               ; written by CPUType
+
+EVEN
+msgPages        DB      'Pages displayed:  '
+strNumPages     DB      6 dup (?),13,10,'$'     ; filled in by Int2Ascii
+
+EVEN
+bufText         DW      80*50 DUP (?)   ; Needs this much to hold
+                                        ; a 50-line screen...
+wCPos           DW      0               ; cursor position kept by SaveVideo
+nDisplay        DB      0               ; video mode kept by SaveVideo
+
+EVEN
+fnMap1          db      'DIAGONAL.MAP',0
+fnTiles1        db      'DIAGONAL.TIL',0
+fnPalette       db      'DIAGONAL.PAL',0        ; only one allowed, for now
+fnMap2          db      'SCROLL.MAP',0
+fnTiles2        db      'SCROLL.TIL',0
+
+fntblMap        dw      offset fnMap1,offset fnMap2
+fntblTiles      dw      offset fnTiles1,offset fnTiles2
+nMap            dw      0
+
+;; CPUType: stores the detected CPU class in nCPU and prints its name.
+;; Snatched from Ray Duncan's _Power Programming MASM_ chapter 14 and
+;; reformatted.  Registers are not preserved, but FLAGS are saved and
+;; restored: the probes below load 0F000h into FLAGS, which clears IF.
+CPUType         PROC    near
+                pushf                   ; save caller's FLAGS (see cpux)
+                pushf                   ; now try to clear bits 12-15
+                pop     ax              ; of CPU flags
+                and     ax,0fffh
+                push    ax              ; set modified CPU flags
+                popf
+                pushf
+                pop     ax              ; get flags again
+                and     ax,0f000h       ; if bits 12-15 are still
+                cmp     ax,0f000h       ; set, this is 8086/88
+                jne     cpu1            ; jump, not 8086/88
+                mov     nCPU,CPU8086    ; set nCPU = 86/88 CPU type
+                jmp     cpux            ; and exit
+
+cpu1:           or      ax,0f000h       ; must be 286 or later,
+                push    ax              ; now try to set bits 12-15
+                popf                    ; of CPU flags
+                pushf
+                pop     ax              ; if bits 12-15 can't be
+                and     ax,0f000h       ; set, this is a 286
+                jnz     cpu2            ; jump, not 80286
+                mov     nCPU,CPU80286   ; set nCPU = 286 CPU type
+                jmp     cpux            ; and exit
+
+cpu2:           mov     bx,sp           ; 386 or later, save SP
+                and     sp,not 3        ; avoid stack alignment fault
+                pushfd                  ; get value of EFLAGS
+                pop     eax
+                mov     ecx,eax         ; save copy of EFLAGS
+                xor     eax,40000h      ; flip AC bit in EFLAGS
+                push    eax             ; try and force EFLAGS
+                popfd
+                pushfd                  ; get back EFLAGS value
+                pop     eax
+                mov     sp,bx           ; restore old stack pointer
+                xor     eax,ecx         ; nonzero if the AC bit changed
+                jnz     cpu3            ; it changed: 486 or later
+                mov     nCPU,CPU80386   ; set nCPU = 386 CPU type
+                jmp     cpux            ; and exit
+cpu3:           mov     nCPU,CPU80486   ; set nCPU = 486 CPU type
+cpux:           popf                    ; restore caller's FLAGS (IF, DF, ...)
+                mov     bl,nCPU
+                xor     bh,bh
+                shl     bx,1
+                DOSPRINT <offset msgCPUTypeIs>
+                DOSPRINT CPUNameTable[bx]
+                DOSPRINT <offset msgCPUTypeIsEnd>
+                ret                     ; return with nCPU = CPU type
+CPUType         ENDP
+
+;; Initialize:  start-up, main loop and shutdown.  Requires a 386 or better
+;; (that's what the code is assembled for); always exits to DOS, never RETs.
+Initialize      PROC    near
+        ; Set DS = CS in this program, since data is local
+                mov     ax,cs
+                mov     segCode,ax      ; Store the Code Segment
+                mov     bx,ds
+                mov     segPSP,bx       ; Store the PSP Segment
+                mov     ds,ax           ; Set DS = CS
+
+        ; Resize code to 64K
+        CODE_SIZE EQU 64                ; <- this is arbitrary.
+                ; ES already -> allocated segment
+                mov     ah,4ah
+                mov     bx,64*CODE_SIZE ; in paragraphs: 64*64*16 bytes = 64K
+                int     21h
+                mov     nError,ERR_MEM  ; (MOV leaves CF from INT 21h untouched)
+                jc      TerminateError
+
+;; I've chosen not to implement sprites yet so that I can get this out
+;; the door...
+;; ; 320x200 buffer for sprite drawing.  To draw sprites, first draw them
+;; ; into this buffer, adding rectangles to the current rectangle list.
+;; ; Then, use BUFFER_COPY to put out the buffers with the current
+;; ; rectangle list to the screen.  BUFFER_COPY will ensure minimal VGA
+;; ; writing.
+;;         ; Create a buffer segment
+;;                 mov     bx,(320 * 200) / 16
+;;                 mov     ah,48h
+;;                 int     21h
+;;                 mov     nError,ERR_MEM
+;;                 jc      TerminateError
+;;                 mov     segBuffer,ax
+
+                call    CPUType
+                mov     nError,ERR_CPU
+                cmp     nCPU,2          ; 2 = index of the 386 name in CPUNameTable
+                jl      TerminateError
+
+                mov     ds,segCode
+                mov     dx,offset fnPalette
+                call    LoadPaletteFile ; NOTE(review): nError is still ERR_CPU
+                jc      TerminateError  ;  unless the callee sets it -- confirm
+
+                call    LoadIndex
+                jc      TerminateError
+
+                KEYB_START
+
+                call    Beginning       ; Can display an entry screen here
+
+        ; This is linked in from Michael Abrash's zen timer code.
+        ; (But I wrote the Click myself)
+        call    Click
+        call    ZTimerOn
+
+                call    MainLoop
+
+        call    ZTimerOff
+        call    Click
+
+                call    Ending          ; Can display an exit screen here
+
+                KEYB_END
+
+Terminate:      mov     nError,ERR_OK
+TerminateError:
+                mov     ax,cs   ;DOS functions require that DS point
+                mov     ds,ax   ; to text to be displayed on the screen
+                mov     bh,0
+                mov     bl,nError
+                shl     bx,1            ; word index into msgtblError
+                DOSPRINT msgtblError[bx]
+
+                mov     ax,pages        ; value to print (pages: defined elsewhere)
+                mov     ds,segCode
+                mov     si,offset strNumPages
+                call    Int2Ascii
+                DOSPRINT <offset msgPages>
+
+        call    ZTimerReport    ; NOTE: also reached on the error paths above,
+                                ;  where ZTimerOn/ZTimerOff were never called
+                mov     al,nError
+                mov     ah,4ch        ; DOS Terminate
+                int     21h
+                ; Don't need to RET!  We're outta here
+Initialize      ENDP
+
+;; Clicks the internal speaker.  I use this to indicate that page timing
+;; has started.  Overwrites AX and CX; port 61h is put back as it was.
+Click           PROC
+                in      al,61h
+                mov     ah,al           ; keep the original port 61h value
+                or      al,3            ; set bits 0 and 1 (speaker on)
+                out     61h,al
+
+                mov     cx,5000         ; (this is an arbitrary delay!)
+spkr_on:        loop    spkr_on
+                mov     al,ah           ; write the original value back
+                out     61h,al
+                ret
+Click           ENDP
+
+;; Int2Ascii: signed AX -> 6 chars at CS:SI (from an old 8088 "Learn Assembly" book)
+Int2Ascii       PROC
+                mov     cx,6            ; NOTE: dead store, CX is reloaded below
+                mov     byte ptr cs:[si],' '    ; sign position
+                mov     byte ptr cs:[si+1],'0'  ; five digits, pre-filled with
+                mov     byte ptr cs:[si+2],'0'  ;  '0' so unused leading
+                mov     byte ptr cs:[si+3],'0'  ;  positions stay "0"
+                mov     byte ptr cs:[si+4],'0'
+                mov     byte ptr cs:[si+5],'0'
+                add     si,6            ; SI -> one past the last digit
+                mov     cx,10           ; divisor
+                or      ax,ax
+                jns     clear_divide
+                neg     ax              ; negative: work on the magnitude
+                mov     byte ptr cs:[si-6],'-'  ; and put '-' in the sign position
+clear_divide:   mov     dx,0
+                div     cx              ; AX = quotient, DX = next digit
+                add     dx,'0'
+                dec     si              ; digits are stored right to left
+                mov     cs:[si],dl
+                or      ax,ax
+                jnz     clear_divide    ; until the quotient reaches zero
+                ret
+Int2Ascii       ENDP
+
+;; LoadFile: given a filename at DS:DX, reads the whole file into newly
+;; allocated DOS memory and returns its segment in DX (data at DX:0000).
+;; On failure CF is still set from the failing DOS call and nError holds
+;; the ERR_* code; on success nError = ERR_OK and DS is left pointing
+;; past the loaded data.  AX, BX, CX, SI and DI are overwritten.  Only
+;; works for a file < 640k in size, but files bigger than 64k are fine.
+;; This code comes from Future Crew's STMIK sampler "Mental Surgery";
+;; thank you, FC, for releasing that code!  Several of the routines in
+;; this program were inspired or helped along by having that source...
+EVEN
+LoadFile        PROC    NEAR
+                ;set: DX=offset to filename
+                ;return: DX=segment of file
+
+        ; Open the datafile at DS:DX.
+                mov     ax,3D00h        ; 3D,00 -> Open file, read only
+                                        ; DS:DX already points at filename
+                int     21h             ;  returns AX=file handle
+                mov     cl,ERR_FILENOTFOUND
+                jc      ferror
+                mov     bx,ax           ; Store file handle in BX
+                mov     si,bx           ; and keep a copy in SI
+
+        ; Get the length of the file so we know how much to allocate
+                mov     ax,4202h        ; 42,02 -> Seek, signed from end
+                mov     cx,0            ; CX:DX is a long file offset,
+                                        ; BX is already set as file handle
+                mov     dx,0            ;  zero in this case = end of file
+                int     21h             ; (returns long offset in DX:AX)
+                mov     cl,ERR_FILE
+                jc      ferror
+
+;;;             shr     dx,1            ; This is original FC code,
+;;;             rcr     ax,1            ; which I removed because the
+;;;             shr     dx,1            ; 386 has a nice instruction
+;;;             rcr     ax,1            ; to do this all!
+;;;             shr     dx,1            ; But 286 users will want to
+;;;             rcr     ax,1            ; return to this code, instead
+;;;             shr     dx,1            ; of SHRD ax,dx,4
+;;;             rcr     ax,1            ;
+
+        ; Now turn that long DX:AX into a number of paragraphs to allocate
+        ; for when we read the file.
+                shrd    ax,dx,4         ; AX = low word of (DX:AX / 16),
+                mov     bx,ax           ;  and stores this in BX
+                inc     bx      ; round up: the shift drops a partial paragraph
+                mov     ah,48h          ; 48 -> Allocate memory
+                                        ; BX already = # of paragraphs
+                int     21h
+                mov     cl,ERR_MEM
+                jc      ferror
+                mov     di,ax           ; keep the allocated segment in DI
+
+        ; Seek back to the start so the file can be read into that memory.
+                mov     ax,4200h        ; 42,00 -> Seek, absolute offset
+                mov     bx,si           ; BX is the file handle.
+                mov     cx,0            ; CX:DX is a long offset
+                mov     dx,0
+                int     21h
+                mov     cl,ERR_FILE     ; CL was 0 (= ERR_OK) from "mov cx,0"
+                jc      ferror
+
+        ; Now read the file into memory
+                mov     ds,di           ; DS points at alloc'd memory
+ReadBlock:      mov     ah,3fh          ; 3F -> Read file
+                mov     cx,32768        ; read 32768 bytes at a time
+                mov     dx,0            ; DS:DX points at beginning of
+                int     21h             ;  this block of memory.
+                mov     cl,ERR_FILE
+                jc      ferror
+                mov     dx,ds           ; Offset DS by (32768/16), which
+                add     dx,800h         ;  is the number of paragraphs in
+                mov     ds,dx           ;  each block of 32768 bytes.
+                cmp     ax,32768        ; Did we actually read 32768 bytes?
+                je      ReadBlock       ; If so, there's more to read...
+                                        ; Otherwise, we've read all the
+                                        ;  data in the file.
+
+        ; So now, close the file handle.
+                mov     ah,3Eh          ; 3E -> Close file
+                                        ; BX still is the file handle
+                int     21h             ; (result of the close is not checked)
+
+        ; Everything went ok.  Return the segment in DX.
+                mov     dx,di
+                mov     nError,ERR_OK   ; NOTE(review): DS was repointed above;
+                ret                     ;  relies on nError being CS-addressed
+ferror:         mov     nError,cl       ; CF is still set for the caller
+                ret
+LoadFile        ENDP
+
+;; Eventually, this should load in an index of all data files to
+;; allow for filenames to be specified outside of the program.  The
+;; goal is to make the program have no hardcoded filenames...
+;; Of course, the structure of this index and its entries will be
+;; hardcoded, as will the structures of all of the files it includes.
+LoadIndex       PROC    near
+                ret                     ; placeholder: registers and flags untouched
+LoadIndex       ENDP
+
+;; SaveVideo: saves the video mode (nDisplay), the cursor position (wCPos)
+;; and 80*50 words from segText:0 into bufText.  DS and ES are changed.
+SaveVideo       PROC    near
+                mov     ah,0Fh
+                int     10h             ; Get current display Mode
+                mov     nDisplay,al
+                mov     ah,03h          ; cursor position query,
+                mov     bh,0            ;  for page 0
+                int     10h
+                mov     wCPos,dx
+
+                mov     ds,segText      ; source:  segText:0
+                mov     si,0
+                mov     es,segCode      ; destination:  segCode:bufText
+                mov     di,offset bufText
+                mov     cx,80*50        ; word count = size of bufText
+            rep movsw                   ; (assumes the direction flag is clear)
+                ret
+SaveVideo       ENDP
+
+;; RestoreVideo: sets the video mode saved in nDisplay, puts the cursor back
+;; at wCPos and copies bufText back to segText:0.  DS and ES are changed.
+RestoreVideo    PROC    near
+                mov     ah,00h
+                mov     al,nDisplay
+                int     10h             ; Set the saved display mode
+                mov     ah,02h          ; set cursor position,
+                mov     bh,0            ;  for page 0
+                mov     dx,wCPos
+                int     10h
+
+                PAL_UPDATE      ; When flipping into text mode, re-do the
+                                ; palette because the BIOS changes it.
+
+                mov     es,segText      ; destination:  segText:0
+                mov     di,0
+                mov     ds,segCode      ; source:  segCode:bufText
+                mov     si,offset bufText
+                mov     cx,80*50        ; word count = size of bufText
+            rep movsw                   ; (assumes the direction flag is clear)
+                ret
+RestoreVideo    ENDP
+
\ No newline at end of file
diff --git a/16/scrasm/KEYB.INC b/16/scrasm/KEYB.INC
new file mode 100644
index 00000000..ef730cf0
--- /dev/null
+++ b/16/scrasm/KEYB.INC
@@ -0,0 +1,237 @@
+;; ====================================================================
+;; Macros
+;; ====================================================================
+;; Jump to label if key is currently down (key: offset into _keyFlags)
+JKEYP           MACRO   key,label
+                cmp     byte ptr cs:_keyFlags[key+1],1  ; high byte of the word
+                je      label
+                ENDM
+;; Jump to label if key is not currently down (key: offset into _keyFlags)
+JKEYNP          MACRO   key,label
+                cmp     byte ptr cs:_keyFlags[key+1],1  ; high byte of the word
+                jne     label
+                ENDM
+
+;; Note that JNKEY and JKEY both modify _flKeyChanged, so you cannot
+;; use one after the other!  In other words,
+;;  JNKEY  no_key
+;;  JKEY   yes_key      ;<-- this will fail
+;; will not work like you'd think it would.  The second call (JKEY)
+;; will not know that a key has been pressed!
+;; Jump if _flKeyChanged is 0 (no key event seen); otherwise clear it:
+JNKEY           MACRO   label
+                cmp     cs:_flKeyChanged,0
+                je      label
+                mov     cs:_flKeyChanged,0      ; <--- important!
+                ENDM
+;; Jump if _flKeyChanged is nonzero (key event seen); always clears it:
+JKEY            MACRO   label
+                cmp     cs:_flKeyChanged,0
+                mov     cs:_flKeyChanged,0      ; MOV keeps the CMP flags
+                jne     label
+                ENDM
+
+;; Start keyboard interrupts: swap in the stored INT 9 vector, clear the flag
+KEYB_START      MACRO
+                call    SwapInt9
+                mov     cs:_flKeyChanged,0
+                ENDM
+
+;; Clear keyboard interrupts: a second SwapInt9 swaps the vectors back
+KEYB_END        MACRO
+                call    SwapInt9
+                ENDM
+
+;; Credit for these routines:  Steve Dollins, Brown Computer Group.
+;; I didn't write any of the code below -- just heisted it from some
+;; stuff that he wrote and released!  Very useful keyboard routines.
+;; Any comments prefixed SDE were added by me.
+_keyFlags       dw      256 dup (0)     ; SDE: since they only use 2 bits
+                                        ; per word, this is a tradeoff,
+                                        ; space for time
+
+oldint9_offset  dw      offset newint9  ; starts out as newint9, so the first
+oldint9_segment dw      seg newint9     ;  SwapInt9 call installs newint9
+
+_flKeyChanged   dw      0               ; set to 1 by newint9 on every key event
+
+;-----------------------------------------------------------------------
+; void SwapInt9( void )
+;       Returns with DS = CS; AX, BX, DX and ES are overwritten.
+;       SwapInt9() exchanges the vector in oldint9_segment:oldint9_offset
+;       with the vector in the interrupt table for INT 9h.
+;-----------------------------------------------------------------------
+
+SwapInt9        PROC    far
+                mov     ax,cs
+                mov     ds,ax
+
+                mov     ax,03509h       ; Get interrupt 09h
+                int     21h             ;   return in ES:BX
+
+                mov     ax,oldint9_segment
+                mov     dx,oldint9_offset
+                push    ds              ; DS must hold the vector's segment
+                mov     ds,ax           ;  for the call below
+                mov     ax,02509h       ; Set new interrupt
+                int     21h             ;  to address in DS:DX
+                pop     ds              ; back to DS = CS
+
+                mov     oldint9_segment,es    ; Save the old interrupt
+                mov     oldint9_offset,bx
+                ret
+SwapInt9        ENDP
+
+
+;-----------------------------------------------------------------------
+; newint9 is the new keyboard interrupt (INT 9h).
+;
+;       Reads the scan code from the keyboard and modifies the key
+;       flags table.  The high byte is set to the position of the key,
+;       pressed=1, release=0.  The low byte is set to 1 when the key
+;       is pressed and left unmodified when the key is released.
+;-----------------------------------------------------------------------
+newint9         PROC    far
+                push    ax
+                push    bx
+                push    ds
+
+                mov     ax,cs
+                mov     ds,ax
+
+                JKEYNP  kCTRL,not_ctrlaltdel    ; SDE code
+                JKEYNP  kALT,not_ctrlaltdel     ; To still allow ctrl-
+                JKEYNP  kDELETE,not_ctrlaltdel  ; alt-delete.  Nothing
+                jmp     ctrlaltdel      ; worse than a total lockup!
+not_ctrlaltdel:
+
+                in      ax,60h          ; get scan code in AL, control byte in AH
+                mov     bx,ax           ; save a copy in BX
+                xchg    ah,al           ; swap to get control byte in AL
+                or      al,80h          ; clear keyboard
+                out     61h,al          ;   of interrupt
+                and     al,7Fh
+                out     61h,al
+                mov     al,20h          ; send generic EOI to
+                out     20h,al          ;   PIC
+
+                and     bx,0007fh       ; strip all but the scan code
+                shl     bx,1            ; multiply by two to get our offset
+
+                ; if the key was released, the high bit is set in the scan code
+                bt      ax,15           ; move this high bit into the carry flag
+                setnc   byte ptr [_keyFlags+bx+1] ; set "Is being pressed" flag
+                jc      short int09done ; if the key was released, we're done
+                mov     byte ptr [_keyFlags+bx],1 ; set "Has been pressed" flag
+                mov     _flKeyChanged,1         ; (redundant: set again just below)
+int09done:
+                mov     _flKeyChanged,1         ; flagged on press and on release
+                pop     ds
+                pop     bx
+                pop     ax
+                iret
+ctrlaltdel:     int     19h                     ; SDE -- added this.
+                                                ;  Allows a reboot.
+newint9         ENDP
+
+;; Defines the current key procedure (used as a jump-through)
+kprocCur        dw      KprocDirect
+
+;; This is a keyboard procedure.  Normally, this would control some
+;; sprite, or something, and the screen would follow the sprite.  For
+;; the purposes of this code, though (namely, sprite-less scrolling)
+;; it just directly affects ScrollDX and ScrollDY.
+;; This keyproc is inertialess, use + and - to increase speed and
+;; the up/down/left/right keys to move directions.
+;; Pressing K will switch to the other keyprocedure on the fly.
+;; P pauses the screen -- note that this is just for completely
+;; freezing the screen... it doesn't return until you let go!
+
+EVEN
+scroll_speed_x  dw      SCROLL_SPEED                    ; (defaults)
+scroll_speed_y  dw      SCROLL_SPEED * VIRTUAL_WIDTH    ; (defaults)
+KprocDirect     PROC    near
+chk_leftright:  mov     ax,0            ; AX = 0: used when neither key is down
+                JKEYNP  kRIGHT,not_right
+                mov     ax,scroll_speed_x       ; right is checked before left
+                mov     ScrollDX,ax
+                jmp     chk_updown
+not_right:      JKEYNP  kLEFT,not_left
+                sub     ax,scroll_speed_x       ; AX = -scroll_speed_x
+                mov     ScrollDX,ax
+                jmp     chk_updown
+not_left:       mov     ScrollDX,ax
+
+chk_updown:     mov     ax,0            ; AX = 0: used when neither key is down
+                JKEYNP  kUP,not_up
+                sub     ax,scroll_speed_y       ; AX = -scroll_speed_y
+                mov     ScrollDY,ax
+                jmp     chk_other
+not_up:         JKEYNP  kDOWN,not_down
+                mov     ax,scroll_speed_y
+                mov     ScrollDY,ax
+                jmp     chk_other
+not_down:       mov     ScrollDY,ax
+
+chk_other:      JKEYNP  kK,not_k
+                mov     kprocCur,KprocInertia   ; K: switch to the other keyproc
+not_k:          JKEYNP  kM,not_m
+                mov     bDoTransition,1         ; M: set the transition flag
+not_m:          JKEYNP  kGREY_MINUS,not_minus
+                cmp     scroll_speed_x,1        ; grey -: never go below 1
+                jle     not_minus
+                dec     scroll_speed_x
+                sub     scroll_speed_y,VIRTUAL_WIDTH    ; y speed moves in step
+not_minus:      JKEYNP  kGREY_PLUS,not_plus
+                cmp     scroll_speed_x,16       ; grey +: never go above 16
+                jge     not_plus
+                inc     scroll_speed_x
+                add     scroll_speed_y,VIRTUAL_WIDTH    ; y speed moves in step
+not_plus:
+
+pause_key:      JKEYP   kP,pause_key
+
+                ret
+KprocDirect     ENDP
+
+;; This keyproc has inertia, so + and - don't work.
+;; Use up/down/left/right keys to increase speed in those directions.
+;; Pressing K will switch to the other keyprocedure on the fly.
+;; P pauses the screen -- note that this is just for completely
+;; freezing the screen... it doesn't return until you let go!
+KprocInertia    PROC    near
+chk2_leftright: JKEYNP  kRIGHT,not2_right
+                cmp     ScrollDX,16             ; already at the +16 limit?
+                je      not2_right
+                inc     ScrollDX
+                jmp     chk2_updown
+not2_right:     JKEYNP  kLEFT,not2_left
+                cmp     ScrollDX,-16            ; already at the -16 limit?
+                je      not2_left
+                dec     ScrollDX
+                jmp     chk2_updown
+not2_left:
+
+chk2_updown:    JKEYNP  kUP,not2_up
+                cmp     ScrollDY,-VIRTUAL_WIDTH * 16    ; at the upward limit?
+                je      not2_up
+                add     ScrollDY,-VIRTUAL_WIDTH ; one VIRTUAL_WIDTH step up
+                jmp     chk2_other
+not2_up:        JKEYNP  kDOWN,not2_down
+                cmp     ScrollDY,VIRTUAL_WIDTH * 16     ; at the downward limit?
+                je      not2_down
+                add     ScrollDY,VIRTUAL_WIDTH  ; one VIRTUAL_WIDTH step down
+                jmp     chk2_other
+not2_down:
+
+chk2_other:     JKEYNP  kK,not2_k
+                mov     kprocCur,KprocDirect    ; K: switch to the other keyproc
+not2_k:         JKEYNP  kM,not2_m
+                mov     bDoTransition,1         ; M: set the transition flag
+not2_m:
+
+pause2_key:     JKEYP   kP,pause2_key
+
+                ret
+KprocInertia    ENDP
+
\ No newline at end of file
diff --git a/16/scrasm/LZTIMER.ASM b/16/scrasm/LZTIMER.ASM
new file mode 100644
index 00000000..5fed7be1
--- /dev/null
+++ b/16/scrasm/LZTIMER.ASM
@@ -0,0 +1,636 @@
+;
+; *** Listing 2-5 ***
+;
+; The long-period Zen timer. (LZTIMER.ASM)
+; Uses the 8253 timer and the BIOS time-of-day count to time the
+; performance of code that takes less than an hour to execute.
+; Because interrupts are left on (in order to allow the timer
+; interrupt to be recognized), this is less accurate than the
+; precision Zen timer, so it is best used only to time code that takes
+; more than about 54 milliseconds to execute (code that the precision
+; Zen timer reports overflow on). Resolution is limited by the
+; occurrence of timer interrupts.
+;
+; By Michael Abrash 4/26/89
+;
+; Externally callable routines:
+;
+;  ZTimerOn: Saves the BIOS time of day count and starts the
+;       long-period Zen timer.
+;
+;  ZTimerOff: Stops the long-period Zen timer and saves the timer
+;       count and the BIOS time-of-day count.
+;
+;  ZTimerReport: Prints the time that passed between starting and
+;       stopping the timer.
+;
+; Note: If either more than an hour passes or midnight falls between
+;       calls to ZTimerOn and ZTimerOff, an error is reported. For
+;       timing code that takes more than a few minutes to execute,
+;       either the DOS TIME command in a batch file before and after
+;       execution of the code to time or the use of the DOS
+;       time-of-day function in place of the long-period Zen timer is
+;       more than adequate.
+;
+; Note: The PS/2 version is assembled by setting the symbol PS2 to 1.
+;       PS2 must be set to 1 on PS/2 computers because the PS/2's
+;       timers are not compatible with an undocumented timer-stopping
+;       feature of the 8253; the alternative timing approach that
+;       must be used on PS/2 computers leaves a short window
+;       during which the timer 0 count and the BIOS timer count may
+;       not be synchronized. You should also set the PS2 symbol to
+;       1 if you're getting erratic or obviously incorrect results.
+;
+; Note: When PS2 is 0, the code relies on an undocumented 8253
+;       feature to get more reliable readings. It is possible that
+;       the 8253 (or whatever chip is emulating the 8253) may be put
+;       into an undefined or incorrect state when this feature is
+;       used.
+;
+;     ***************************************************************
+;     * If your computer displays any hint of erratic behavior      *
+;     * after the long-period Zen timer is used, such as the floppy *
+;     * drive failing to operate properly, reboot the system, set   *
+;     * PS2 to 1 and leave it that way!                             *
+;     ***************************************************************
+;
+; Note: Each block of code being timed should ideally be run several
+;       times, with at least two similar readings required to
+;       establish a true measurement, in order to eliminate any
+;       variability caused by interrupts.
+;
+; Note: Interrupts must not be disabled for more than 54 ms at a
+;       stretch during the timing interval. Because interrupts
+;       are enabled, keys, mice, and other devices that generate
+;       interrupts should not be used during the timing interval.
+;
+; Note: Any extra code running off the timer interrupt (such as
+;       some memory-resident utilities) will increase the time
+;       measured by the Zen timer.
+;
+; Note: These routines can introduce inaccuracies of up to a few
+;       tenths of a second into the system clock count for each
+;       code section timed. Consequently, it's a good idea to
+;       reboot at the conclusion of timing sessions. (The
+;       battery-backed clock, if any, is not affected by the Zen
+;       timer.)
+;
+; All registers and all flags are preserved by all routines.
+;
+                DOSSEG
+                .model  small
+                .code
+        public  ZTimerOn, ZTimerOff, ZTimerReport
+
+;
+; Set PS2 to 0 to assemble for use on a fully 8253-compatible
+; system; when PS2 is 0, the readings are more reliable if the
+; computer supports the undocumented timer-stopping feature,
+; but may be badly off if that feature is not supported. In
+; fact, timer-stopping may interfere with your computer's
+; overall operation by putting the 8253 into an undefined or
+; incorrect state.  Use with caution!!!
+;
+; Set PS2 to 1 to assemble for use on non-8253-compatible
+; systems, including PS/2 computers; when PS2 is 1, readings
+; may occasionally be off by 54 ms, but the code will work
+; properly on all systems.
+;
+; A setting of 1 is safer and will work on more systems,
+; while a setting of 0 produces more reliable results in systems
+; which support the undocumented timer-stopping feature of the
+; 8253. The choice is yours.
+;
+PS2     equ     1
+;
+; Base address of the 8253 timer chip.
+;
+BASE_8253               equ     40h
+;
+; The address of the timer 0 count registers in the 8253.
+;
+TIMER_0_8253            equ     BASE_8253 + 0
+;
+; The address of the mode register in the 8253.
+;
+MODE_8253               equ     BASE_8253 + 3
+;
+; The address of the BIOS timer count variable in the BIOS
+; data segment.
+;
+TIMER_COUNT             equ     46ch
+;
+; Macro to emulate a POPF instruction in order to fix the bug in some
+; 80286 chips which allows interrupts to occur during a POPF even when
+; interrupts remain disabled.
+;
+MPOPF macro                     ;pops the flags word left on the stack by a prior pushf
+        local   p1, p2          ;labels are unique to each expansion
+        jmp short p2            ;step over the iret; it is reached only via the call
+p1:     iret                    ;jump to pushed address & pop flags
+p2:     push    cs              ;construct far return address to
+        call    p1              ; the next instruction
+        endm
+
+;
+; Macro to delay briefly to ensure that enough time has elapsed
+; between successive I/O accesses so that the device being accessed
+; can respond to both accesses even on a very fast PC.
+;
+DELAY   macro
+        rept    3               ;three jumps to the very next instruction
+        jmp     $+2
+        endm
+        endm
+
+StartBIOSCountLow       dw      ?       ;BIOS count low word at the
+                                        ; start of the timing period
+StartBIOSCountHigh      dw      ?       ;BIOS count high word at the
+                                        ; start of the timing period
+EndBIOSCountLow         dw      ?       ;BIOS count low word at the
+                                        ; end of the timing period
+EndBIOSCountHigh        dw      ?       ;BIOS count high word at the
+                                        ; end of the timing period
+EndTimedCount           dw      ?       ;timer 0 count at the end of
+                                        ; the timing period
+ReferenceCount          dw      ?       ;number of counts required to
+                                        ; execute timer overhead code
+;
+; String printed to report results.
+;
+OutputStr       label   byte
+                db      0dh, 0ah, 'Timed count: '
+TimedCountStr   db      10 dup (?)
+                db      ' microseconds', 0dh, 0ah
+                db      '$'
+;
+; Temporary storage for timed count as it's divided down by powers
+; of ten when converting from doubleword binary to ASCII.
+;
+CurrentCountLow         dw      ?
+CurrentCountHigh        dw      ?
+;
+; Powers of ten table used to perform division by 10 when doing
+; doubleword conversion from binary to ASCII.
+;
+PowersOfTen     label   word
+        dd      1
+        dd      10
+        dd      100
+        dd      1000
+        dd      10000
+        dd      100000
+        dd      1000000
+        dd      10000000
+        dd      100000000
+        dd      1000000000
+PowersOfTenEnd  label   word
+;
+; String printed to report that the high word of the BIOS count
+; changed while timing (an hour elapsed or midnight was crossed),
+; and so the count is invalid and the test needs to be rerun.
+;
+TurnOverStr     label   byte
+        db      0dh, 0ah
+        db      '****************************************************'
+        db      0dh, 0ah
+        db      '* Either midnight passed or an hour or more passed *'
+        db      0dh, 0ah
+        db      '* while timing was in progress. If the former was  *'
+        db      0dh, 0ah
+        db      '* the case, please rerun the test; if the latter   *'
+        db      0dh, 0ah
+        db      '* was the case, the test code takes too long to    *'
+        db      0dh, 0ah
+        db      '* run to be timed by the long-period Zen timer.    *'
+        db      0dh, 0ah
+        db      '* Suggestions: use the DOS TIME command, the DOS   *'
+        db      0dh, 0ah
+        db      '* time function, or a watch.                       *'
+        db      0dh, 0ah
+        db      '****************************************************'
+        db      0dh, 0ah
+        db      '$'
+
+;********************************************************************
+;* Routine called to start timing.                                  *
+;********************************************************************
+
+ZTimerOn        proc    near
+
+;
+; Save the context of the program being timed.
+;
+        push    ax                      ;AX is scratch for the port and memory accesses
+        pushf                           ;caller's flags; popped by the final MPOPF
+;
+; Set timer 0 of the 8253 to mode 2 (divide-by-N), to cause
+; linear counting rather than count-by-two counting. Also stops
+; timer 0 until the timer count is loaded, except on PS/2
+; computers.
+;
+        mov     al,00110100b    ;mode 2
+        out     MODE_8253,al
+;
+; Set the timer count to 0, so we know we won't get another
+; timer interrupt right away.
+; Note: this introduces an inaccuracy of up to 54 ms in the system
+; clock count each time it is executed.
+;
+        DELAY
+        sub     al,al                   ;AL = 0, written as both count bytes
+        out     TIMER_0_8253,al         ;lsb
+        DELAY
+        out     TIMER_0_8253,al         ;msb
+;
+; In case interrupts are disabled, enable interrupts briefly to allow
+; the interrupt generated when switching from mode 3 to mode 2 to be
+; recognized. Interrupts must be enabled for at least 210 ns to allow
+; time for that interrupt to occur. Here, 10 jumps are used for the
+; delay to ensure that the delay time will be more than long enough
+; even on a very fast PC.
+;
+        pushf                           ;second flags copy, so the sti below is
+        sti                             ; undone by the MPOPF after the delay
+        rept 10
+        jmp     $+2
+        endm
+        MPOPF
+;
+; Store the timing start BIOS count.
+; (Since the timer count was just set to 0, the BIOS count will
+; stay the same for the next 54 ms, so we don't need to disable
+; interrupts in order to avoid getting a half-changed count.)
+;
+        push    ds
+        sub     ax,ax
+        mov     ds,ax                   ;DS = 0, so ds:[TIMER_COUNT] is 0000:046Ch
+        mov     ax,ds:[TIMER_COUNT+2]
+        mov     cs:[StartBIOSCountHigh],ax      ;timer variables live in the code segment
+        mov     ax,ds:[TIMER_COUNT]
+        mov     cs:[StartBIOSCountLow],ax
+        pop     ds
+;
+; Set the timer count to 0 again to start the timing interval.
+;
+        mov     al,00110100b            ;set up to load initial
+        out     MODE_8253,al            ; timer count
+        DELAY
+        sub     al,al
+        out     TIMER_0_8253,al         ;load count lsb
+        DELAY
+        out     TIMER_0_8253,al         ;load count msb
+;
+; Restore the context of the program being timed and return to it.
+;
+        MPOPF                           ;restores the flags pushed at entry
+        pop     ax
+        ret
+
+ZTimerOn        endp
+
+;********************************************************************
+;* Routine called to stop timing and get count.                     *
+;********************************************************************
+
+ZTimerOff proc  near
+
+;
+; Save the context of the program being timed.
+;
+        pushf                           ;caller's flags; popped by the final MPOPF
+        push    ax
+        push    cx                      ;CX is the RefLoop counter and shift count below
+;
+; In case interrupts are disabled, enable interrupts briefly to allow
+; any pending timer interrupt to be handled. Interrupts must be
+; enabled for at least 210 ns to allow time for that interrupt to
+; occur. Here, 10 jumps are used for the delay to ensure that the
+; delay time will be more than long enough even on a very fast PC.
+;
+        sti
+        rept    10
+        jmp     $+2
+        endm
+
+;
+; Latch the timer count.
+;
+
+if PS2                                  ;PS2 equ 1 in this file, so this branch is assembled
+
+        mov     al,00000000b
+        out     MODE_8253,al            ;latch timer 0 count
+;
+; This is where a one-instruction-long window exists on the PS/2.
+; The timer count and the BIOS count can lose synchronization;
+; since the timer keeps counting after it's latched, it can turn
+; over right after it's latched and cause the BIOS count to turn
+; over before interrupts are disabled, leaving us with the timer
+; count from before the timer turned over coupled with the BIOS
+; count from after the timer turned over. The result is a count
+; that's 54 ms too long.
+;
+
+else
+
+;
+; Set timer 0 to mode 2 (divide-by-N), waiting for a 2-byte count
+; load, which stops timer 0 until the count is loaded. (Only works
+; on fully 8253-compatible chips.)
+;
+        mov     al,00110100b            ;mode 2
+        out     MODE_8253,al
+        DELAY
+        mov     al,00000000b            ;latch timer 0 count
+        out     MODE_8253,al
+
+endif
+
+        cli                             ;stop the BIOS count
+;
+; Read the BIOS count. (Since interrupts are disabled, the BIOS
+; count won't change.)
+;
+        push    ds
+        sub     ax,ax
+        mov     ds,ax                   ;DS = 0, so ds:[TIMER_COUNT] is 0000:046Ch
+        mov     ax,ds:[TIMER_COUNT+2]
+        mov     cs:[EndBIOSCountHigh],ax
+        mov     ax,ds:[TIMER_COUNT]
+        mov     cs:[EndBIOSCountLow],ax
+        pop     ds
+;
+; Read the timer count and save it.
+;
+        in      al,TIMER_0_8253         ;lsb
+        DELAY
+        mov     ah,al                   ;park the lsb in AH while the msb is read
+        in      al,TIMER_0_8253         ;msb
+        xchg    ah,al                   ;AX = msb:lsb of the latched count
+        neg     ax                      ;convert from countdown
+                                        ; remaining to elapsed
+                                        ; count
+        mov     cs:[EndTimedCount],ax
+;
+; Restart timer 0, which is still waiting for an initial count
+; to be loaded.
+;
+
+ife PS2                                 ;assembled only when PS2 is 0 (skipped in this build)
+
+        DELAY
+        mov     al,00110100b            ;mode 2, waiting to load a
+                                        ; 2-byte count
+        out     MODE_8253,al
+        DELAY
+        sub     al,al
+        out     TIMER_0_8253,al         ;lsb
+        DELAY
+        mov     al,ah
+        out     TIMER_0_8253,al         ;msb
+        DELAY
+
+endif
+
+        sti             ;let the BIOS count continue
+;
+; Time a zero-length code fragment, to get a reference for how
+; much overhead this routine has. Time it 16 times and average it,
+; for accuracy, rounding the result.
+;
+        mov     cs:[ReferenceCount],0   ;ReferenceZTimerOff accumulates into this
+        mov     cx,16
+        cli                             ;interrupts off to allow a
+                                        ; precise reference count
+RefLoop:
+        call    ReferenceZTimerOn
+        call    ReferenceZTimerOff
+        loop    RefLoop                 ;16 passes, CX counts down to 0
+        sti
+        add     cs:[ReferenceCount],8   ;total + (0.5 * 16)
+        mov     cl,4
+        shr     cs:[ReferenceCount],cl  ;(total) / 16 + 0.5
+;
+; Restore the context of the program being timed and return to it.
+;
+        pop     cx
+        pop     ax
+        MPOPF                           ;restores the flags pushed at entry
+        ret
+
+ZTimerOff endp
+
+;
+; Called by ZTimerOff to start the timer for overhead measurements.
+;
+
+ReferenceZTimerOn       proc    near
+;
+; Save the context of the program being timed.
+;
+        push    ax                      ;same save order as ZTimerOn
+        pushf                           ;popped by the MPOPF below
+;
+; Set timer 0 of the 8253 to mode 2 (divide-by-N), to cause
+; linear counting rather than count-by-two counting.
+;
+        mov     al,00110100b    ;mode 2
+        out     MODE_8253,al
+;
+; Set the timer count to 0.
+;
+        DELAY
+        sub     al,al                   ;AL = 0, written as both count bytes
+        out     TIMER_0_8253,al         ;lsb
+        DELAY
+        out     TIMER_0_8253,al         ;msb
+;
+; Restore the context of the program being timed and return to it.
+;
+        MPOPF
+        pop     ax
+        ret
+
+ReferenceZTimerOn       endp
+
+;
+; Called by ZTimerOff to stop the timer and add the result to
+; ReferenceCount for overhead measurements. Doesn't need to look
+; at the BIOS count because timing a zero-length code fragment
+; isn't going to take anywhere near 54 ms.
+;
+
+ReferenceZTimerOff proc near
+;
+; Save the context of the program being timed.
+;
+        pushf                           ;popped by the MPOPF at exit
+        push    ax
+        push    cx                      ;CX is not modified here; presumably saved to mirror ZTimerOff
+
+;
+; Match the interrupt-window delay in ZTimerOff.
+;
+        sti
+        rept    10
+        jmp     $+2
+        endm
+
+        mov     al,00000000b
+        out     MODE_8253,al            ;latch timer
+;
+; Read the count and save it.
+;
+        DELAY
+        in      al,TIMER_0_8253         ;lsb
+        DELAY
+        mov     ah,al                   ;park the lsb in AH while the msb is read
+        in      al,TIMER_0_8253         ;msb
+        xchg    ah,al                   ;AX = msb:lsb of the latched count
+        neg     ax                      ;convert from countdown
+                                        ; remaining to elapsed
+                                        ; count
+        add     cs:[ReferenceCount],ax  ;running total; ZTimerOff averages it over 16 passes
+;
+; Restore the context and return.
+;
+        pop     cx
+        pop     ax
+        MPOPF
+        ret
+
+ReferenceZTimerOff endp
+
+;********************************************************************
+;* Routine called to report timing results.                         *
+;********************************************************************
+
+ZTimerReport    proc    near
+
+        pushf                           ;popped by the MPOPF at ZTimerReportDone
+        push    ax
+        push    bx
+        push    cx
+        push    dx
+        push    si
+        push    di
+        push    ds
+;
+        push    cs      ;DOS functions require that DS point
+        pop     ds      ; to text to be displayed on the screen
+        assume  ds:_TEXT        ;the strings and counts are defined in the code segment
+;
+; See if midnight or more than an hour passed during timing. If so,
+; notify the user.
+;
+        mov     ax,[StartBIOSCountHigh]
+        cmp     ax,[EndBIOSCountHigh]
+        jz      CalcBIOSTime            ;hour count didn't change,
+                                        ; so everything's fine
+        inc     ax
+        cmp     ax,[EndBIOSCountHigh]
+        jnz     TestTooLong             ;midnight or two hour
+                                        ; boundaries passed, so the
+                                        ; results are no good
+        mov     ax,[EndBIOSCountLow]
+        cmp     ax,[StartBIOSCountLow]
+        jb      CalcBIOSTime            ;a single hour boundary
+                                        ; passed-that's OK, so long as
+                                        ; the total time wasn't more
+                                        ; than an hour
+
+;
+; Over an hour elapsed or midnight passed during timing, which
+; renders the results invalid. Notify the user. This misses the
+; case where a multiple of 24 hours has passed, but we'll rely
+; on the perspicacity of the user to detect that case.
+;
+TestTooLong:
+        mov     ah,9                    ;DOS print-string call, DS:DX -> '$'-terminated text
+        mov     dx,offset TurnOverStr
+        int     21h
+        jmp     short ZTimerReportDone
+;
+; Convert the BIOS time to microseconds.
+;
+CalcBIOSTime:
+        mov     ax,[EndBIOSCountLow]
+        sub     ax,[StartBIOSCountLow]  ;16-bit difference; wraps correctly across one carry
+        mov     dx,54925                ;number of microseconds each
+                                        ; BIOS count represents
+        mul     dx                      ;DX:AX = ticks * 54925
+        mov     bx,ax                   ;set aside BIOS count in
+        mov     cx,dx                   ; microseconds
+;
+; Convert timer count to microseconds.
+;
+        mov     ax,[EndTimedCount]
+        mov     si,8381
+        mul     si                      ;DX:AX = count * 8381
+        mov     si,10000
+        div     si              ;* .8381 = * 8381 / 10000
+;
+; Add timer and BIOS counts together to get an overall time in
+; microseconds.
+;
+        add     bx,ax                   ;CX:BX is the 32-bit running total
+        adc     cx,0
+;
+; Subtract the timer overhead and save the result.
+;
+        mov     ax,[ReferenceCount]
+        mov     si,8381         ;convert the reference count
+        mul     si              ; to microseconds
+        mov     si,10000
+        div     si              ;* .8381 = * 8381 / 10000
+        sub     bx,ax
+        sbb     cx,0
+        mov     [CurrentCountLow],bx
+        mov     [CurrentCountHigh],cx
+;
+; Convert the result to an ASCII string by trial subtractions of
+; powers of 10.
+;
+        mov     di,offset PowersOfTenEnd - offset PowersOfTen - 4       ;byte offset of the last (largest) entry
+        mov     si,offset TimedCountStr ;SI -> next digit position in the output string
+CTSNextDigit:
+        mov     bl,'0'                  ;BL = ASCII digit for the current power of ten
+CTSLoop:
+        mov     ax,[CurrentCountLow]
+        mov     dx,[CurrentCountHigh]
+        sub     ax,PowersOfTen[di]      ;32-bit trial subtraction, low word then
+        sbb     dx,PowersOfTen[di+2]    ; high word with borrow
+        jc      CTSNextPowerDown        ;borrow: this power no longer fits
+        inc     bl
+        mov     [CurrentCountLow],ax    ;keep the reduced remainder
+        mov     [CurrentCountHigh],dx
+        jmp     CTSLoop
+CTSNextPowerDown:
+        mov     [si],bl                 ;store the finished digit
+        inc     si
+        sub     di,4                    ;step down to the next smaller power of ten
+        jns     CTSNextDigit            ;loop until the offset goes below 0
+;
+;
+; Print the results.
+;
+        mov     ah,9
+        mov     dx,offset OutputStr
+        int     21h
+;
+ZTimerReportDone:
+        pop     ds
+        pop     di
+        pop     si
+        pop     dx
+        pop     cx
+        pop     bx
+        pop     ax
+        MPOPF
+        ret
+
+ZTimerReport    endp
+
+        end
+
\ No newline at end of file
diff --git a/16/scrasm/LZTIMER.OBJ b/16/scrasm/LZTIMER.OBJ
new file mode 100644
index 00000000..0511b4d5
Binary files /dev/null and b/16/scrasm/LZTIMER.OBJ differ
diff --git a/16/scrasm/MAIN.ASM b/16/scrasm/MAIN.ASM
new file mode 100644
index 00000000..1c33a0fb
--- /dev/null
+++ b/16/scrasm/MAIN.ASM
@@ -0,0 +1,134 @@
+;;=======================================================================;;
+;;                                                                       ;;
+;; Scrolling Routines -- main program                                    ;;
+;;                                                                       ;;
+;; All other INC files are included here.  The main routines for the     ;;
+;; frame-by-frame execution loop are also here.  Finally I tried to keep ;;
+;; global variables stored in this file as well.                         ;;
+;;                                                                       ;;
+;;=======================================================================;;
+                dosseg
+                .model small
+                .386
+;; LZTIMER.ASM defines the Zen timer routines as "proc near" in small model.
+                .code
+                extrn   ZTimerOn:near, ZTimerOff:near, ZTimerReport:near
+
+INCLUDE constant.inc
+
+
+DW_TABLE        MACRO   step,entries            ;; emits DW 0, step, 2*step, ... (entries words)
+                number = 0
+                count = 0
+                WHILE (count LT entries)
+                        DW      number
+                        number = number + step
+                        count = count + 1
+                        ENDM
+                ENDM
+
+DOSPRINT        MACRO   msg                     ;; msg: operand loaded into DX
+                mov     ah,9                    ;; int 21h function 9
+                mov     dx,msg
+                int     21h
+                ENDM
+
+EVEN
+Mult320         label   WORD
+MultBufWidth    label   WORD
+                DW_TABLE 320,200
+MultVirtWidth   label   WORD
+                DW_TABLE (VIRTUAL_WIDTH/4),200
+
+INCLUDE palette.inc
+INCLUDE keyb.inc
+INCLUDE modex.inc
+INCLUDE page.inc
+INCLUDE init.inc
+INCLUDE map.inc
+;INCLUDE sprite.inc NOT FOR NOW
+INCLUDE scroll.inc
+
+;; Various segments that need to be filled in later...
+EVEN
+segVideo        dw      0A000h          ; videoram segment
+segText         dw      0B800h          ; text segment
+segMap          dw      -1              ; Map info segment
+segTiles        dw      -1              ; Tile bitmap segment
+segBuffer       dw      -1              ; Local 320x200 buffer segment
+segCode         dw      -1              ; Code segment
+segPSP          dw      -1              ; PSP segment
+segPalette      dw      -1              ; Palette segment
+segTextPal      dw      -1              ; Saved text palette
+
+EVEN
+bDoTransition   db      0
+
+;; Per-frame work: scroll, then flip pages.  Called from MainLoop,
+;; Beginning and the map-transition path.  Right now it just scrolls,
+;; but later all sprite animation would occur here too.
+EVEN
+OneFrame        PROC    near
+                call    Scroll          ; Scrolls the screen
+;               call    AnimateSprites  ; prepares sprites on drawpage
+                jmp     FlipPage        ; shows drawpage...
+                ; no RET necessary: tail jump, FlipPage returns to our caller
+OneFrame        ENDP
+
+;; Each frame -- call the frame motion code, then check for keyhit.
+EVEN
+MainLoop        PROC    NEAR
+next_frame:     call    OneFrame
+                JNKEY   next_frame              ; presumably loops while no key is down -- confirm in keyb.inc
+                JKEYP   kESC,all_done   ; ESC -> quit, always
+                call    kprocCur                ; indirect call through the current key procedure
+                mov     al,bDoTransition
+                cmp     al,0
+                je      next_frame              ; no map transition was requested
+transition:     FLASH_OFF 16,segPalette
+                mov     bDoTransition,0         ; consume the request
+                mov     ax,1
+                sub     ax,nMap                 ; AX = 1 - nMap
+                mov     nMap,ax         ; Flip maps
+
+                call    LoadData
+                call    update_full     ;<<<<
+                call    OneFrame
+                FLASH_ON 16,segPalette
+                jmp     next_frame
+all_done:       ret
+MainLoop        ENDP
+
+;; Beginning code -- Leaves text mode (saving the text screen) via
+;;                   a fade.  It loads the map data and draws one
+;;                   frame before it fades on.
+Beginning       PROC    near
+                NEW_PAL segTextPal              ; presumably allocates the palette buffer -- confirm in palette.inc
+                PAL_SAVE segTextPal             ; keep the text palette; Ending flashes it back on
+                FADE_OFF 1,segTextPal
+                call    SaveVideo
+                MODEX_START             ; 320x200 Mode X graphics mode
+                PAL_BLACK
+
+                call    LoadData        ; This call will change...
+
+                call    update_full     ;<<<<
+                call    OneFrame
+                FADE_ON 1,segPalette
+                ret
+Beginning       ENDP
+
+;; Ending code -- restore to text mode via a flash
+Ending          PROC    near
+                FLASH_OFF 8,segPalette          ; flash out using the graphics palette
+                call    RestoreVideo            ; counterpart of the SaveVideo call in Beginning
+                FLASH_ON 8,segTextPal           ; flash in using the text palette saved in Beginning
+                ret
+Ending          ENDP
+
+                .data
+
+                .stack 2048
+
+                END Initialize
+
\ No newline at end of file
diff --git a/16/scrasm/MAIN.OBJ b/16/scrasm/MAIN.OBJ
new file mode 100644
index 00000000..ff612818
Binary files /dev/null and b/16/scrasm/MAIN.OBJ differ
diff --git a/16/scrasm/MAKEFILE b/16/scrasm/MAKEFILE
new file mode 100644
index 00000000..8b9557ae
--- /dev/null
+++ b/16/scrasm/MAKEFILE
@@ -0,0 +1,47 @@
+OBJS=main.obj lztimer.obj
+INCLUDES=modex.inc keyb.inc palette.inc page.inc scroll.inc map.inc \
+         constant.inc init.inc
+PROGRAM=scroll
+# Inference rules: .c/.asm -> .obj, and .obj -> .exe via a .lnk response file.
+.c.obj:
+   cl -c -Zi -Od -W4 $*.c
+
+.asm.obj:
+   masm -ml -zi $*.asm
+
+.obj.exe:
+   link /CO @$*.lnk
+# First explicit target: the scroller plus the three gen* tools.
+project:  scroll.exe gensq.exe genmap.exe genpal.exe
+# main.asm pulls these files in with INCLUDE, so it is listed as depending on them.
+main.asm:  $(INCLUDES)
+# Linker response file: the objects joined with '+', then the program name.
+scroll.lnk:  makefile
+   echo $(OBJS: =+)  > $*.lnk
+   echo $(PROGRAM); >> $*.lnk
+
+scroll.exe: $(OBJS) scroll.lnk
+   link /CO @$*.lnk
+# Each gen* tool is a single-object program with a one-line response file.
+gensq.obj: $*.c
+
+gensq.lnk: makefile
+   echo $*.obj; > $*.lnk
+
+gensq.exe: $*.obj $*.lnk
+
+genmap.obj: $*.c
+
+genmap.lnk: makefile
+   echo $*.obj; > $*.lnk
+
+genmap.exe: $*.obj $*.lnk
+
+genpal.obj: $*.c
+
+genpal.lnk: makefile
+   echo $*.obj; > $*.lnk
+
+genpal.exe: $*.obj $*.lnk
+
+
\ No newline at end of file
diff --git a/16/scrasm/MAP.INC b/16/scrasm/MAP.INC
new file mode 100644
index 00000000..c87494c2
--- /dev/null
+++ b/16/scrasm/MAP.INC
@@ -0,0 +1,413 @@
+;; MAP in own segment allows map of tiles to be up to 65536 tiles in area
+;; which translates to about 16.8 million pixels of virtual screen.  This
+;; can be represented in almost any rectangle -- just set MAP_WIDTH.
+
+;; Sorry this code isn't commented -- I was working on it right up until
+;; the point that I released this.  You have any questions?  Ask away
+;; (my internet address is in the DOC file).
+
+MAPHEADER       STRUCT, NONUNIQUE       ; Header that LoadMapFile copies from the front of a map file
+                MapName BYTE    ""
+                Wid     WORD    2       ; added to BP to step one map row down (draw_col)
+                Ht      WORD    3       ; reloads the row counter after a vertical wrap (draw_col)
+                Extent  WORD    4       ; subtracted from BP on a vertical wrap (draw_col)
+                OffX1   WORD    5       ; OffX1+OffY1 = map offset for the left column / top row
+                OffY1   WORD    6
+                OffX2   WORD    7       ; OffX2+OffY1 = map offset for the right column
+                OffY2   WORD    8       ; OffX1+OffY2 = map offset for the bottom row
+                WrapX   WORD    9       ; seeds the "tiles until horizontal wrap" counter
+                WrapY   WORD    10      ; seeds the "tiles until vertical wrap" counter
+                Magic   WORD    11      ; NOTE(review): purpose not evident from MAP.INC
+MAPHEADER       ENDS
+MapInfo         MAPHEADER <>            ; live header, overwritten by LoadMapFile
+
+
+; In: DS:DX = offset of filename.  Out: CF set on error; else segMap/MapInfo updated.
+LoadMapFile     PROC    near
+                mov     ax,segMap
+                cmp     ax,-1           ; -1 marks "no map loaded"
+                je      map_not_loaded
+                sub     ax,(SIZEOF MAPHEADER) / 16      ; undo the header skip applied when segMap was stored
+                mov     es,ax
+                mov     ah,49h          ; DOS: free the memory block at ES
+                int     21h
+                mov     nError,ERR_MEM  ; written even on success; MOV leaves CF for the JC below
+                jc      lm_err
+                mov     segMap,-1
+
+map_not_loaded: call    LoadFile        ; CF = error, else DX = segment of the loaded data (as used below)
+                jc      lm_err
+
+                mov     ds,dx           ; DS:SI -> header at the start of the loaded data
+                mov     si,0
+                mov     ax,cs
+                mov     es,ax
+                lea     di,MapInfo      ; ES:DI -> MapInfo (ES = CS)
+                mov     cx,(SIZEOF MAPHEADER) / 4
+            rep movsd                   ; NOTE(review): no CLD here -- relies on DF being clear
+
+                add     dx,(SIZEOF MAPHEADER) / 16      ; skip the header paragraphs
+                mov     cs:segMap,dx
+
+                mov     BlankPage.Valid,0       ; mark all three pages invalid
+                mov     ShowPage.Valid,0
+                mov     DrawPage.Valid,0
+
+                mov     upper_left,0            ; reset the scroll state
+                mov     ScrollPosX,0
+                mov     ScrollPosY,0
+                mov     ScrollDX,0
+                mov     ScrollDY,0
+
+lm_err:         ret                     ; on success DS still holds the loaded-data segment
+LoadMapFile     ENDP
+
+LoadTilesFile   PROC    near            ; In: DS:DX = offset of filename.  Out: CF set on error
+                mov     ax,segTiles
+                cmp     ax,-1           ; -1 marks "no tiles loaded"
+                je      tiles_not_loaded
+                mov     es,ax
+                mov     ah,49h          ; DOS: free the old tile block at ES
+                int     21h
+                mov     nError,ERR_MEM  ; written even on success; MOV leaves CF for the JC below
+                jc      lt_err
+                mov     segTiles,-1     ; the tile block is gone; the map block is untouched
+
+tiles_not_loaded: call    LoadFile      ; CF = error, else DX = segment of the loaded data
+                jc      lt_err
+                mov     segTiles,dx
+
+                mov     BlankPage.Valid,0       ; mark all three pages invalid
+                mov     ShowPage.Valid,0
+                mov     DrawPage.Valid,0
+
+lt_err:         ret
+LoadTilesFile   ENDP
+
+EVEN
+LoadData        PROC    near            ; Loads tile set, then map, number nMap.  CF set on error
+        ; Load squares from data file
+                mov     bx,nMap
+                shl     bx,1            ; word index into the table
+                mov     dx,fntblTiles[bx]
+                mov     ds,segCode      ; DS:DX = what LoadTilesFile expects as the filename
+                call    LoadTilesFile
+                ; returns Carry if error
+                jc      load_error
+
+        ; Load map from data file
+                mov     ds,segCode      ; reloaded before nMap / fntblMap are read
+                mov     bx,nMap
+                shl     bx,1            ; word index into the table
+                mov     dx,fntblMap[bx]
+                call    LoadMapFile
+                ; returns Carry if error
+
+load_error:     ret
+LoadData        ENDP
+
+EVEN
+update_full     PROC                    ; Redraws all of DrawPage from the map, then copies it to BlankPage and ShowPage
+                mov     ds,segTiles     ; DS = tile data, ES = video, FS = map (what draw_col reads)
+                mov     es,segVideo
+                mov     fs,segMap
+
+                mov     dx,SC_INDEX
+                mov     al,MAP_MASK
+                out     dx,al           ; index written once; plane masks then go to SC_INDEX+1
+
+                mov     di,DrawPage.Address
+                add     di,upper_left
+                mov     bp,MapInfo.OffX1
+                add     bp,MapInfo.OffY1        ; BP = map offset of the first column's top tile
+
+                mov     dx,MapInfo.WrapX        ; DX = columns left before the horizontal wrap
+
+                mov     ch,(VIRTUAL_WIDTH/SQUARE_WIDTH) ; CH = tile columns to draw
+draw_full_loop: push    cx              ; draw_col uses CL and CH
+                push    si              ; SI holds nothing useful yet; it is reloaded with 0 below
+                push    dx              ; DX is reused for the port and by draw_col
+
+                mov     al,11h          ; plane mask, shifted left once per plane
+                mov     si,0
+
+update_f_loop:  mov     dx,SC_INDEX + 1
+                out     dx,al
+                push    bp
+                call    draw_col
+                pop     bp
+                sub     di,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 ; back to the top of this column
+                add     si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4   ; next plane's slice of each tile
+                shl     al,1
+                jnc     update_f_loop   ; 11h,22h,44h,88h -- the fourth shift carries out
+
+                pop     dx
+                dec     dx
+                jnz     update_f_go_on
+                mov     dx,MapInfo.Wid  ; wrapped: reload the counter and step BP back one map row
+                sub     bp,dx
+update_f_go_on: inc     bp              ; next map column
+                pop     si
+                add     di,(SQUARE_WIDTH/ 4)    ; next tile column on screen
+                pop     cx
+                dec     ch
+                jnz     draw_full_loop
+
+
+
+
+
+        ; Second half: copy DrawPage to BlankPage and to ShowPage inside video memory.
+
+
+
+
+
+                mov     dx,GC_INDEX
+                mov     ax,ALL_COPY_BITS        ; constant defined elsewhere
+                out     dx,ax
+
+                mov     dx,SC_INDEX
+                mov     ax,0F02h        ; enable writes to all four planes
+                out     dx,ax
+
+                mov     ds,segVideo
+                mov     si,DrawPage.Address
+                add     si,upper_left
+                mov     es,segVideo
+                mov     di,BlankPage.Address
+                add     di,upper_left
+                mov     cx,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4
+            rep movsb                   ; DrawPage -> BlankPage (NOTE(review): no CLD here)
+                mov     si,DrawPage.Address
+                add     si,upper_left
+                mov     di,ShowPage.Address
+                add     di,upper_left
+                mov     cx,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4
+            rep movsb                   ; DrawPage -> ShowPage
+
+                mov     dx,GC_INDEX
+                mov     ax,ALL_DRAW_BITS        ; constant defined elsewhere
+                out     dx,ax
+
+                ret                     ; DS and ES are left at segVideo
+update_full     ENDP
+
+EVEN
+update_left     PROC                    ; Draws the leftmost tile column of DrawPage, all four planes
+                mov     ds,cs:segTiles  ; DS = tile data, ES = video, FS = map (what draw_col reads)
+                mov     es,cs:segVideo
+                mov     fs,cs:segMap
+
+                mov     dx,SC_INDEX
+                mov     al,MAP_MASK
+                out     dx,al           ; index written once; plane masks then go to SC_INDEX+1
+
+                mov     al,011h         ; plane mask, shifted left once per plane
+                mov     si,0
+                mov     di,cs:DrawPage.Address
+                add     di,cs:upper_left        ; DI -> top-left of the page
+                mov     bp,MapInfo.OffX1
+                add     bp,MapInfo.OffY1        ; BP = map offset of the column's top tile
+
+update_l_loop:  mov     dx,SC_INDEX + 1
+                out     dx,al
+                push    bp
+                call    draw_col
+                pop     bp
+                sub     di,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 ; back to the top of the column
+                add     si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4   ; next plane's slice of each tile
+                shl     al,1
+                jnc     update_l_loop   ; 11h,22h,44h,88h -- the fourth shift carries out
+
+                ret
+update_left     ENDP
+
+EVEN
+update_right    PROC    near            ; Draws the rightmost tile column of DrawPage, all four planes
+                mov     ds,cs:segTiles  ; DS = tile data, ES = video, FS = map (what draw_col reads)
+                mov     es,cs:segVideo
+                mov     fs,cs:segMap
+
+                mov     dx,SC_INDEX
+                mov     al,MAP_MASK
+                out     dx,al           ; index written once; plane masks then go to SC_INDEX+1
+
+                mov     bp,MapInfo.OffX2
+                add     bp,MapInfo.OffY1        ; BP = map offset of the column's top tile
+
+                mov     al,011h         ; plane mask, shifted left once per plane
+                mov     si,0
+
+                mov     di,cs:DrawPage.Address
+                add     di,cs:upper_left
+                add     di,(VIRTUAL_WIDTH - SQUARE_WIDTH) / 4   ; DI -> top of the last tile column
+
+update_r_loop:  mov     dx,SC_INDEX + 1
+                out     dx,al
+
+                push    bp
+                call    draw_col
+                pop     bp
+                sub     di,(VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4 ; back to the top of the column
+                add     si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4   ; next plane's slice of each tile
+                shl     al,1
+                jnc     update_r_loop   ; 11h,22h,44h,88h -- the fourth shift carries out
+
+                ret
+update_right    ENDP
+
+EVEN
+update_top      PROC                    ; Draws the top tile row of DrawPage, all four planes
+                mov     ds,cs:segTiles  ; DS = tile data, ES = video, FS = map (what draw_row reads)
+                mov     es,cs:segVideo
+                mov     fs,cs:segMap
+
+                mov     dx,SC_INDEX
+                mov     al,MAP_MASK
+                out     dx,al           ; index written once; plane masks then go to SC_INDEX+1
+
+                mov     di,cs:DrawPage.Address
+                add     di,cs:upper_left        ; DI -> top-left of the page
+                mov     bp,MapInfo.OffX1
+                add     bp,MapInfo.OffY1        ; BP = map offset of the row's first tile
+
+                mov     al,011h         ; plane mask, shifted left once per plane
+                mov     si,0
+
+update_top_loop:
+                mov     dx,SC_INDEX + 1
+                out     dx,al
+                push    bp
+                call    draw_row
+                pop     bp
+                sub     di,VIRTUAL_WIDTH / 4    ; back to the left end of the row
+                add     si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4   ; next plane's slice of each tile
+                shl     al,1
+                jnc     update_top_loop ; 11h,22h,44h,88h -- the fourth shift carries out
+
+                ret
+update_top      ENDP
+
+EVEN
+update_bottom   PROC                    ; Draws the bottom tile row of DrawPage, all four planes
+                mov     ds,cs:segTiles  ; DS = tile data, ES = video, FS = map (what draw_row reads)
+                mov     es,cs:segVideo
+                mov     fs,cs:segMap
+
+                mov     dx,SC_INDEX
+                mov     al,MAP_MASK
+                out     dx,al           ; index written once; plane masks then go to SC_INDEX+1
+
+                mov     di,cs:DrawPage.Address
+                add     di,cs:upper_left
+                add     di,(VIRTUAL_WIDTH * (VIRTUAL_HEIGHT - SQUARE_HEIGHT)) / 4  ; DI -> left end of the last tile row
+                mov     bp,MapInfo.OffX1
+                add     bp,MapInfo.OffY2        ; BP = map offset of the row's first tile
+
+                mov     al,011h         ; plane mask, shifted left once per plane
+                mov     si,0
+
+update_bottom_loop:
+                mov     dx,SC_INDEX + 1
+                out     dx,al
+                push    bp
+                call    draw_row
+                pop     bp
+                sub     di,VIRTUAL_WIDTH / 4    ; back to the left end of the row
+                add     si,(SQUARE_WIDTH * SQUARE_HEIGHT) / 4   ; next plane's slice of each tile
+                shl     al,1
+                jnc     update_bottom_loop      ; 11h,22h,44h,88h -- the fourth shift carries out
+
+                ret
+update_bottom   ENDP
+
+; Draws ONE plane of a single col
+EVEN
+draw_col        PROC    near
+        ; DI->upper left corner of col to draw (ES = video); left at the column's end
+        ; BP->col of map to draw (read through FS); changed on return
+        ; SI = offset of this plane's data inside a tile (DS = tiles); kept
+        ; AX kept (parked in the high half of EAX); CX used
+        ; BX used as the tile read pointer
+        ; EDX used to assemble each 4-byte row -- DX is NOT preserved
+                shl     eax,16  ; save it
+                mov     ax,MapInfo.WrapY        ; AX = rows left before the vertical wrap
+
+                mov     cl,(VIRTUAL_HEIGHT / SQUARE_HEIGHT)     ; CL = tiles in the column
+do_col_loop:    mov     bx,si
+                mov     bh,byte ptr fs:[bp]     ; change tile #  (BX = tile#*256 + low byte of SI)
+
+                mov     ch,SQUARE_HEIGHT        ; CH = rows in one tile
+do_col_sq_loop: mov     dl,byte ptr ds:[bx+2]   ; gather bytes 2,3 then 0,1 into EDX ...
+                mov     dh,byte ptr ds:[bx+3]
+                shl     edx,16
+                mov     dl,byte ptr ds:[bx+0]
+                mov     dh,byte ptr ds:[bx+1]
+                mov     es:[di],edx     ; 32-bit write
+                add     di,VIRTUAL_WIDTH / 4    ; one scanline down
+                add     bx,4
+                dec     ch
+                jnz     do_col_sq_loop
+
+                add     bp,MapInfo.Wid          ; next map row
+                dec     ax
+                jnz     yayaya
+                mov     ax,MapInfo.Ht           ; wrapped: reload the counter,
+                sub     bp,MapInfo.Extent       ;  and pull BP back by Extent
+yayaya:
+
+                dec     cl
+                jnz     do_col_loop
+
+                shr     eax,16  ; restore it
+
+                ret
+draw_col        ENDP
+
+; Draws ONE plane of a single row
+EVEN
+draw_row        PROC    near
+                push    ax              ; AX is kept on the stack here
+;               shl     eax,16  ; save ax
+
+                mov     ax,MapInfo.WrapX        ; AX = tiles left before the horizontal wrap
+
+        ; DI->upper left corner of row to draw (ES = video); left at the row's end
+        ; BP->row of map to draw (read through FS); changed on return
+        ; SI = offset of this plane's data inside a tile (DS = tiles); kept
+        ; AX kept (push/pop); CX used
+        ; BX used as the tile read pointer
+        ; EDX used to assemble each 4-byte row -- DX is NOT preserved
+
+                mov     cl,(VIRTUAL_WIDTH / SQUARE_WIDTH)       ; CL = tiles in the row
+do_row_loop:    mov     bx,si
+                mov     bh,byte ptr fs:[bp]     ; change tile #  (BX = tile#*256 + low byte of SI)
+
+                mov     ch,SQUARE_HEIGHT        ; CH = rows in one tile
+do_row_sq_loop: mov     dl,byte ptr ds:[bx+2]   ; gather bytes 2,3 then 0,1 into EDX ...
+                mov     dh,byte ptr ds:[bx+3]
+                shl     edx,16
+                mov     dl,byte ptr ds:[bx+0]
+                mov     dh,byte ptr ds:[bx+1]
+                mov     es:[di],edx             ; ... and store them with one 32-bit write
+                add     di,(VIRTUAL_WIDTH / 4)  ; one scanline down
+                add     bx,4
+                dec     ch
+                jnz     do_row_sq_loop
+
+                add     di,(-VIRTUAL_WIDTH*SQUARE_HEIGHT + SQUARE_WIDTH) / 4  ; back to the tile's top, one tile right
+                inc     bp                      ; next map column
+                dec     ax
+                jnz     yayaya2
+                mov     ax,MapInfo.Wid          ; wrapped: reload the counter,
+                sub     bp,ax                   ;  and pull BP back by Wid
+yayaya2:
+                dec     cl
+                jnz     do_row_loop
+
+;               shr     eax,16  ; restore it
+                pop     ax
+                ret
+draw_row        ENDP
+
\ No newline at end of file
diff --git a/16/scrasm/MODEX.INC b/16/scrasm/MODEX.INC
new file mode 100644
index 00000000..174aa354
--- /dev/null
+++ b/16/scrasm/MODEX.INC
@@ -0,0 +1,88 @@
+; ====================================================================
+; Entry points:
+; ====================================================================
+MODEX_START     MACRO                   ;; BIOS mode 13h, then ModifyForX reprograms the VGA
+                mov     ax,13h  ;let the BIOS set standard 256-color
+                int     10h     ; mode (320x200 linear)
+;               PALETTE_BLACK
+                call    ModifyForX      ; also clears display memory and sets DS = CS
+                ENDM
+
+; ====================================================================
+; This is MODE-X code from Dr. Dobb's Journal, by Michael Abrash.
+; I modified it from 320x240 back to 320x200, and then to 512 virtual
+; width, for scrolling purposes.
+; ====================================================================
+
+; Mode X (320x240, 256 colors) mode set routine. Works on all VGAs.
+; ****************************************************************
+; * Revised 6/19/91 to select correct clock; fixes vertical roll *
+; * problems on fixed-frequency (IBM 851X-type) monitors.        *
+; ****************************************************************
+; Modified from public-domain mode set code by John Bridges.
+
+; Index/data pairs for CRT Controller registers that differ between
+; mode 13h and mode X.  Each word is (data SHL 8) + index; ModifyForX sends them with OUT DX,AX.
+CRTParms label  word
+;       dw      00d06h  ;vertical total
+;       dw      03e07h  ;overflow (bit 8 of vertical counts)
+;       dw      04109h  ;cell height (2 to double-scan)
+;       dw      0ea10h  ;v sync start
+;       dw      0ac11h  ;v sync end and protect cr0-cr7
+;       dw      0df12h  ;vertical displayed = 480
+        dw      00014h  ;turn off dword mode                    *
+;       dw      0e715h  ;v blank start
+;       dw      00616h  ;v blank end
+        dw      0e317h  ;turn on byte mode                      *
+
+        dw      (VIRTUAL_WIDTH*32)+13h  ; index 13h; data byte = VIRTUAL_WIDTH/8 (virtual width)  NEW
+;       dw      09012h  ;vertical displayed = 400 (already like this)
+CRT_PARM_LENGTH equ     (($-CRTParms)/2)        ; number of words in the table
+
+ModifyForX      PROC    near            ; Out: DS = CS, ES = SCREEN_SEG, DF clear; AX,CX,DX,SI,DI changed
+                mov     dx,SC_INDEX
+                mov     ax,0604h
+                out     dx,ax           ;disable chain4 mode
+                mov     ax,0100h
+                out     dx,ax           ;synchronous reset while setting Misc
+                                        ; Output for safety, even though clock
+                                        ; unchanged
+                mov     dx,MISC_OUTPUT
+                mov     al,0e3h
+                out     dx,al           ;select 25 MHz dot clock & 60 Hz scanning rate
+
+                mov     dx,SC_INDEX
+                mov     ax,0300h
+                out     dx,ax           ;undo reset (restart sequencer)
+
+                mov     dx,CRTC_INDEX   ;reprogram the CRT Controller
+                mov     al,11h          ;VSync End reg contains register write
+                out     dx,al           ; protect bit
+                inc     dx              ;CRT Controller Data register
+                in      al,dx           ;get current VSync End register setting
+                and     al,7fh          ;remove write protect on various
+                out     dx,al           ; CRTC registers
+                dec     dx              ;CRT Controller Index
+                cld                     ;forward direction for the LODSW and STOSW below
+                push    cs
+                pop     ds              ;DS = CS so LODSW reads CRTParms (DS is not restored)
+                mov     si,offset CRTParms ;point to CRT parameter table
+                mov     cx,CRT_PARM_LENGTH ;# of table entries
+SetCRTParmsLoop:
+                lodsw                   ;get the next CRT Index/Data pair
+                out     dx,ax           ;set the next CRT Index/Data pair
+                loop    SetCRTParmsLoop
+
+                mov     dx,SC_INDEX
+                mov     ax,0f02h
+                out     dx,ax           ;enable writes to all four planes
+                mov     ax,SCREEN_SEG   ;now clear all display memory, 8 pixels
+                mov     es,ax           ; at a time
+                sub     di,di           ;point ES:DI to display memory
+                sub     ax,ax           ;clear to zero-value pixels
+                mov     cx,8000h        ;# of words in display memory
+            rep stosw                   ;clear all of display memory
+
+                ret
+ModifyForX      ENDP
+
\ No newline at end of file
diff --git a/16/scrasm/PAGE.INC b/16/scrasm/PAGE.INC
new file mode 100644
index 00000000..a55bdf0b
--- /dev/null
+++ b/16/scrasm/PAGE.INC
@@ -0,0 +1,109 @@
+;; ====================================================================
+;; (Code follows)
+;; ====================================================================
+
+EVEN
+upper_left      dw      0               ; Stores upper left corner offset
+                                        ; relative to page offset.
+pages           dw      0               ; for counting frame-per-sec (FlipPage increments it)
+
+PAGE_INFO       STRUCT 2,NONUNIQUE      ; Per-page state; FlipPage rotates it among the three pages
+                Address         dw      0       ; page base offset used by the update_* drawing code
+                UpperLeftAddress dw     0       ; + ScrollOffset = CRTC start address (FlipPage)
+                MapPosX         dw      0
+                MapPosY         dw      0
+                Alignment       db      0       ; low 2 bits become the pixel-pan value (FlipPage)
+                AlignmentMask   db      0       ; not rotated by FlipPage (that line is commented out)
+                ScrollOffset    dw      0
+                Rectangles      dw      0
+                Valid           db      0       ; cleared by LoadMapFile / LoadTilesFile
+PAGE_INFO       ENDS
+
+DrawPage        PAGE_INFO <PAGE_0,PAGE_0>       ; initializers fill Address and UpperLeftAddress
+ShowPage        PAGE_INFO <PAGE_1,PAGE_1>
+BlankPage       PAGE_INFO <PAGE_2,PAGE_2>
+
+ROTATE3         MACRO   reg,item        ;; old Show -> Blank, old Blank -> Draw, old Draw -> Show
+                mov     reg,cs:ShowPage.item
+                xchg    reg,cs:BlankPage.item   ;; Blank = old Show, reg = old Blank
+                xchg    reg,cs:DrawPage.item    ;; Draw = old Blank, reg = old Draw
+                mov     cs:ShowPage.item,reg    ;; Show = old Draw
+                ENDM    ; Leaves ShowPage.item in reg!
+
+;; This procedure is used to flip between the three available pages.
+;; Originally from Dr. Dobb's Journal's Graphics Programming column by
+;; Michael Abrash, I've reworked the code to be more specific to my
+;; own purposes, and commented it more.  Changes AX, BX, CX, DX and DI.
+EVEN
+FlipPage        PROC    near
+        ; This series of instructions circles the show_page, blank_page,
+        ; and draw page appropriately and leaves the current page to show
+        ; in AX.  Note that it's a lot more instructions than it looks like,
+        ; but I unrolled the copy loop for speed.  So-so good idea, because
+        ; if you add a field and forget to rotate it, it could mean trouble!
+                ROTATE3 ax,Rectangles
+                ROTATE3 ax,ScrollOffset
+                ROTATE3 ax,MapPosX
+                ROTATE3 ax,MapPosY
+;               ROTATE3 al,AlignmentMask        SPRITES ...
+                ROTATE3 al,Alignment
+                mov     di,ax           ; low byte = new ShowPage.Alignment; AH is
+                                        ; left over from the MapPosY rotate...
+                and     di,3            ; DI = pixel pan, 0 to 3.
+                shl     di,1            ; Mode X requires 0 2 4 or 6.
+                ROTATE3 ax,Address
+                ROTATE3 al,Valid
+                ROTATE3 ax,UpperLeftAddress ; Leaves AX=ShowPage.ULAddr
+
+                add     ax,cs:ShowPage.ScrollOffset     ; AX = start address to display
+
+        ; AX is set up to be the current show page already.
+        ; By pre-loading BX with the low-address set code, and CX with
+        ; the high-address set code, we can more quickly flip the page
+        ; after the vertical retrace period.
+                mov     bl,START_ADDRESS_LOW    ;preload for fastest
+                mov     bh,al                   ; flipping once display
+                mov     cl,START_ADDRESS_HIGH   ; enable is detected
+                mov     ch,ah
+
+        ; Wait for display enable to be active (status is active low), to be
+        ; sure both halves of the start address will take in the same frame.
+                mov     dx,INPUT_STATUS_1
+WaitDE:         in      al,dx
+                test    al,01h
+                jnz     WaitDE  ;display enable is active low (0 = active)
+
+        ; Set the start offset in display memory of the page to display.
+                mov     dx,CRTC_INDEX
+                mov     ax,bx
+                out     dx,ax   ;start address low
+                mov     ax,cx
+                out     dx,ax   ;start address high
+
+        ; Now wait for vertical sync, so the other page will be invisible when
+        ; we start drawing to it.
+                mov     dx,INPUT_STATUS_1
+WaitVS:         in      al,dx
+                test    al,08h
+                jz      WaitVS  ;vertical sync is active high (1 = active)
+
+        ; Finally, have to adjust the pixel panning register in order
+        ; to fine-tune the starting address on a pixel level.
+        ; This pixel pan value is the scroll offset mod 4 -- but since
+        ; Mode X's pixel pan works by values of 2 (0, 2, 4 or 6) we
+        ; have to shift appropriately.
+                mov     dx,ATC_INDEX
+                mov     al,13h  ; 13h = set pixel pan
+                out     dx,al
+                mov     ax,di   ; DI = pixel pan calculated above
+                out     dx,al
+                mov     dx,ATC_INDEX
+                mov     al,32   ; Allows the computer to use this register
+                out     dx,al   ; again.  Without this OUT, the screen will
+                                ; remain blank!
+
+        ; Increment the page counter now!
+                inc     cs:pages
+                ret
+FlipPage        ENDP
+
\ No newline at end of file
diff --git a/16/scrasm/PALETTE.INC b/16/scrasm/PALETTE.INC
new file mode 100644
index 00000000..e3bd381e
--- /dev/null
+++ b/16/scrasm/PALETTE.INC
@@ -0,0 +1,239 @@
+;; Palette operations
+;; Note that where needed in the macros, a "palette" refers to
+;; the segment handle to a 768-byte piece of memory.  So palettes
+;; can be loaded and freed, they're not permanent, but if you want
+;; to use a fixed (not allocated) palette you'd better make sure
+;; it's segment aligned or else you can't use these macros.  If it
+;; is, you can just supply "seg myPalette" as the 'palette' argument
+;; to any of these macros.
+
+;; Fade from a palette to black.  Loads DS, SI, BX and CX, then calls FadePalette.
+FADE_OFF        MACRO   fade,palette
+                mov     si,0            ;; DS:SI -> start of the palette segment
+                mov     ds,palette
+                mov     bh,fade         ; positive -> Gets dimmer...
+                mov     bl,0            ; Starts exact
+                mov     cx,64/fade+1    ; Total number of loops required
+                call    FadePalette
+                ENDM
+
+;; Fade from black to a palette.  Loads DS, SI, BX and CX, then calls FadePalette.
+FADE_ON         MACRO   fade,palette
+                mov     si,0            ;; DS:SI -> start of the palette segment
+                mov     ds,palette
+                mov     bh,-fade        ; negative -> Gets brighter...
+                mov     bl,64           ; Starts totally dimmed
+                mov     cx,64/fade+1    ; Total number of loops required
+                call    FadePalette
+                ENDM
+
+;; Flash from a palette to white.  Loads DS, SI, BX and CX, then calls FadePalette.
+FLASH_OFF       MACRO   fade,palette
+                mov     si,0            ;; DS:SI -> start of the palette segment
+                mov     ds,palette
+                mov     bh,-fade        ; negative -> gets brighter
+                mov     bl,0            ; Starts exact
+                mov     cx,64/fade+1    ; Total number of loops required
+                call    FadePalette
+                ENDM
+
+;; Flash from white to a palette.  Loads DS, SI, BX and CX, then calls FadePalette.
+FLASH_ON        MACRO   fade,palette
+                mov     si,0            ;; DS:SI -> start of the palette segment
+                mov     ds,palette
+                mov     bh,fade         ; positive -> Gets dimmer...
+                mov     bl,-64          ; Starts totally bright
+                mov     cx,64/fade+1    ; Total number of loops required
+                call    FadePalette
+                ENDM
+
+;; Save a palette into a palette-sized piece of memory (768 bytes at palette:0)
+PAL_SAVE        MACRO   palette
+                mov     es,palette      ;; ES:DI -> destination for SavePalette
+                mov     di,0
+                call    SavePalette
+                ENDM
+
+; Returns AX = a new segment for a palette, also stored in 'palette'.  The DOS carry result is not checked.
+NEW_PAL         MACRO   palette
+                mov     bx,(256 * 3) / 16       ;; 768 bytes expressed in 16-byte paragraphs
+                mov     ah,48h                  ;; DOS: allocate memory
+                int     21h
+                mov     palette,ax
+                ENDM
+
+;; Black the entire palette temporarily.  Used to blank the screen while
+;; drawing a frame before fading in.  Uses tmppal itself as the source.
+PAL_BLACK       MACRO
+                mov     ax,seg tmppal
+                mov     ds,ax
+                mov     si,OFFSET tmppal        ;; DS:SI -> tmppal
+                mov     bh,-1           ; Doesn't really matter...
+                mov     bl,64           ; Starts totally dimmed
+                mov     cx,1            ; Just one time -- to leave it black
+                call    FadePalette
+                ENDM
+
+;; White the entire palette temporarily (the bright counterpart of PAL_BLACK).
+PAL_WHITE       MACRO
+                mov     ax,seg tmppal
+                mov     ds,ax
+                mov     si,OFFSET tmppal        ;; DS:SI -> tmppal
+                mov     bh,-1           ; Doesn't really matter...
+                mov     bl,-64          ; Starts totally bright
+                mov     cx,1            ; Just one time -- to leave it white
+                call    FadePalette
+                ENDM
+
+;; Re-send tmppal to the VGA as it stands: with CX = 0 FadePalette skips
+;; the load/clip pass and only writes the colors out, once.
+PAL_UPDATE      MACRO
+                mov     cx,0            ; 0 times = update
+                call    FadePalette
+                ENDM
+
+WAITBORDER      MACRO                   ;; Waits for status bit 3 to go 0, then 1.  Changes AL, DX.
+                LOCAL   wbr1,wbr2
+                mov     dx,INPUT_STATUS_1
+wbr1:           in      al,dx
+                test    al,8
+                jnz     wbr1            ;; spin while the bit is still set
+wbr2:           in      al,dx
+                test    al,8
+                jz      wbr2            ;; spin until the bit is set again
+                ENDM
+
+;; Fade Palette:
+;; The following code is modified greatly from the Future Crew's palette
+;; fading code.  Works on blocks of 256 colors only, so far, but I might
+;; change it later.  Also, it theoretically could "anti-fade" -- fade to
+;; white -- which I call flashing, so I added that ability, which was
+;; missing from FC's code.
+EVEN
+tmppal          DB      768 dup (?)      ; Working copy that is written to the VGA
+FadePalette     PROC NEAR               ; In: DS:SI = source, BL = start offset, BH = step, CX = passes (0 = resend tmppal)
+                mov     ax,seg tmppal
+                mov     es,ax
+
+FadeLoop:       push    cx
+                push    si
+
+                cmp     cx,0
+                je      JustUpdate      ; CX = 0: skip the load/clip pass
+
+        ; Load in the colors in the palette
+                mov     di,OFFSET tmppal ; ES:DI -> temp palette
+                mov     cx,768          ; Reads 256*3 bytes at a time.
+loadpal_loop:   mov     al,ds:[si]      ; Load one color byte
+                inc     si
+                sub     al,bl           ; Subtract the fade amount
+                jge     pal_more        ; Limit the range by clipping
+                xor     al,al           ;  to between 0 and 63
+                jmp     pal_ok          ; (there's probably a faster
+pal_more:       cmp     al,63           ; way to do it than this,
+                jle     pal_ok          ; but I don't know it)
+                mov     al,63
+pal_ok:         mov     es:[di],al      ; Store that byte in the new
+                inc     di
+                dec     cx              ;  temp palette and loop.
+                jnz     loadpal_loop
+
+        ; Get ready to move this block of palette values
+JustUpdate:     sti                     ; Let interrupts happen now,
+                WAITBORDER              ;  while waiting for a retrace,
+                cli                     ;  instead of more critical times
+
+                mov     dx,PEL_WRITE_REG; Set up to write to color register,
+                xor     al,al           ; starting at palette entry 0.
+                out     dx,al
+                mov     dx,PEL_DATA_REG ; Point at color port
+
+        ; Quickly put out the first half of the color palette
+                mov     di,OFFSET tmppal
+                mov     cl,(768/6)/2    ; Does 2 loops of 128 colors each.
+                cli                     ;  Waits a retrace inbetween...
+FirstHalfLoop:  REPEAT 6                ; Steps of 6 -- reduces the
+                mov     al,es:[di]      ; number of LOOP instructions
+                inc     di
+                out     dx,al
+                ENDM
+                dec     cl
+                jnz     FirstHalfLoop
+                sti
+
+                WAITBORDER              ; Waits one retrace -- less flicker
+                mov     dx,PEL_DATA_REG ; Reset DX
+
+        ; Now, quickly put out the other half of the colors.
+                mov     cl,(768/6)/2    ; DI carries on from where the first half stopped
+                cli
+SecondHalfLoop: REPEAT 6                ; Steps of 6 -- reduces the
+                mov     al,es:[di]      ; number of LOOP instructions
+                inc     di
+                out     dx,al
+                ENDM
+                dec     cl
+                jnz     SecondHalfLoop
+
+        ; For the next iteration, restore everything and loop
+                pop     si
+                pop     cx
+
+                cmp     cx,0
+                je      JustUpdated     ; CX = 0 mode: a single pass, no stepping
+
+                add     bl,bh           ; Change brightness by BH
+
+                dec     cx
+                jnz     FadeLoop
+
+        ; All done, re-enable interrupts and return
+JustUpdated:    sti
+                ret
+FadePalette     ENDP
+
+;; Saves the palette into the memory pointed at by ES:DI.  That memory
+;; must be at least 768 bytes long...  Changes AL, CL, DX and DI.
+SavePalette     PROC NEAR
+                mov     dx,PEL_READ_REG ; Set up to read from color register,
+                xor     al,al           ; starting at palette entry 0.
+                out     dx,al
+                mov     dx,PEL_DATA_REG
+
+        ; Quickly read in the whole color palette (128 passes of 6 bytes = 768)
+                mov     cl,(768/6)
+                cli
+ReadPalLoop:    REPEAT 6                ; Steps of 6 -- reduces the
+                in      al,dx           ; number of LOOP instructions
+                mov     es:[di],al
+                inc     di
+                ENDM
+                dec     cl
+                jnz     ReadPalLoop
+        ; All done, re-enable interrupts and return
+                sti
+                ret
+SavePalette     ENDP
+
+;; Load a palette from a file.  Opens the file and reads it into
+;; memory (standard LoadFile) and then points the palette at that
+;; newly allocated memory.  Also, frees old memory before it does
+;; any loading ...  Returns with carry set on error.
+LoadPaletteFile PROC    near
+                mov     ax,segPalette
+                cmp     ax,-1           ; -1 marks "no palette loaded"
+                je      pal_not_loaded
+                mov     es,ax
+                mov     ah,49h          ; DOS: free the old palette block at ES
+                int     21h
+                mov     nError,ERR_MEM  ; written even on success; MOV leaves CF for the JC below
+                jc      lp_err
+                mov     segPalette,-1
+
+pal_not_loaded: call    LoadFile        ; CF = error, else DX = segment of the loaded data
+                jc      lp_err
+
+                mov     segPalette,dx
+lp_err:         ret
+LoadPaletteFile ENDP
+
\ No newline at end of file
diff --git a/16/scrasm/SCROLL.DOC b/16/scrasm/SCROLL.DOC
new file mode 100644
index 00000000..0893db36
--- /dev/null
+++ b/16/scrasm/SCROLL.DOC
@@ -0,0 +1,297 @@
+            ________________________________________________
+           |+----------------------------------------------+|
+           ||          I N T R O D U C I N G :             ||
+           |+----------------------------------------------+|
+           ||               Steve's 4-Way                  ||
+           ||    ___    ___   ____     ___    _      _     ||
+           ||   / __|  / __| |  _ \   / _ \  | |    | |    ||
+           ||  | <_   | |    | |_> | | | | | | |    | |    ||
+           ||   \_ \  | |    |    /  | | | | | |    | |    ||
+           ||   __> | | |__  | |\ \  | |_| | | |__  | |__  ||
+           ||  |___/   \___| |_| \_|  \___/  |____| |____| ||
+           |+______________________________________________+|
+           +------------------------------------------------+
+
+  There, now that I have the hype outta the way, let me explain what
+this program is.  I'm releasing the source code to my 4-way scrolling
+code so that others can learn from it.  There aren't enough really
+good resources out there for someone learning to program games, so I'm
+trying to do my part to help.
+
+WHAT IT IS:
+
+  The code is 100% assembly, for which I use MASM 6.0, so there may
+be a few problems converting to Turbo Assembler.  I also use the ".386"
+directive, meaning that you can't run this code with a 286 or earlier.
+But most of the code should be easily convertible.  I haven't been
+programming for 386's much so I really don't make the use of the 386
+registers like I could have.  Mostly I just did it for some extra 386
+instructions.
+
+  You'll need a VGA which can support mode 13h, the MCGA mode.  This
+code runs in "tweaked" MCGA mode, or what is called "Mode X".  For more
+information on Mode X, check out the 1991 - 1992 issues of Dr. Dobb's
+Journal, wherein you will find Michael Abrash's excellent Graphics
+Programming column.  This is where I (and many others) found out about
+Mode X, which is an excellent graphics mode for fast 256-color graphics.
+Also, you can take a look at XLIB, YakIcons, FastGraph, etc which are all
+graphics libraries (public domain or otherwise) which support Mode X
+graphics and probably have some good documentation on programming the mode.
+Additionally, check out _The Programmer's Guide to the EGA and VGA Cards_,
+by Richard Ferraro, and _Power Graphics Programming_ (out of print, but
+available directly from Que Books) by Michael Abrash.  Finally, you can
+ask about graphics programming on many newsgroups such as
+"rec.games.programmer"...
+
+WHAT IT DOES:
+
+  The code will allow you to create "tiled" background patterns and then
+to omnidirectionally scroll over them.  You could implement sprite routines
+and then animate them over the background, but I haven't gotten this far
+yet.  The scrolling is always relative -- i.e. no "jump-to"s, just "scroll
+left", "scroll up", etc.  Jump to would be very easy to implement, I just
+haven't done it yet.
+
+  It runs at about 60-70 fps on a 386/20, which means that it is operating
+in under the time of one vertical refresh (_just_ under, according to some
+timing I've done).  This could probably be reduced, but the best way to
+reduce it is to limit the speed at which it scrolls -- if you stick to
+scrolling at most 8 pixels at a time in two axes or 16 pixels at a time
+in one axis, it is very fast.  More than that, and it occasionally takes
+more than one refresh period even on my 486.  Still, that should be
+fast enough for just about any game.
+
+  I also included some routines to generate maps, tiles, and palettes
+so you can see the file formats.  These are in C, and the executables
+are around in case you don't care to recompile.  None of the utilities
+are exactly production quality.  You'll have to look at the code to
+figure out the arguments!  Luckily you can just run them with no args
+and they perform default behavior.
+
+  Lastly, the program SCROLL.EXE is a demo of what it can do.  In this
+demo you can use one of two sets of keyboard controls to scroll around.
+One, the default set of commands, lets you press up/down/left/right and
+scroll in that direction.  The other has "inertia" -- pressing up/down/
+left/right will accelerate you in that direction.  You'll see what I
+mean, just experiment.  You can switch keyhandlers by pressing K.
+You can also switch between the diagonal pattern map and a logo map
+by pressing M.  (By the way, it will eventually run out of memory loading
+the maps and the diagonal map will screw up... don't worry about it,
+it'd be fixed if I had more time).  Try it out.
+
+CREDIT WHERE CREDIT IS DUE:
+
+  People who (unknowingly) helped me out:
+
+        Keyboard by Steven Dollins, Brown Computer Group.  From his
+                KEYINT routines, which is an INT 9 handler to let you
+                keep track of many keys being pressed at the same time.
+        Graphics, basically, by Michael Abrash, whose Mode X columns
+                influenced me greatly.
+        Palette fades and file I/O by the Future Crew.  Thanks for
+                letting out the Mental Surgery code!
+        CPU detection by Ray Duncan, taken from one of his books.
+
+  Obviously I haven't just pirated the code, it's all from publicly
+released source code and I modified it a bit.  But I wouldn't have come
+up with this whole thing without those helping hands.  Thanks.
+
+HOW IT WORKS:
+
+  Here's how the scrolling works.  I'll explain it from a single-page
+point of view, although it actually uses several pages of video memory.
+The video memory is laid out like this:
+╔════════════════════════════════╤════════════╗ ───
+║                                │ /  /  /  / ║  │
+║                                │/  /  /  /  ║  │
+║                                │  /  /  /  /║  │
+║           Visible page         │ / Not /  / ║  │
+║                                │/ visible/  ║  │
+║                                │  /  /  /  /║ 64K
+║                                │ /  /  /  / ║  │
+╟────────────────────────────────┘/  /  /  /  ║  │
+║  /  /  /  /  /  /  /  /  /  /  /  /  /  /  /║  │
+║ /  /  /  /  /  /  /  /  /  /  /  /  /  /  / ║  │
+║/  /  /  /  /  /  /  /  /  /  /  /  /  /  /  ║  │
+╚═════════════════════════════════════════════╝ ───
+In other words, it has a virtual width greater than the actual screen
+width, and a virtual height higher than the actual screen height.  The
+VGA hardware allows hardware panning around within the virtual area, so
+that makes panning much easier:  you only have to draw the information
+that is coming on to the screen with each pan.
+
+What is Happening:      What the user sees:
+╔════════╤════╗          ┌────────┐
+║     hel│////║          │     hel│           The picture that is
+╟────────┘////║          └────────┘           coming on to the screen
+║/////////////║                               ("hello") appears to
+╚═════════════╝                               the user to be scrolling
+╔═╤═══════╤═══╗          ┌────────┐           left, although it is
+║/│   hell│///║          │    hell│           actually at a stationary
+║/└───────┘///║          └────────┘           location in memory...
+║/////////////║                               Each time the frame moves,
+╚═════════════╝                               it is not necessary to
+╔══╤═══════╤══╗          ┌────────┐           redraw the parts that stay
+║//│  hello│//║          │   hello│           on the screen, just the
+║//└───────┘//║          └────────┘           parts that become visible.
+║/////////////║
+╚═════════════╝
+
+  The same works up&down too, or even left/right and up/down at the same
+time.  The problem occurs when you scroll enough to hit the edge of the
+virtual space.  Luckily, video memory increases and wraps at the right
+edge to one line down on the left edge.  So you end up with a situation
+like this after scrolling too far right:
+╔══════════════╤════╗ ───
+╟───────┐//////│    ║  │        User sees:
+║       │//////│  Th║  │       ┌───────────┐
+║e quick│//////│    ║ 64K      │           │
+║       │//////└────╢  │       │  The quick│
+╟───────┘///////////║  │       │           │
+║///////////////////║  │       └───────────┘
+╚═══════════════════╝ ───
+The wrapping is transparent to the user.  So, it appears that you can
+scroll left & right infinitely, by simply always updating the amount of
+memory that has scrolled into view.
+
+  But what happens when you scroll too far down?  Now Intel segments come
+to the rescue!  Because the video memory is 64K, and that is also the
+largest amount of memory you can access in a segment, the segment arithmetic
+performs the top-to-bottom wrapping for me.  It results in a similar
+situation as is pictured above, but with the screen split horizontally
+instead of vertically.  Again, it's completely transparent to the user.
+
+  One performance optimization that I've done is to organize the background
+picture that is being scrolled into quantized "tiles" -- 16x16 pixels in
+area.  This means that you can store a large amount of picture data if that
+data is repetitive -- as the backgrounds of many video games are.  This also
+helps when figuring out how much new stuff to draw on the screen.  I can wait
+until the panning crosses a 16-pixel border, then draw another 16-pixel
+strip, and then wait for another tile crossing, etc.  You can see this in
+the MAP.INC and SCROLL.INC code.  16x16 pixels also means 256 pixels per
+tile, which is always a convenient number in assembly... it helps out in
+several places in the code.
+
+  So, the display page is "wandering" around the video memory, only drawing
+what is necessary at any time.  Meanwhile you can animate sprites over the
+background, etc.  The only problem is that with one page, information is
+constantly being drawn to that page and you can never guarantee that it is in
+a correct state at the time of a vertical refresh period.  Instead, I actually
+use several pages, so that one can be shown while the other is worked on.
+This guarantees a perfect picture at any time.  So for now, let's ignore the
+scrolling for a second, and talk about the paging, because it's easier to
+understand the paging if scrolling isn't happening.
+
+  Here's a basic explanation of how the paging works.  I use three separate
+pages, a Draw page, a Show page, and a Blank page.  The Show page refers to
+the page that is currently showing, the Draw page to the page that is
+under construction (to be shown next frame), and the Blank page should always
+be maintained as an up-to-date blank background page.  (The Blank page is
+useful for sprite programming which I am going to be doing next.)  Each
+of the pages is 352x240, although the screen resolution is only 320x200.
+
+  Each frame, the pages rotate DrawPage->ShowPage->BlankPage->DrawPage.
+This means that at the beginning of the frame, the Draw Page is already
+blank, so all that is necessary is to draw on a bunch of sprites.  The
+BlankPage, though, is no longer blank (it's still got stuff from what
+was the ShowPage) so we have to erase it, by blanking out the sprites
+(luckily the new DrawPage _is_ empty, so we can use a Mode X 32-bit video-
+to-video copy to blank it).  Hope you're still with me.
+
+  So, this loop continues with each frame, and the loop invariants are
+maintained:  Show Page is always a "good" frame -- don't touch it.  Blank
+Page is always blank.  Draw Page can look like anything.  Now to include
+the scrolling again:
+
+  The way I do scrolling with several pages is that the pages ALL wander
+around video memory, only they're smaller (1/3 of the size that they could
+have been, to be exact!).  Here's a picture of the situation at its worst:
+╔══════════════╗ ───
+║  │//└────────║  │
+║──┘///////////║  │
+║/////┌────────║  │
+║──┐//│PAGE 0  ║  │
+║  │//│ (Draw) ║  │
+║  │//└────────║  │
+║──┘///////////║ 64K
+║/////┌────────║ (21K each page)
+║──┐//│PAGE 1  ║  │
+║  │//│ (Show) ║  │
+║  │//└────────║  │
+║──┘///////////║  │
+║/////┌────────║  │
+║──┐//│PAGE 2  ║  │
+║  │//│ (Blank)║  │
+╚══════════════╝ ───
+The pages always maintain an equal distance apart as they wander.  Since
+all pages move in parallel, the way it used to work is that as soon as the
+pages scrolled, I would draw the newly-visible picture information on
+all three of the pages.  This worked great, except that it resulted in
+a slight pause every time the screen scrolled because it was doing hardware
+pan most of the time (which is very fast) and the drawing routines were
+slow.  Now, I've spread the copying over successive frames to allow a
+smoother scrolling rate.  This is possible because it's not really necessary
+to draw the new information to a page before that page becomes the show
+page...
+
+  I hope that this has made some sense.  It's pretty complicated stuff.
+Take a look at the code and maybe that will help.  Or, write me mail
+(my email address is below).  Or, design your own way and ignore this
+whole thing.
+
+COMING SOON:
+
+  Next up are Sprite routines.  I threw in what I started as SPRITE.INC,
+although it's not included in the project right now.
+  Sound support
+
+  Who knows what else?  Depends on what people send me!
+
+-------------------------------------------------------------------------
+                           R E A D   T H I S
+-------------------------------------------------------------------------
+                           R E A D   T H I S
+-------------------------------------------------------------------------
+
+  This code is being released as "SwapWare".  That means that if you wanted
+to go ahead and use my code directly, I really wouldn't care.  But I ask
+that you send me some of your code that you think is neat.  Especially if
+it's modifications that you make to this code, such as quick sprite drawing
+or optimizations.
+
+  I'm not going to brag and say that I "threw this together in a few hours".
+I didn't, it took me many days of work to get it working properly.  But
+I'm also not looking for money as compensation for my labor.  I make
+great money at my real day job and you probably have a better use for your
+donations, such as legitimizing your unregistered shareware and pirated
+games.  I'm in this for the knowledge ... so my best payback would be to
+get lots of code from people out there, stuff to really help make a great
+game.  In particular, these would be great:
+        * 32-bit code
+        * Tricky optimizations
+        * Fast BitBlt/masked BitBlt code
+        * Useful File I/O functions
+        * 3D polygon and texture mapping code
+        * Maintenance routines -- like numeric conversions, etc.
+        * Hardware access code like timing routines and interrupt
+          handlers
+Any of those would be very helpful when writing a fast scrolling game.
+
+You can contact me (for the rest of this term only) at
+        seisner@athena.mit.edu
+Feel free to ask any questions you want!  I check my mail about once or
+twice a week so don't expect instant turnaround...  If you're desperate
+to talk to me, say if you work at Origin and want to give me the source
+code to Strike Commander or whatnot, you can also reach me at:
+        Steve Eisner
+        (617) 247-8691
+and leave a message.  But I'd rather you wrote e-mail.
+
+                                        Thanks,
+                                           Steve Eisner
+
+*  Read rec.games.programmer!   And for those who already do:
+   I dream of a world where no one argues over why Wolfenstein
+   3-D sucks or why it doesn't.  Would people just give it a
+   break?
+
\ No newline at end of file
diff --git a/16/scrasm/SCROLL.EXE b/16/scrasm/SCROLL.EXE
new file mode 100644
index 00000000..8181acf1
Binary files /dev/null and b/16/scrasm/SCROLL.EXE differ
diff --git a/16/scrasm/SCROLL.INC b/16/scrasm/SCROLL.INC
new file mode 100644
index 00000000..be58c025
--- /dev/null
+++ b/16/scrasm/SCROLL.INC
@@ -0,0 +1,441 @@
+;; Global variables used here ...
+;; Note that the Y values are kept pre-multiplied by VIRTUAL_WIDTH:
+;; Scroll compares ScrollPosY against
+;; (VIRTUAL_HEIGHT - SCREEN_HEIGHT) * VIRTUAL_WIDTH and adds it straight
+;; to ScrollPosX to form the scroll offset, and ScrollDY is added to
+;; ScrollPosY without any scaling, so it has to be in the same units.
+EVEN
+ScrollPosX      dw      0       ; Scroll origin, upper-left X
+ScrollPosY      dw      0       ; Scroll origin, upper-left Y
+ScrollDX        dw      0       ; Amount to change scroll origin, X
+ScrollDY        dw      0       ; Amount to change scroll origin, Y
+
+;; SCROLL:
+;; This routine takes care of all of the scrolling, however it calls
+;; outside drawing routines to update the screen.  Scrollx and
+;; Scrolly determine the amount to scroll by.
+;; Note that this does only RELATIVE scrolling, not absolute scrolling.
+;; Scroll saves time by updating only up to the one row or column of
+;; tiles which have come into view due to a change in scroll offset.
+;; In other words, it's not good for "jumping" to a particular point,
+;; although this effect can be accomplished in other ways -- the draw_full
+;; routine is available to draw a full screen again.
+;; Sometimes this means that you will have to calculate values ahead of
+;; time, for instance if you wish the scrolling to keep a certain sprite
+;; in the center of the screen.  In this case, just set ScrollDX and
+;; ScrollDY to the delta-x and delta-y of the sprite.
+;; * Newly added:
+;; Since there are three pages, it is necessary to keep each one of them
+;; up to date with each scroll.  Recently, I was doing some fast (8+
+;; pixels per frame) scrolling and noticed that there was a significant
+;; pause when the screen snapped to a new origin.  (The origin is always
+;; at a square's corner, even though it may not look like it because it
+;; disguises things by smooth-panning the hardware.)  Every time it
+;; scrolled, it was drawing the new information and copying it to the
+;; two other planes.  I've now distributed the load over successive
+;; pages, in other words it doesn't copy the new info all at once, but
+;; over several frames.  This really smoothed out the scrolling so that
+;; while there are still some jumps, they only occur very infrequently
+;; and then only at 15 or 16 pixel/frame scroll rates...)  That's the
+;; "catchup" code at the bottom, and that's why it's more complex than
+;; it maybe could be...
+EVEN
+Scroll          PROC    near
+        ; In:   cs:ScrollDX / cs:ScrollDY = signed deltas to apply to
+        ;       cs:ScrollPosX / cs:ScrollPosY this frame.
+        ;       ES is used for every video-memory access in the catch-up
+        ;       copy below -- presumably the caller has already pointed
+        ;       ES at video memory (TODO confirm).
+        ; Out:  DrawPage.Alignment, .ScrollOffset, .UpperLeftAddress,
+        ;       .MapPosX, .MapPosY and .Valid are updated; MapInfo and
+        ;       upper_left are adjusted whenever the origin snaps.
+        ; Uses: AX, BX, SI, DI on every path; DX and BP on the wrap
+        ;       paths; CX, DX and BP in the catch-up copy.  Nothing is
+        ;       saved or restored here.
+        ;
+        ; Using the ScrollDX variable as delta-x, move the scroll-origin
+        ; in the x direction.  Then, if the visible screen is now
+        ; viewing invalid data, snap the origin to a new point and
+        ; draw any new columns that are necessary.
+do_x_scroll:    mov     ax,cs:ScrollPosX
+                add     ax,cs:ScrollDX           ; ScrollDX is a delta-x
+                jl      wrap_l                  ; wrap left if negative
+                cmp     ax,VIRTUAL_WIDTH - SCREEN_WIDTH ; too far right?
+                jge     wrap_r                  ; wrap right if too big
+                mov     cs:ScrollPosX,ax        ; Stores new scroll-x
+        ; (just like above, for y:)
+        ; Using the ScrollDY variable as delta-y, move the scroll-origin
+        ; in the y direction.  Then, if the visible screen is now
+        ; viewing invalid data, snap the origin to a new point and
+        ; draw any new rows that are necessary.
+do_y_scroll:    mov     ax,cs:ScrollPosY
+                add     ax,cs:ScrollDY          ; ScrollDY is a delta-y
+                jl      wrap_t                  ; wrap top if negative
+                cmp     ax,(VIRTUAL_HEIGHT - SCREEN_HEIGHT) * VIRTUAL_WIDTH
+                jge     wrap_b                  ; wrap bottom if too big
+                mov     cs:ScrollPosY,ax        ; Store the new scroll-y
+                jmp     calculate
+
+        ; To wrap to the right:
+        ; Add a square's width to the origin's upper left corner, and
+        ; subtract the same amount from the scroll origin's upper left
+        ; corner.  This makes no difference on the screen but allows
+        ; us to forget about the leftmost column on the screen (it's
+        ; offscreen now...) so we can take over the right column.
+        ; See any documentation I included for an explanation of the
+EVEN    ; scrolling...
+wrap_r:         add     cs:upper_left,SQUARE_WIDTH / 4
+                sub     ax,SQUARE_WIDTH
+                mov     cs:ScrollPosX,ax
+
+        ; OffX1 and OffX2 step forward by one, wrapping to 0 at Wid.
+                mov     dx,MapInfo.Wid
+                mov     bp,MapInfo.OffX1
+                inc     bp
+                cmp     bp,dx
+                jb      wrap_r1_ok
+                sub     bp,dx
+wrap_r1_ok:     mov     MapInfo.OffX1,bp
+
+                mov     bp,MapInfo.OffX2
+                inc     bp
+                cmp     bp,dx
+                jb      wrap_r2_ok
+                sub     bp,dx
+wrap_r2_ok:     mov     MapInfo.OffX2,bp
+
+        ; WrapX counts down instead; when it reaches 0 it is reset
+        ; to Wid.
+                mov     bp,MapInfo.WrapX
+                dec     bp
+                jnz     wrap_r3_ok
+                add     bp,dx
+wrap_r3_ok:     mov     MapInfo.WrapX,bp
+
+                call    update_right
+                jmp     do_y_scroll     ; Jump back to do Y
+
+EVEN    ; Same for left side
+wrap_l:         sub     cs:upper_left,SQUARE_WIDTH / 4
+                add     ax,SQUARE_WIDTH
+                mov     cs:ScrollPosX,ax
+
+        ; OffX1 and OffX2 step back by one.  Decrementing 0 gives
+        ; 0FFFFh, which fails the unsigned "below Wid" test and is
+        ; brought back to Wid - 1 by the ADD.
+                mov     dx,MapInfo.Wid
+                mov     bp,MapInfo.OffX1
+                dec     bp
+                cmp     bp,dx
+                jb      wrap_l1_ok
+                add     bp,dx
+wrap_l1_ok:     mov     MapInfo.OffX1,bp
+
+                mov     bp,MapInfo.OffX2
+                dec     bp
+                cmp     bp,dx
+                jb      wrap_l2_ok
+                add     bp,dx
+wrap_l2_ok:     mov     MapInfo.OffX2,bp
+
+        ; WrapX counts up here; going past Wid brings it back to 1.
+                mov     bp,MapInfo.WrapX
+                inc     bp
+                cmp     bp,dx
+                jbe     wrap_l3_ok
+                sub     bp,dx
+wrap_l3_ok:     mov     MapInfo.WrapX,bp
+
+                call    update_left
+                jmp     do_y_scroll     ; Jump back to do Y
+
+EVEN    ; Same for bottom
+wrap_b:         add     cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4
+                sub     ax,SQUARE_HEIGHT * VIRTUAL_WIDTH
+                mov     cs:ScrollPosY,ax
+
+        ; OffY1 and OffY2 move in steps of Wid (one map row) and wrap
+        ; at Extent.
+                mov     bp,MapInfo.OffY1
+                mov     dx,MapInfo.Extent
+                add     bp,MapInfo.Wid
+                cmp     bp,dx
+                jb      wrap_b1_ok
+                sub     bp,dx
+wrap_b1_ok:     mov     MapInfo.OffY1,bp
+
+                mov     bp,MapInfo.OffY2
+                add     bp,MapInfo.Wid
+                cmp     bp,dx
+                jb      wrap_b2_ok
+                sub     bp,dx
+wrap_b2_ok:     mov     MapInfo.OffY2,bp
+
+        ; WrapY counts down; at 0 (or below) it is reset by adding Ht.
+                mov     dx,MapInfo.Ht
+                mov     bp,MapInfo.WrapY
+                dec     bp
+                jg      wrap_b3_ok
+                add     bp,dx
+wrap_b3_ok:     mov     MapInfo.WrapY,bp
+
+                call    update_bottom
+                mov     ax,cs:ScrollPosY ; reload: calculate expects AX = ScrollPosY
+                jmp     calculate       ; Jump down to calc new offsets
+
+EVEN    ; Same for top
+wrap_t:         sub     cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4
+                add     ax,SQUARE_HEIGHT * VIRTUAL_WIDTH
+                mov     cs:ScrollPosY,ax
+
+                mov     bp,MapInfo.OffY1
+                mov     dx,MapInfo.Extent
+                sub     bp,MapInfo.Wid
+                cmp     bp,dx
+                jb      wrap_t1_ok
+                add     bp,dx
+wrap_t1_ok:     mov     MapInfo.OffY1,bp
+
+                mov     bp,MapInfo.OffY2
+                sub     bp,MapInfo.Wid
+                cmp     bp,dx
+                jb      wrap_t2_ok
+                add     bp,dx
+wrap_t2_ok:     mov     MapInfo.OffY2,bp
+
+                mov     bp,MapInfo.WrapY
+                mov     dx,MapInfo.Ht
+                inc     bp
+                cmp     bp,dx
+                jbe     wrap_t3_ok
+                sub     bp,dx
+wrap_t3_ok:     mov     MapInfo.WrapY,bp
+
+                call    update_top
+                mov     ax,cs:ScrollPosY ; reload: calculate expects AX = ScrollPosY
+                jmp     calculate       ; Jump down to calc new offsets
+
+EVEN
+        ; (Data in the middle of the procedure: every path above ends in
+        ; a JMP, so execution never falls into this table.  Its only
+        ; use in this routine is in the commented-out lines below.)
+align_mask_table DB     11h,22h,44h,88h
+calculate:
+        ; Calculate the scroll offset
+        ; AX already = ScrollPosY
+                add     ax,cs:ScrollPosX        ;Now AX = scroll offset
+
+        ; Calculate the plane alignment
+                mov     bl,al
+                and     bx,0003h
+                mov     cs:DrawPage.Alignment,bl
+;               mov     bl,cs:align_mask_table[bx]
+;               mov     cs:DrawPage.AlignmentMask,bl
+
+        ; Now we don't need Scroll Offset on a pixel level any more,
+        ; so shift it to a byte level (/4) and store it away.
+                shr     ax,2
+                mov     cs:DrawPage.ScrollOffset,ax
+
+        ; Calculate the actual upper left corner address
+                mov     si,cs:DrawPage.Address
+                add     si,cs:upper_left
+                mov     cs:DrawPage.UpperLeftAddress,si
+
+        ; And the map offset:
+                mov     bx,MapInfo.WrapX
+                mov     cs:DrawPage.MapPosX,bx
+                mov     di,MapInfo.WrapY
+                mov     cs:DrawPage.MapPosY,di
+
+                mov     cs:DrawPage.Valid,1
+                cmp     cs:BlankPage.Valid,0
+                je      no_catch_up
+
+        ; Lastly, update dirty area (if any) on blank page.
+        ; BX and DI still contain the draw page's MapPosX / MapPosY
+        ; (loaded just above); subtracting the blank page's values
+        ; leaves the X and Y map deltas in BX and DI.
+                sub     bx,cs:BlankPage.MapPosX
+                sub     di,cs:BlankPage.MapPosY
+                jnz     yes_catch_up
+                cmp     bx,0
+                jnz     yes_catch_up
+        ; No catchup necessary -- return.
+no_catch_up:    ret
+
+;; Okay, this stuff is a mess.  I've registerized everything except
+;; for the video data itself.  I'll try to comment it best I can.
+EVEN
+yes_catch_up:
+        ; First, switch into full-copy mode.  This means latching the
+        ; bit mask as coming entirely from the local 32-bit registers
+        ; and then setting the map mask to write to all 4 planes.  This
+        ; is Mode X's greatest advantage, when you can do it!  It
+        ; provides a 2x speedup or so...
+                mov     dx,SC_INDEX     ; Select Sequencer input
+                mov     ax,0F02h
+                out     dx,ax           ; set map mask = all bits
+
+                mov     dx,GC_INDEX
+                mov     ax,ALL_COPY_BITS
+                out     dx,ax
+
+        ; JKEYNP is a macro defined elsewhere (presumably "jump if key
+        ; not pressed" -- TODO confirm).  The only thing it can skip is a
+        ; NOP, so this looks like a place to hang a debugger breakpoint
+        ; on the B key rather than real logic.
+                JKEYNP  kB,isntbp
+isbp:           nop
+isntbp:
+        ; Next, calculate the amount to catch up the top/bottom rows
+        ; If we just wrapped over the edge, it is possible that the
+        ; distance traveled will be as high as MapInfo.Ht - 1.  So,
+        ; in the fashion of signed numbers, if the number is greater
+        ; than MapInfo.Ht / 2, we take it to mean negative.  To convert
+        ; it to signed, we have to shift it into the proper range.  But
+        ; if it's less than MapInfo.Ht / 2, then it's okay as it is.
+                mov     ax,di
+                cmp     ax,0
+                je      y_mod
+
+                mov     cx,MapInfo.Ht
+                cwd             ; DX = -1 or 0 based on AX's sign.
+                and     dx,cx   ; DX = Ht or 0
+                add     ax,dx   ; AX = 0 ... Ht (unsigned)
+
+                mov     di,ax
+                shl     di,1
+                cmp     di,cx
+                jb      y_signed
+                sub     ax,cx
+        ; The Y delta is negated here (the X delta further down is
+        ; not): after this, a negative value selects catchup_t and a
+        ; positive one catchup_b.
+y_signed:       neg     ax
+
+        ; Take |AX| (the signed Y delta), scale it by 8 and use it to
+        ; index MultVirtWidth; the value looked up is then given AX's
+        ; sign back and ends up in DI.
+        ; This routine also calculates BP, which will be used as a loop
+        ; counter to determine how many rows to draw on the left/right
+        ; column copy.
+y_mod:          mov     bp,ax
+                cwd
+                add     bp,dx
+                xor     bp,dx           ; BP = |AX|
+                shl     bp,3            ; BP = (SQUARE_HEIGHT / 2) * |dY|
+                                        ; (i.e. 8 * |dY|; assumes 16-pixel squares)
+                mov     di,cs:MultVirtWidth[bp] ; Use multiplication table
+                add     di,dx                   ; to calculate new DI, then
+                xor     di,dx                   ; restore the sign.
+                sub     bp,VIRTUAL_HEIGHT / 2
+        ; Out:  DI = signed amount to copy for top/bottom (used below as
+        ;            the copy_tb_loop count, 4 bytes per count),
+        ;       BP = 8 * |dY| - VIRTUAL_HEIGHT / 2.  This is zero or
+        ;            negative; copy_rl_loop INCs it up to zero, doing
+        ;            two rows per count.
+
+        ; Change BX (delta-x) to signed from unsigned, store in AX
+                mov     ax,bx
+                mov     cx,MapInfo.Wid
+                cwd
+                and     dx,cx   ; DX = Wid or 0
+                add     ax,dx   ; AX = 0 ... Wid
+
+                mov     bx,ax
+                shl     bx,1
+                cmp     bx,cx
+                jb      x_signed
+                sub     ax,cx
+x_signed:
+
+        ; The following is an optimization which would slow down on
+        ; normal memory, but I believe it will be okay on VGA memory,
+        ; which is so incredibly slow.  Basically, I've replaced all
+        ; "rep movsb"'s with a loop that first calculates "bx = di - si",
+        ; and then loops performing "mov ds:[si],es:[si+bx]".  Why?
+        ; Because of several reasons, none of which I'm sure actually
+        ; help out, but they do make for smaller code.  1)  It means that
+        ; I only have to maintain SI, and "DI" is maintained automatically
+        ; (because DI - SI should remain constant).  2)  Don't have to
+        ; calculate DS.  Not much gain here.  3)  Because I'd already
+        ; unrolled the loops, and the "rep movsb"'s had become instead
+        ; "mov al, ds:[si] / mov es:[di], al / mov al, ds:[si + 1] /
+        ; mov es:[di + 1],al ... etc ... add si, 4 / add di, 4".  In
+        ; other words, I wasn't using MOVSB anyway.  The only advantage
+        ; I can see in MOVSB is that it doesn't have to store the answer
+        ; in AL so it could be slightly faster.  By unrolling the loops,
+        ; I'd already made up for that, I think.  4)  Normally, using
+        ; [SI + BX + 1] would incur a penalty of an additional clock
+        ; cycle (because it has to add two indexes + an offset).  But
+        ; the VGA memory and the '86 CPU can multi-task, and the VGA
+        ; is very slow.  So by the time the VGA is ready to write the
+        ; next byte, the one extra clock cycle has already passed.
+        ;
+        ; Am I right?  Does this make things faster?  I have no idea.
+        ; I haven't bothered to check both ways.  Please let me know
+        ; if I've missed something important...
+        ;
+        ; Here's the calculation of BX.  SI is already set.
+                ; si already = DrawPage.UpperLeftAddress
+                mov     bx,cs:BlankPage.Address
+                sub     bx,cs:DrawPage.Address
+
+        ; Now, converts SI into "1/4" units.  I do all the calculations
+        ; in "1/4" scale and then scale back up, mostly because it saved
+        ; me some instructions elsewhere.
+                shr     si,2
+        ; Stores this value of SI.  This will be restored after doing
+        ; the top/bottom copying.
+                mov     dx,si
+
+        ; Check if it's necessary to catch up the top or bottom.
+catchup_tb:     cmp     di,0
+                je      catchup_tb_end
+                jl      catchup_t
+catchup_b:      ; COPY BOTTOM
+        ; Move SI to point at the bottom of the screen - # of rows
+        ; to update.
+                add     si,((VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4) / 4
+                sub     si,di
+                jmp     copy_tb
+catchup_t:      ; COPY_TOP
+        ; Leave SI, but add to the "pushed" value of SI the number of
+        ; rows that will be drawn.  This prevents overlap between top
+        ; and right/left when moving diagonally.  Also, DI = |DI|
+                neg     di
+                add     dx,di
+
+        ; Now do the actual copying.  Shifts SI back into scale "1",
+        ; then performs an unrolled loop to copy the entire virtual
+        ; width * # of pixel rows.  Since DI is already in "1/4" scale,
+        ; it is only decremented once for each four pixels drawn.
+        ; Each pair below reads a byte from the draw page at ES:[SI] and
+        ; writes it at the same offset + BX, i.e. into the blank page.
+copy_tb:        shl     si,2
+copy_tb_loop:   mov     cl,es:[si]
+                mov     es:[si+bx],cl
+                mov     cl,es:[si+1]
+                mov     es:[si+bx+1],cl
+                mov     cl,es:[si+2]
+                mov     es:[si+bx+2],cl
+                mov     cl,es:[si+3]
+                mov     es:[si+bx+3],cl
+                add     si,4
+                dec     di
+                jnz     copy_tb_loop
+catchup_tb_end:
+
+        ; Next, check to see if it's necessary to draw the right or
+        ; the left side.
+catchup_rl:     cmp     ax,0
+                je      catchup_rl_end
+                jg      catchup_l
+catchup_r:      ; COPY RIGHT
+        ; Adds to the "pushed" SI the width of the screen, minus
+        ; the number of columns to be drawn.  Also, AX = |AX|
+                neg     ax
+                add     dx,(VIRTUAL_WIDTH / 4) / 4
+                sub     dx,ax
+catchup_l:      ; COPY LEFT (or nothing)
+
+        ; Does the actual copying.  First pops SI from its stored value
+        ; and shifts it back into scale "1"
+        ; (DX is free again after this MOV, which is why DL can serve as
+        ; the scratch byte in the copy loop.)
+copy_rl:        mov     si,dx
+                shl     si,2
+
+        ; This is a loop over BP -- which has already been set as
+        ; VIRTUAL_HEIGHT - (# of bytes drawn in vertical update)
+        ; Again, this loop is unrolled such that it does two rows @
+        ; 4 bytes each with every iteration.
+        ; This LEA instruction is just a quick MOV DI, SI + 2 *y
+        ; DI is used to push the next value of SI for each iteration
+        ; of the loop.
+copy_rl_loop:   lea     di,[si + 2*(VIRTUAL_WIDTH/4)]
+                mov     cx,ax           ; CX = number of 4-byte groups per row
+copy_rl_col:    mov     dl,es:[si]
+                mov     es:[si+bx],dl
+                mov     dl,es:[si+1]
+                mov     es:[si+bx+1],dl
+                mov     dl,es:[si+2]
+                mov     es:[si+bx+2],dl
+                mov     dl,es:[si+3]
+                mov     es:[si+bx+3],dl
+                mov     dl,es:[si+VIRTUAL_WIDTH/4]
+                mov     es:[si+bx+VIRTUAL_WIDTH/4],dl
+                mov     dl,es:[si+VIRTUAL_WIDTH/4+1]
+                mov     es:[si+bx+VIRTUAL_WIDTH/4+1],dl
+                mov     dl,es:[si+VIRTUAL_WIDTH/4+2]
+                mov     es:[si+bx+VIRTUAL_WIDTH/4+2],dl
+                mov     dl,es:[si+VIRTUAL_WIDTH/4+3]
+                mov     es:[si+bx+VIRTUAL_WIDTH/4+3],dl
+                add     si,4
+                dec     cx
+                jnz     copy_rl_col
+                mov     si,di           ; SI = pop (SI + 2 * VIRTUAL_WIDTH/4)
+                inc     bp              ; (BP is negative, so INC it)
+                jnz     copy_rl_loop
+catchup_rl_end:
+
+        ; Switch back to all-draw mode.
+                mov     dx,GC_INDEX
+                mov     ax,ALL_DRAW_BITS
+                out     dx,ax
+                ret
+Scroll          ENDP
+
\ No newline at end of file
diff --git a/16/scrasm/SCROLL.LNK b/16/scrasm/SCROLL.LNK
new file mode 100644
index 00000000..275ba401
--- /dev/null
+++ b/16/scrasm/SCROLL.LNK
@@ -0,0 +1,2 @@
+main.obj+lztimer.obj  
+scroll; 
diff --git a/16/scrasm/SCROLL.MAP b/16/scrasm/SCROLL.MAP
new file mode 100644
index 00000000..0b18f2bf
Binary files /dev/null and b/16/scrasm/SCROLL.MAP differ
diff --git a/16/scrasm/SCROLL.PAL b/16/scrasm/SCROLL.PAL
new file mode 100644
index 00000000..5dee1969
Binary files /dev/null and b/16/scrasm/SCROLL.PAL differ
diff --git a/16/scrasm/SCROLL.TIL b/16/scrasm/SCROLL.TIL
new file mode 100644
index 00000000..3b47a8e1
Binary files /dev/null and b/16/scrasm/SCROLL.TIL differ
diff --git a/16/scrasm/SPRITE.INC b/16/scrasm/SPRITE.INC
new file mode 100644
index 00000000..9c3f2e48
--- /dev/null
+++ b/16/scrasm/SPRITE.INC
@@ -0,0 +1,280 @@
+; SPRITE routines: rectangle/sprite records plus the procedures that fill, draw and erase them
+MAX_SPRITE      EQU     100     ; NOTE(review): not referenced in the visible part of this file -- presumably a cap on the sprite count; confirm
+
+RECTANGLE STRUCT 2,NONUNIQUE            ; one node of a singly linked rectangle chain (2 = field alignment)
+                X       WORD    0       ; left edge: added as-is to the buffer offset, divided by 4 for the video offset
+                Y       WORD    0       ; top row: doubled to index the MultBufWidth / MultVirtWidth word tables
+                Wid4    BYTE    0       ; width in 4-byte groups (width/4); used as a count-down loop counter
+                Ht      BYTE    0       ; height in rows; used as a count-down loop counter
+                Color   BYTE    0       ; byte value do_fill_buffer stores into the buffer for this rectangle
+                Next    WORD    0       ; offset of the next rectangle in the chain; 0 ends the chain
+        ; DrawMe is used to not bother with sprites that you know
+        ; are contained totally within another, allowing animated
+        ; eyes, etc to be stored in separate sprites.  These will be
+        ; drawn to the local buffer but skipped when copying to the
+        ; screen, so if they are not TOTALLY contained, they will
+        ; just get clipped away.
+                DrawMe  BYTE    1       ; default, yes draw me.  (Only the exact value 1 is treated as "draw".)
+        ; (Storage from this point on ... NEVER provide anything but
+        ; default for these values!  do_fill_buffer overwrites all four.)
+                address_virt    WORD    0       ; MultVirtWidth[Y] + X/4: offset inside a video page
+                address_buf     WORD    0       ; MultBufWidth[Y] + X: offset inside the buffer segment
+                next_line_virt  WORD    0       ; (VIRTUAL_WIDTH/4) - Wid4: video skip from row end to next row start
+                next_line_buf   WORD    0       ; 4 * ((SCREEN_WIDTH/4) - Wid4): buffer skip from row end to next row start
+RECTANGLE ENDS
+
+SPRITE  STRUCT 2, NONUNIQUE     ; a sprite currently holds nothing but its rectangle
+        RECTANGLE       <>      ; Contains rectangle info; code reaches the fields as [bp].RECTANGLE.<field>
+SPRITE  ENDS
+
+EVEN    ; Sprite table.  Initializer order: X, Y, Wid4, Ht, Color, Next, DrawMe
+rect5   SPRITE  <<40 ,60 , 2,8, C_TRANSPARENT, 0           , 0>>    ; Next = 0: last node of the chain
+rect4   SPRITE  <<80 ,30 , 2,8, C_TRANSPARENT, offset rect5, 0>>    ; rect1..rect5: 8x8 transparent holes, DrawMe = 0,
+rect3   SPRITE  <<120,60 , 2,8, C_TRANSPARENT, offset rect4, 0>>    ; each lying inside one of the blue rectangles below
+rect2   SPRITE  <<55 ,100, 2,8, C_TRANSPARENT, offset rect3, 0>>
+rect1   SPRITE  <<105,100, 2,8, C_TRANSPARENT, offset rect2, 0>>
+
+rect6   SPRITE  <<36 ,56 , 4,16, C_BLUE, offset rect1, 1>>  ; rect6..rect10: 16x16 blue; the chain continues into rect1
+rect7   SPRITE  <<76 ,26 , 4,16, C_BLUE, offset rect6, 1>>
+rect8   SPRITE  <<116,56 , 4,16, C_BLUE, offset rect7, 1>>
+rect9   SPRITE  <<51 ,96 , 4,16, C_BLUE, offset rect8, 1>>
+rect10  SPRITE  <<101,96 , 4,16, C_BLUE, offset rect9, 1>>
+
+;; Simply adding in these 5 rectangles (~20000 pixels for both
+;; drawing and erasing) really slows things down!  That's why
+;; it's important to optimize the sprite drawing routines!
+rect11  SPRITE  <<35 ,55 ,14,36, C_GREEN, offset rect10, 1>>        ; rect11..rect15: 56x36 green; chain continues into rect10
+rect12  SPRITE  <<75 ,25 ,14,36, C_GREEN, offset rect11, 1>>
+rect13  SPRITE  <<115,55 ,14,36, C_GREEN, offset rect12, 1>>
+rect14  SPRITE  <<50 ,95 ,14,36, C_GREEN, offset rect13, 1>>
+rect15  SPRITE  <<100,95 ,14,36, C_GREEN, offset rect14, 1>>
+
+FIRST_SPRITE    EQU     rect10  ; chain head: rect10..rect6, then rect1..rect5.  rect11..rect15 are left out; use rect15 to include them.
+
+EVEN
+AnimateSprites  PROC    near    ; erase the rectangles recorded for the blank page from the draw page, then draw the sprite chain
+                ret             ; NOTE(review): returns immediately, so everything below is unreachable -- presumably disabled on purpose; confirm
+        ; Blank out the draw page, by copying from the blank page
+        ; to the draw page all rectangles which had changed.  The
+        ; blank page must always be entirely blank if this is going
+        ; to work!
+                mov     di,cs:DrawPage.UpperLeftAddress         ; DI = copy destination (draw page)
+                add     di,cs:DrawPage.ScrollOffset
+                mov     si,cs:BlankPage.UpperLeftAddress        ; SI = copy source (blank page)
+                add     si,cs:BlankPage.ScrollOffset
+                mov     bp,cs:BlankPage.Rectangles              ; BP = chain of rectangles to restore
+                call    CopyRectangles
+
+        ; Now draw the sprites.  Uses a temporary buffer to ensure
+        ; minimal drawing to the screen, but that's not really necessary,
+        ; if memory is at a minimum.  It's just faster...
+                mov     bp,offset FIRST_SPRITE
+                mov     cs:DrawPage.Rectangles,bp       ; record the chain on the draw page (presumably for a later erase pass -- confirm)
+                call    do_fill_buffer                  ; fills the buffer and each node's address_*/next_line_* fields; exits with BP = 0
+                mov     di,cs:DrawPage.UpperLeftAddress
+                add     di,cs:DrawPage.ScrollOffset
+                mov     bh,cs:DrawPage.AlignmentMask    ; BH = plane mask smart_rects uses for its first plane pass
+                mov     bp,offset FIRST_SPRITE          ; reload the chain head: do_fill_buffer walked BP to the end
+                jmp     smart_rects     ; "call"  (tail jump: smart_rects' RET returns straight to our caller)
+AnimateSprites  ENDP
+
+smart_dest      DW      0       ; smart_rects scratch: video base offset (DI on entry + DrawPage.Address), added to every address_virt
+out_di          DW      0       ; smart_rects scratch: DI at the start of the current plane pass
+out_si          DW      0       ; smart_rects scratch: SI at the start of the current plane pass
+
+EVEN    ; Copies every DrawMe = 1 rectangle of a chain from the buffer segment to video memory, one plane pass at a time.
+smart_rects     PROC    near    ; In: BP -> first rectangle (not checked for 0), DI = page offset, BH = plane mask for the first pass
+                add     di,cs:DrawPage.Address
+                mov     ds,cs:segBuffer         ; DS = source buffer (DS is not restored on exit)
+                mov     es,cs:segVideo          ; ES = destination video segment
+                mov     dx,3c4h                 ; VGA sequencer index port
+                mov     al,02h                  ; index 02h = Map Mask register
+                out     dx,al
+                inc     dx                      ; DX = 3C5h (sequencer data): each plane mask is written here
+                mov     cs:smart_dest,di
+
+        ; === Beginning of loop through rectangles! ===
+sp_nextrect:
+                cmp     cs:[bp].RECTANGLE.DrawMe,1      ; only DrawMe == 1 is copied to the screen
+                jne     sp_next
+        ; Draw this rectangle from the buffer to screen memory.
+        ; Calculate the output address.
+                mov     si,cs:[bp].RECTANGLE.address_buf
+                mov     di,cs:[bp].RECTANGLE.address_virt
+                add     di,cs:smart_dest
+
+        ; Loop over 4 planes
+                mov     bl,4
+sp_plane_loop:  mov     al,bh
+                out     dx,al                   ; enable only the plane(s) selected by BH
+
+                mov     cs:out_di,di            ; remember where this pass started ...
+                mov     cs:out_si,si            ; ... the next pass starts one buffer byte further on
+
+        ; Loop over height
+                mov     ch,cs:[bp].RECTANGLE.Ht
+sp_row_loop:
+
+        ; Loop over width of rectangle (Wid4 is actually width/4)
+                mov     cl,cs:[bp].RECTANGLE.Wid4
+sp_col_loop:
+
+        ; Read a byte from the buffer
+        ; Is it transparent (no-modify)?  If so, just jump over the draw
+                mov     al,byte ptr ds:[si]
+                cmp     al,C_TRANSPARENT
+                je      sp_next_pixel
+        ; Otherwise, draw it on the screen, and mark it transparent
+        ; so that it won't be drawn again.
+                mov     byte ptr es:[di],al
+                mov     byte ptr ds:[si],C_TRANSPARENT
+
+        ; Skip to next 4-byte group (next column that can be drawn in
+        ; Mode X)  Also increment screen draw address, but only by 1
+        ; because Mode X keeps 4 pixels (one per plane) at each address
+sp_next_pixel:
+                add     si,4
+                inc     di
+
+                dec     cl
+                jnz     sp_col_loop
+
+        ; End of row.  Skip space to get to left edge of next row down
+        ;  Skip SI = (SCREEN_WIDTH - #bytesdrawn)
+        ; Only draw up to height of rectangle
+                add     si,cs:[bp].RECTANGLE.next_line_buf
+                add     di,cs:[bp].RECTANGLE.next_line_virt
+                dec     ch
+                jnz     sp_row_loop
+
+                mov     di,cs:out_di            ; rewind both pointers to the start of this pass
+                mov     si,cs:out_si
+                inc     si                      ; next pass reads the next byte of every 4-byte group
+                rol     bh,1                    ; next plane; CF = 1 only when bit 7 of BH rotates out
+                adc     di,0                    ; on that wrap, move on to the next video byte
+        ; NOTE(review): the wrap only works if BH repeats its bit in both nibbles (11h/22h/44h/88h) -- presumably what AlignmentMask holds; confirm
+                dec     bl
+                jnz     sp_plane_loop
+
+        ; Follow chain to next rectangle
+sp_next:        mov     bp,cs:[bp].RECTANGLE.Next
+                cmp     bp,0
+                jne     sp_nextrect
+        ; All done
+sp_end:         ret
+smart_rects     ENDP
+
+; BP -> first rectangle.  Follows BP->next, stops when BP = 0
+EVEN    ; Fills every rectangle of the chain into the buffer segment with its Color and stores its address_*/next_line_* fields.
+do_fill_buffer  PROC    near    ; Out: BP = 0.  Overwrites ES always, and AX, BX, CX, DX, DI when the chain is not empty.  DrawMe is not consulted.
+                mov     es,cs:segBuffer
+
+                cmp     bp,0                    ; empty chain: nothing to do
+                je      fill_end
+fill_loop:
+
+                mov     bx,cs:[bp].RECTANGLE.Y
+                shl     bx,1                    ; BX = word index y
+                mov     di,cs:MultBufWidth[bx]  ; DI = SW * y
+                mov     cx,cs:[bp].RECTANGLE.X  ; CX = x
+                add     di,cx                   ; DI = (SW * y) + x
+                mov     cs:[bp].RECTANGLE.address_buf,di ; (DI used later)
+
+                mov     ax,cs:MultVirtWidth[bx] ; AX = (VW/4) * y
+                shr     cx,2                    ; CX = (x / 4)
+                add     ax,cx                   ; AX = (VW * y + x)/4
+                mov     cs:[bp].RECTANGLE.address_virt,ax
+
+                mov     dx,(VIRTUAL_WIDTH / 4)
+                sub     dl,cs:[bp].RECTANGLE.Wid4 ; DX = (VW - w) / 4   (8-bit subtract: right only while the low byte of the constant >= Wid4)
+                mov     cs:[bp].RECTANGLE.next_line_virt,dx
+
+                mov     dx,(SCREEN_WIDTH / 4)
+                sub     dl,cs:[bp].RECTANGLE.Wid4 ; DX = (SW - w) / 4   (same 8-bit caveat as above)
+                shl     dx,2                      ; DX = SW - w
+                mov     cs:[bp].RECTANGLE.next_line_buf,dx
+
+                mov     ah,cs:[bp].RECTANGLE.Color      ; AX = Color in both bytes
+                mov     al,cs:[bp].RECTANGLE.Color
+
+                mov     ch,cs:[bp].RECTANGLE.Ht         ; CH = rows left (count-down loop: Ht = 0 would run 256 rows)
+fill_row_loop:  mov     cl,cs:[bp].RECTANGLE.Wid4       ; CL = 4-byte groups left in this row (Wid4 = 0 would run 256)
+fill_col_loop:  mov     es:[di],ax                      ; two word stores = 4 buffer bytes of Color
+                mov     es:[di+2],ax
+                add     di,4
+                dec     cl
+                jnz     fill_col_loop
+                add     di,dx                           ; DX still holds next_line_buf: jump to this rectangle's next row
+                dec     ch
+                jnz     fill_row_loop
+
+                mov     bp,cs:[bp].RECTANGLE.Next
+                cmp     bp,0
+                jne     fill_loop
+fill_end:       ret
+do_fill_buffer  ENDP
+
+EVEN    ; Copies every DrawMe = 1 rectangle of a chain from one video page to another.  Overwrites AX, BX, CX, DX, SI, DI, BP, DS, ES.
+CopyRectangles  PROC    near    ; In: SI = source page offset, DI = destination page offset, BP -> first rectangle (0 = empty chain)
+                cmp     bp,0            ; empty chain: return without touching segments or VGA state
+                je      cr_done         ; (same guard as do_fill_buffer; without it BP = 0 walks garbage at cs:[0])
+                mov     ax,cs:segVideo
+                mov     ds,ax
+                mov     es,ax
+        ; Calculate the displacement from the destination page to the
+        ; source page.  Since in a movsb loop the two would remain a
+        ; constant distance apart, we only have to step DI: the source
+        ; byte is always at DI+BX.  BX must therefore be SI - DI (it
+        ; wraps mod 64K); DI - SI would read from 2*dest - src instead.
+                mov     bx,si
+                sub     bx,di
+
+                mov     dx,GC_INDEX
+                mov     ax,ALL_COPY_BITS        ; defined elsewhere; presumably makes writes come from the VGA latches -- confirm
+                out     dx,ax
+
+                mov     dx,SC_INDEX
+                mov     ax,0F02h                ; index 02h, data 0Fh: word write of index + data (Map Mask = all four planes if SC_INDEX is 3C4h)
+                out     dx,ax
+                mov     si,di   ;store destination
+
+        ; === Beginning of loop through rectangles! ===
+cr_nextrect:    cmp     cs:[bp].RECTANGLE.DrawMe,1      ; only DrawMe == 1 is copied
+                jne     cr_next
+        ; Copy this rectangle from the source page to the destination page.
+        ; Calculate the output address.
+                mov     di,cs:[bp].RECTANGLE.address_virt
+                mov     dx,cs:[bp].RECTANGLE.next_line_virt
+                add     di,si
+
+        ; Loop over height
+                mov     ch,cs:[bp].RECTANGLE.Ht
+cr_row_loop:
+        ; Loop over width of rectangle (Wid4 is actually width/4)
+                mov     cl,cs:[bp].RECTANGLE.Wid4
+cr_col_loop:    mov     al,ds:[di + bx]         ; read the source byte that corresponds to DI
+                stosb                           ; write it at ES:DI and step DI (relies on DF = 0; not set here)
+                dec     cl
+                jnz     cr_col_loop
+                mov     al,ds:[di + bx]         ; one extra byte per row, DI not advanced: Wid4+1 bytes are copied,
+                mov     es:[di],al              ; presumably to cover a rectangle that straddles one more column -- confirm
+
+        ; End of row.  Skip space to get to left edge of next row down
+        ; Only draw up to height of rectangle
+                add     di,dx
+                dec     ch
+                jnz     cr_row_loop
+
+        ; Follow chain to next rectangle
+cr_next:        mov     bp,cs:[bp].RECTANGLE.Next
+                cmp     bp,0
+                jne     cr_nextrect
+        ; All done
+cr_end:
+                mov     dx,GC_INDEX
+                mov     ax,ALL_DRAW_BITS        ; switch the graphics controller back to normal drawing
+                out     dx,ax
+cr_done:        ret
+CopyRectangles  ENDP
+
+
\ No newline at end of file