+;; Global scroll state; Scroll reads and writes these through CS: overrides.\r
+EVEN\r
+ScrollPosX dw 0 ; Scroll origin, upper-left X, in pixels (checked against VIRTUAL_WIDTH - SCREEN_WIDTH)\r
+ScrollPosY dw 0 ; Scroll origin, upper-left Y, as a linear offset (row * VIRTUAL_WIDTH), not a row count\r
+ScrollDX dw 0 ; Signed amount added to ScrollPosX on each call to Scroll (pixels)\r
+ScrollDY dw 0 ; Signed amount added to ScrollPosY on each call (same row * VIRTUAL_WIDTH units)\r
+\r
+;; SCROLL:\r
+;; This routine takes care of all of the scrolling, however it calls\r
+;; outside drawing routines to update the screen. ScrollDX and\r
+;; ScrollDY determine the amount to scroll by.\r
+;; Note that this does only RELATIVE scrolling, not absolute scrolling.\r
+;; Scroll saves time by updating only up to the one row or column of\r
+;; tiles which have come into view due to a change in scroll offset.\r
+;; In other words, it's not good for "jumping" to a particular point,\r
+;; although this effect can be accomplished in other ways -- the draw_full\r
+;; routine is available to draw a full screen again.\r
+;; Sometimes this means that you will have to calculate values ahead of\r
+;; time, for instance if you wish the scrolling to keep a certain sprite\r
+;; in the center of the screen. In this case, just set ScrollDX and\r
+;; ScrollDY to the delta-x and delta-y of the sprite.\r
+;; * Newly added:\r
+;; Since there are three pages, it is necessary to keep each one of them\r
+;; up to date with each scroll. Recently, I was doing some fast (8+\r
+;; pixels per frame) scrolling and noticed that there was a significant\r
+;; pause when the screen snapped to a new origin. (The origin is always\r
+;; at a square's corner, even though it may not look like it because it\r
+;; disguises things by smooth-panning the hardware.) Every time it\r
+;; scrolled, it was drawing the new information and copying it to the\r
+;; two other pages. I've now distributed the load over successive\r
+;; pages, in other words it doesn't copy the new info all at once, but\r
+;; over several frames. This really smoothed out the scrolling so that\r
+;; while there are still some jumps, they only occur very infrequently\r
+;; and then only at 15 or 16 pixel/frame scroll rates. That's the\r
+;; "catchup" code at the bottom, and that's why it's more complex than\r
+;; it maybe could be...\r
+EVEN\r
+Scroll PROC near\r
+ ; X pass: AX = ScrollPosX + ScrollDX. A result inside\r
+ ; 0 .. VIRTUAL_WIDTH - SCREEN_WIDTH - 1 is stored as-is. Anything\r
+ ; outside jumps to wrap_l / wrap_r, which shift the origin by one\r
+ ; square, call update_left / update_right and rejoin at do_y_scroll.\r
+do_x_scroll: mov ax,cs:ScrollPosX\r
+ add ax,cs:ScrollDX ; AX = candidate scroll-x (pixels)\r
+ jl wrap_l ; signed result below zero: wrap left\r
+ cmp ax,VIRTUAL_WIDTH - SCREEN_WIDTH ; past the last valid origin?\r
+ jge wrap_r ; signed >= limit: wrap right\r
+ mov cs:ScrollPosX,ax ; in range: store new scroll-x\r
+ ; Y pass: same scheme as X, with one difference in units.\r
+ ; ScrollPosY and ScrollDY are linear offsets (rows * VIRTUAL_WIDTH),\r
+ ; so the upper limit below is scaled by VIRTUAL_WIDTH as well.\r
+ ; Out-of-range results go to wrap_t / wrap_b, which shift by one\r
+ ; square row, call update_top / update_bottom and jump to calculate.\r
+do_y_scroll: mov ax,cs:ScrollPosY\r
+ add ax,cs:ScrollDY ; AX = candidate scroll-y (linear offset)\r
+ jl wrap_t ; signed result below zero: wrap top\r
+ cmp ax,(VIRTUAL_HEIGHT - SCREEN_HEIGHT) * VIRTUAL_WIDTH\r
+ jge wrap_b ; signed >= limit: wrap bottom\r
+ mov cs:ScrollPosY,ax ; in range: store; AX stays = ScrollPosY for calculate\r
+ jmp calculate\r
+\r
+ ; To wrap to the right (entered with AX = ScrollPosX + ScrollDX):\r
+ ; Add a square's width in bytes (pixels / 4) to upper_left and\r
+ ; subtract a square's width in pixels from the scroll position.\r
+ ; This makes no difference on the screen but allows us to forget\r
+ ; about the leftmost column (it's offscreen now) so update_right\r
+ ; can take over the right column. The map offsets then follow:\r
+ ; OffX1/OffX2 step forward one column modulo MapInfo.Wid, WrapX\r
+EVEN ; steps down by one, wrapping from 1 back up to Wid.\r
+wrap_r: add cs:upper_left,SQUARE_WIDTH / 4\r
+ sub ax,SQUARE_WIDTH\r
+ mov cs:ScrollPosX,ax\r
+\r
+ mov dx,MapInfo.Wid\r
+ mov bp,MapInfo.OffX1\r
+ inc bp\r
+ cmp bp,dx\r
+ jb wrap_r1_ok\r
+ sub bp,dx\r
+wrap_r1_ok: mov MapInfo.OffX1,bp\r
+\r
+ mov bp,MapInfo.OffX2\r
+ inc bp\r
+ cmp bp,dx\r
+ jb wrap_r2_ok\r
+ sub bp,dx\r
+wrap_r2_ok: mov MapInfo.OffX2,bp\r
+\r
+ mov bp,MapInfo.WrapX\r
+ dec bp\r
+ jnz wrap_r3_ok\r
+ add bp,dx\r
+wrap_r3_ok: mov MapInfo.WrapX,bp\r
+\r
+ call update_right\r
+ jmp do_y_scroll ; X is done; continue with the Y pass\r
+\r
+EVEN ; Wrap left (AX = ScrollPosX + ScrollDX, below zero): mirror of wrap_r -- upper_left and the scroll position move one square the other way, OffX1/OffX2 step back one column modulo Wid, WrapX steps up one within 1..Wid\r
+wrap_l: sub cs:upper_left,SQUARE_WIDTH / 4\r
+ add ax,SQUARE_WIDTH\r
+ mov cs:ScrollPosX,ax\r
+\r
+ mov dx,MapInfo.Wid\r
+ mov bp,MapInfo.OffX1\r
+ dec bp\r
+ cmp bp,dx\r
+ jb wrap_l1_ok\r
+ add bp,dx\r
+wrap_l1_ok: mov MapInfo.OffX1,bp\r
+\r
+ mov bp,MapInfo.OffX2\r
+ dec bp\r
+ cmp bp,dx\r
+ jb wrap_l2_ok\r
+ add bp,dx\r
+wrap_l2_ok: mov MapInfo.OffX2,bp\r
+\r
+ mov bp,MapInfo.WrapX\r
+ inc bp\r
+ cmp bp,dx\r
+ jbe wrap_l3_ok\r
+ sub bp,dx\r
+wrap_l3_ok: mov MapInfo.WrapX,bp\r
+\r
+ call update_left\r
+ jmp do_y_scroll ; X is done; continue with the Y pass\r
+\r
+EVEN ; Wrap bottom (AX = ScrollPosY + ScrollDY, at or past the limit): upper_left advances one square row and the scroll position drops by the same amount, OffY1/OffY2 advance by Wid modulo Extent, WrapY steps down one (Ht is added once it is no longer > 0)\r
+wrap_b: add cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4\r
+ sub ax,SQUARE_HEIGHT * VIRTUAL_WIDTH\r
+ mov cs:ScrollPosY,ax\r
+\r
+ mov bp,MapInfo.OffY1\r
+ mov dx,MapInfo.Extent\r
+ add bp,MapInfo.Wid\r
+ cmp bp,dx\r
+ jb wrap_b1_ok\r
+ sub bp,dx\r
+wrap_b1_ok: mov MapInfo.OffY1,bp\r
+\r
+ mov bp,MapInfo.OffY2\r
+ add bp,MapInfo.Wid\r
+ cmp bp,dx\r
+ jb wrap_b2_ok\r
+ sub bp,dx\r
+wrap_b2_ok: mov MapInfo.OffY2,bp\r
+\r
+ mov dx,MapInfo.Ht\r
+ mov bp,MapInfo.WrapY\r
+ dec bp\r
+ jg wrap_b3_ok\r
+ add bp,dx\r
+wrap_b3_ok: mov MapInfo.WrapY,bp\r
+\r
+ call update_bottom\r
+ mov ax,cs:ScrollPosY\r
+ jmp calculate ; calculate expects AX = ScrollPosY, hence the reload just above\r
+\r
+EVEN ; Wrap top (AX = ScrollPosY + ScrollDY, below zero): upper_left retreats one square row and the scroll position grows by the same amount, OffY1/OffY2 retreat by Wid modulo Extent, WrapY steps up one within 1..Ht\r
+wrap_t: sub cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4\r
+ add ax,SQUARE_HEIGHT * VIRTUAL_WIDTH\r
+ mov cs:ScrollPosY,ax\r
+\r
+ mov bp,MapInfo.OffY1\r
+ mov dx,MapInfo.Extent\r
+ sub bp,MapInfo.Wid\r
+ cmp bp,dx\r
+ jb wrap_t1_ok\r
+ add bp,dx\r
+wrap_t1_ok: mov MapInfo.OffY1,bp\r
+\r
+ mov bp,MapInfo.OffY2\r
+ sub bp,MapInfo.Wid\r
+ cmp bp,dx\r
+ jb wrap_t2_ok\r
+ add bp,dx\r
+wrap_t2_ok: mov MapInfo.OffY2,bp\r
+\r
+ mov bp,MapInfo.WrapY\r
+ mov dx,MapInfo.Ht\r
+ inc bp\r
+ cmp bp,dx\r
+ jbe wrap_t3_ok\r
+ sub bp,dx\r
+wrap_t3_ok: mov MapInfo.WrapY,bp\r
+\r
+ call update_top\r
+ mov ax,cs:ScrollPosY\r
+ jmp calculate ; calculate expects AX = ScrollPosY, hence the reload just above\r
+\r
+EVEN\r
+align_mask_table DB 11h,22h,44h,88h\r
+calculate:\r
+ ; Calculate the scroll offset in pixels. Every path that gets\r
+ ; here arrives with AX = ScrollPosY (a row * VIRTUAL_WIDTH offset).\r
+ add ax,cs:ScrollPosX ; Now AX = scroll offset (pixels)\r
+\r
+ ; Plane alignment = low two bits of AL (the AND also clears BH)\r
+ mov bl,al\r
+ and bx,0003h\r
+ mov cs:DrawPage.Alignment,bl\r
+; mov bl,cs:align_mask_table[bx]\r
+; mov cs:DrawPage.AlignmentMask,bl\r
+\r
+ ; Now we don't need Scroll Offset on a pixel level any more,\r
+ ; so shift it to a byte level (/4) and store it away.\r
+ shr ax,2\r
+ mov cs:DrawPage.ScrollOffset,ax\r
+\r
+ ; Upper left corner address = page base + upper_left; SI keeps it for yes_catch_up\r
+ mov si,cs:DrawPage.Address\r
+ add si,cs:upper_left\r
+ mov cs:DrawPage.UpperLeftAddress,si\r
+\r
+ ; Record the map position; BX / DI stay live for the catch-up test below\r
+ mov bx,MapInfo.WrapX\r
+ mov cs:DrawPage.MapPosX,bx\r
+ mov di,MapInfo.WrapY\r
+ mov cs:DrawPage.MapPosY,di\r
+\r
+ mov cs:DrawPage.Valid,1\r
+ cmp cs:BlankPage.Valid,0\r
+ je no_catch_up\r
+\r
+ ; Lastly, update dirty area (if any) on blank page. BX and DI still hold the\r
+ ; draw page's map position; the SUBs turn them into the X / Y distance to the blank page.\r
+ sub bx,cs:BlankPage.MapPosX\r
+ sub di,cs:BlankPage.MapPosY\r
+ jnz yes_catch_up\r
+ cmp bx,0\r
+ jnz yes_catch_up\r
+ ; Both distances are zero: no catchup necessary -- return.\r
+no_catch_up: ret\r
+\r
+;; Catch-up copy, entered from calculate: BX / DI = draw-page minus blank-page map position (X / Y), SI = DrawPage.UpperLeftAddress.\r
+;; Everything except the video data itself is kept in registers, so the register roles are noted as they change.\r
+EVEN\r
+yes_catch_up:\r
+ ; First, switch into full-copy mode: the map mask is set to\r
+ ; write all 4 planes, and ALL_COPY_BITS is sent to the graphics\r
+ ; controller so that (per the original note) every written bit\r
+ ; comes from the VGA's 32-bit latches. This is Mode X's greatest\r
+ ; advantage, when you can do it! It provides a 2x speedup or so...\r
+ mov dx,SC_INDEX ; Select Sequencer input\r
+ mov ax,0F02h\r
+ out dx,ax ; index 02h, value 0Fh: set map mask = all bits\r
+\r
+ mov dx,GC_INDEX\r
+ mov ax,ALL_COPY_BITS\r
+ out dx,ax\r
+\r
+ JKEYNP kB,isntbp\r
+isbp: nop\r
+isntbp:\r
+ ; (NOTE(review): the JKEYNP / isbp stub above looks like a debug\r
+ ; breakpoint hook -- confirm the macro preserves BX, DI and SI.)\r
+ ; Next, fold the Y map distance in DI into a signed range around\r
+ ; zero: a wrap over the map edge can make the raw difference as\r
+ ; large as MapInfo.Ht - 1 in magnitude. Negative values get Ht\r
+ ; added first; a result with 2 * AX >= Ht then has Ht subtracted.\r
+ ; The NEG flips the sign so positive means "copy the bottom rows".\r
+ mov ax,di\r
+ cmp ax,0\r
+ je y_mod\r
+\r
+ mov cx,MapInfo.Ht\r
+ cwd ; DX = -1 or 0 based on AX's sign.\r
+ and dx,cx ; DX = Ht or 0\r
+ add ax,dx ; AX is now non-negative (0 .. Ht - 1, assuming both positions lie within the map)\r
+\r
+ mov di,ax\r
+ shl di,1\r
+ cmp di,cx\r
+ jb y_signed\r
+ sub ax,cx\r
+y_signed: neg ax\r
+\r
+ ; Convert the signed square count in AX into copy units:\r
+ ; BP = 8 * |AX| indexes MultVirtWidth to fetch DI (a magnitude,\r
+ ; after which the sign of AX is re-applied), and BP minus\r
+ ; VIRTUAL_HEIGHT / 2 becomes the negative loop counter that the\r
+ ; left/right column copy increments up to zero.\r
+y_mod: mov bp,ax\r
+ cwd\r
+ add bp,dx\r
+ xor bp,dx\r
+ shl bp,3 ; BP = 8 * |dY| (8 is presumably SQUARE_HEIGHT / 2)\r
+ mov di,cs:MultVirtWidth[bp] ; Table lookup; BP is used as a byte offset\r
+ add di,dx ; DX is 0 or -1 (from the CWD): add + xor\r
+ xor di,dx ; negates DI when AX was negative.\r
+ sub bp,VIRTUAL_HEIGHT / 2\r
+ ; Out: DI = signed copy length (counted down once per 4-byte group in copy_tb_loop),\r
+ ; BP = 8 * |dY| - VIRTUAL_HEIGHT / 2, i.e. minus the number of row pairs left over\r
+\r
+ ; Fold BX (X map distance) into a signed range around zero the same way; result in AX. No NEG here: negative AX selects the right-hand copy.\r
+ mov ax,bx\r
+ mov cx,MapInfo.Wid\r
+ cwd\r
+ and dx,cx ; DX = Wid or 0\r
+ add ax,dx ; AX is now non-negative (0 .. Wid - 1, assuming both positions lie within the map)\r
+\r
+ mov bx,ax\r
+ shl bx,1\r
+ cmp bx,cx\r
+ jb x_signed\r
+ sub ax,cx\r
+x_signed:\r
+\r
+ ; The following is an optimization which would slow down on\r
+ ; normal memory, but I believe it will be okay on VGA memory,\r
+ ; which is so incredibly slow. Basically, I've replaced all\r
+ ; "rep movsb"'s with a loop that first calculates "bx = di - si",\r
+ ; and then loops performing "read es:[si] / write es:[si+bx]". Why?\r
+ ; Because of several reasons, none of which I'm sure actually\r
+ ; help out, but they do make for smaller code. 1) It means that\r
+ ; I only have to maintain SI, and "DI" is maintained automatically\r
+ ; (because DI - SI should remain constant). 2) Don't have to\r
+ ; calculate DS. Not much gain here. 3) Because I'd already\r
+ ; unrolled the loops, and the "rep movsb"'s had become instead\r
+ ; "mov al, ds:[si] / mov es:[di], al / mov al, ds:[si + 1] /\r
+ ; mov es:[di + 1],al ... etc ... add si, 4 / add di, 4". In\r
+ ; other words, I wasn't using MOVSB anyway. The only advantage\r
+ ; I can see in MOVSB is that it doesn't have to store the answer\r
+ ; in AL so it could be slightly faster. By unrolling the loops,\r
+ ; I'd already made up for that, I think. 4) Normally, using\r
+ ; [SI + BX + 1] would incur a penalty of an additional clock\r
+ ; cycle (because it has to add two index registers + an offset). But\r
+ ; the VGA memory and the '86 CPU can multi-task, and the VGA\r
+ ; is very slow. So by the time the VGA is ready to write the\r
+ ; next byte, the one extra clock cycle has already passed.\r
+ ;\r
+ ; Am I right? Does this make things faster? I have no idea.\r
+ ; I haven't bothered to check both ways. Please let me know\r
+ ; if I've missed something important...\r
+ ;\r
+ ; BX = BlankPage.Address - DrawPage.Address, so [SI+BX] is the blank page's copy of the draw-page byte at [SI].\r
+ ; SI still = DrawPage.UpperLeftAddress (loaded in calculate)\r
+ mov bx,cs:BlankPage.Address\r
+ sub bx,cs:DrawPage.Address\r
+\r
+ ; Now, converts SI into "1/4" units. I do all the calculations\r
+ ; in "1/4" scale and then scale back up, mostly because it saved\r
+ ; me some instructions elsewhere. (The SHR drops SI's low two bits.)\r
+ shr si,2\r
+ ; Keep the scaled SI in DX as well; copy_rl reloads SI from DX\r
+ ; once the top/bottom copying is done.\r
+ mov dx,si\r
+\r
+ ; Top/bottom catch-up: DI = 0 skips it, DI < 0 copies at the top, DI > 0 at the bottom.\r
+catchup_tb: cmp di,0\r
+ je catchup_tb_end\r
+ jl catchup_t\r
+catchup_b: ; COPY BOTTOM (DI > 0)\r
+ ; Move SI (still in "1/4" scale) to the bottom of the virtual\r
+ ; screen, then back off by the DI groups about to be copied.\r
+ add si,((VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4) / 4\r
+ sub si,di\r
+ jmp copy_tb\r
+catchup_t: ; COPY TOP (DI < 0)\r
+ ; Leave SI at the top, but advance DX (the saved copy of SI) past\r
+ ; the rows copied here. This prevents overlap between top\r
+ ; and right/left when moving diagonally. Also, DI = |DI|\r
+ neg di\r
+ add dx,di\r
+\r
+ ; Now do the actual copying. Shifts SI back into scale "1",\r
+ ; then runs an unrolled loop that copies 4 bytes from [SI] to\r
+ ; [SI+BX] per pass (CL is the scratch byte). Since DI is already in\r
+ ; "1/4" scale, it is only decremented once per 4-byte group.\r
+copy_tb: shl si,2\r
+copy_tb_loop: mov cl,es:[si]\r
+ mov es:[si+bx],cl\r
+ mov cl,es:[si+1]\r
+ mov es:[si+bx+1],cl\r
+ mov cl,es:[si+2]\r
+ mov es:[si+bx+2],cl\r
+ mov cl,es:[si+3]\r
+ mov es:[si+bx+3],cl\r
+ add si,4\r
+ dec di\r
+ jnz copy_tb_loop\r
+catchup_tb_end:\r
+\r
+ ; Left/right catch-up: AX = 0 skips it, AX < 0 copies at the\r
+ ; right edge, AX > 0 at the left edge.\r
+catchup_rl: cmp ax,0\r
+ je catchup_rl_end\r
+ jg catchup_l\r
+catchup_r: ; COPY RIGHT (AX < 0)\r
+ ; AX = |AX|. Advance DX (saved SI, "1/4" scale) by the virtual\r
+ ; width, then back off by the AX column groups to be copied.\r
+ neg ax\r
+ add dx,(VIRTUAL_WIDTH / 4) / 4\r
+ sub dx,ax\r
+catchup_l: ; COPY LEFT (AX > 0): DX already addresses the left edge\r
+\r
+ ; Does the actual copying. SI is reloaded from DX (DL then serves\r
+ ; as the scratch byte) and shifted back into scale "1"\r
+copy_rl: mov si,dx\r
+ shl si,2\r
+\r
+ ; This is a loop over BP, which was set earlier to\r
+ ; 8 * |dY| - VIRTUAL_HEIGHT / 2: a negative count of row pairs,\r
+ ; i.e. the rows the vertical update did not already copy.\r
+ ; Each pass of copy_rl_col copies 4 bytes on each of two rows;\r
+ ; AX (reloaded into CX) is the number of such groups per row.\r
+ ; The LEA parks the start of the next row pair in DI. NOTE(review):\r
+ ; INC/JNZ only terminates as intended while BP enters negative.\r
+copy_rl_loop: lea di,[si + 2*(VIRTUAL_WIDTH/4)]\r
+ mov cx,ax\r
+copy_rl_col: mov dl,es:[si]\r
+ mov es:[si+bx],dl\r
+ mov dl,es:[si+1]\r
+ mov es:[si+bx+1],dl\r
+ mov dl,es:[si+2]\r
+ mov es:[si+bx+2],dl\r
+ mov dl,es:[si+3]\r
+ mov es:[si+bx+3],dl\r
+ mov dl,es:[si+VIRTUAL_WIDTH/4]\r
+ mov es:[si+bx+VIRTUAL_WIDTH/4],dl\r
+ mov dl,es:[si+VIRTUAL_WIDTH/4+1]\r
+ mov es:[si+bx+VIRTUAL_WIDTH/4+1],dl\r
+ mov dl,es:[si+VIRTUAL_WIDTH/4+2]\r
+ mov es:[si+bx+VIRTUAL_WIDTH/4+2],dl\r
+ mov dl,es:[si+VIRTUAL_WIDTH/4+3]\r
+ mov es:[si+bx+VIRTUAL_WIDTH/4+3],dl\r
+ add si,4\r
+ dec cx\r
+ jnz copy_rl_col\r
+ mov si,di ; SI = start of next row pair (old SI + 2 * (VIRTUAL_WIDTH/4))\r
+ inc bp ; (BP is negative, so INC it toward zero)\r
+ jnz copy_rl_loop\r
+catchup_rl_end:\r
+\r
+ ; Switch back to all-draw mode (graphics controller only; the map mask stays at 0Fh).\r
+ mov dx,GC_INDEX\r
+ mov ax,ALL_DRAW_BITS\r
+ out dx,ax\r
+ ret\r
+Scroll ENDP\r
+\1a
\ No newline at end of file