1 ;; Global variables used here ...
\r
3 ScrollPosX dw 0 ; Scroll origin, upper-left X; do_x_scroll checks it against VIRTUAL_WIDTH - SCREEN_WIDTH
\r
4 ScrollPosY dw 0 ; Scroll origin, upper-left Y, scaled by VIRTUAL_WIDTH (do_y_scroll checks it against (VIRTUAL_HEIGHT - SCREEN_HEIGHT) * VIRTUAL_WIDTH)
\r
5 ScrollDX dw 0 ; Signed amount added to ScrollPosX by do_x_scroll
\r
6 ScrollDY dw 0 ; Signed amount added to ScrollPosY by do_y_scroll, so it is in the same scaled units as ScrollPosY
\r
9 ;; This routine takes care of all of the scrolling, however it calls
\r
10 ;; outside drawing routines to update the screen. ScrollDX and
\r
11 ;; ScrollDY determine the amount to scroll by.
\r
12 ;; Note that this does only RELATIVE scrolling, not absolute scrolling.
\r
13 ;; Scroll saves time by updating only up to the one row or column of
\r
14 ;; tiles which have come into view due to a change in scroll offset.
\r
15 ;; In other words, it's not good for "jumping" to a particular point,
\r
16 ;; although this effect can be accomplished in other ways -- the draw_full
\r
17 ;; routine is available to draw a full screen again.
\r
18 ;; Sometimes this means that you will have to calculate values ahead of
\r
19 ;; time, for instance if you wish the scrolling to keep a certain sprite
\r
20 ;; in the center of the screen. In this case, just set ScrollDX and
\r
21 ;; ScrollDY to the delta-x and delta-y of the sprite.
\r
23 ;; Since there are three pages, it is necessary to keep each one of them
\r
24 ;; up to date with each scroll. Recently, I was doing some fast (8+
\r
25 ;; pixels per frame) scrolling and noticed that there was a significant
\r
26 ;; pause when the screen snapped to a new origin. (The origin is always
\r
27 ;; at a square's corner, even though it may not look like it because it
\r
28 ;; disguises things by smooth-panning the hardware.) Every time it
\r
29 ;; scrolled, it was drawing the new information and copying it to the
\r
30 ;; two other pages. I've now distributed the load over successive
\r
31 ;; pages, in other words it doesn't copy the new info all at once, but
\r
32 ;; over several frames. This really smoothed out the scrolling so that
\r
33 ;; while there are still some jumps, they only occur very infrequently
\r
34 ;; and then only at 15 or 16 pixel/frame scroll rates... That's the
\r
35 ;; "catchup" code at the bottom, and that's why it's more complex than
\r
36 ;; it maybe could be...
\r
39 ; Using the ScrollDX variable as delta-x, move the scroll-origin
\r
40 ; in the x direction. Then, if the visible screen is now
\r
41 ; viewing invalid data, snap the origin to a new point and
\r
42 ; draw any new columns that are necessary.
\r
43 do_x_scroll: mov ax,cs:ScrollPosX ; AX = current scroll-x
\r
44 add ax,cs:ScrollDX ; ScrollDX is a delta-x; AX = candidate new scroll-x
\r
45 jl wrap_l ; wrap left if negative (signed test on the ADD result)
\r
46 cmp ax,VIRTUAL_WIDTH - SCREEN_WIDTH ; too far right?
\r
47 jge wrap_r ; wrap right if too big (signed AX >= limit)
\r
48 mov cs:ScrollPosX,ax ; In range: store new scroll-x, then fall through to do_y_scroll
\r
49 ; (just like above, for y:)
\r
50 ; Using the ScrollDY variable as delta-y, move the scroll-origin
\r
51 ; in the y direction. Then, if the visible screen is now
\r
52 ; viewing invalid data, snap the origin to a new point and
\r
53 ; draw any new rows that are necessary.
\r
54 do_y_scroll: mov ax,cs:ScrollPosY ; AX = current scroll-y (scaled by VIRTUAL_WIDTH, see the limit below)
\r
55 add ax,cs:ScrollDY ; ScrollDY is a delta-y; AX = candidate new scroll-y
\r
56 jl wrap_t ; wrap top if negative (signed test on the ADD result)
\r
57 cmp ax,(VIRTUAL_HEIGHT - SCREEN_HEIGHT) * VIRTUAL_WIDTH ; too far down?
\r
58 jge wrap_b ; wrap bottom if too big (signed AX >= limit)
\r
59 mov cs:ScrollPosY,ax ; Store the new scroll-y
\r
60 jmp calculate ; In range: AX = ScrollPosY as calculate expects; must not fall through into wrap_r
\r
62 ; To wrap to the right:
\r
63 ; Add a square's width to the origin's upper left corner, and
\r
64 ; subtract the same amount from the scroll origin's upper left
\r
65 ; corner. This makes no difference on the screen but allows
\r
66 ; us to forget about the leftmost column on the screen (it's
\r
67 ; offscreen now...) so we can take over the right column.
\r
68 ; See any documentation I included for an explanation of this.
\r
70 wrap_r: add cs:upper_left,SQUARE_WIDTH / 4 ; Move the origin corner one square right (byte address = pixels / 4)
\r
71 sub ax,SQUARE_WIDTH ; Take the same distance off the scroll position (mirror of the ADD in wrap_l)
\r
72 mov cs:ScrollPosX,ax ; Store the adjusted scroll-x
\r
75 mov bp,MapInfo.OffX1 ; BP = MapInfo.OffX1; NOTE(review): listing lines 76-79 (the adjustment and the branch to wrap_r1_ok) are not in this extract
\r
80 wrap_r1_ok: mov MapInfo.OffX1,bp ; Write OffX1 back
\r
82 mov bp,MapInfo.OffX2 ; BP = MapInfo.OffX2; NOTE(review): listing lines 83-86 are not in this extract
\r
87 wrap_r2_ok: mov MapInfo.OffX2,bp ; Write OffX2 back
\r
89 mov bp,MapInfo.WrapX ; BP = MapInfo.WrapX; NOTE(review): listing lines 90-92 are not in this extract
\r
93 wrap_r3_ok: mov MapInfo.WrapX,bp ; Write WrapX back
\r
96 jmp do_y_scroll ; Jump back to do Y
\r
98 EVEN ; Same for left side (EVEN aligns wrap_l on an even address)
\r
99 wrap_l: sub cs:upper_left,SQUARE_WIDTH / 4 ; Move the origin corner one square left (byte address = pixels / 4)
\r
100 add ax,SQUARE_WIDTH ; Add the same distance back onto the scroll position in AX
\r
101 mov cs:ScrollPosX,ax ; Store the adjusted scroll-x
\r
104 mov bp,MapInfo.OffX1 ; BP = MapInfo.OffX1; NOTE(review): listing lines 105-108 (the adjustment and the branch to wrap_l1_ok) are not in this extract
\r
109 wrap_l1_ok: mov MapInfo.OffX1,bp ; Write OffX1 back
\r
111 mov bp,MapInfo.OffX2 ; BP = MapInfo.OffX2; NOTE(review): listing lines 112-115 are not in this extract
\r
116 wrap_l2_ok: mov MapInfo.OffX2,bp ; Write OffX2 back
\r
118 mov bp,MapInfo.WrapX ; BP = MapInfo.WrapX; NOTE(review): listing lines 119-122 are not in this extract
\r
123 wrap_l3_ok: mov MapInfo.WrapX,bp ; Write WrapX back
\r
126 jmp do_y_scroll ; Jump back to do Y
\r
128 EVEN ; Same for bottom (EVEN aligns wrap_b on an even address)
\r
129 wrap_b: add cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4 ; Move the origin corner one square down (byte address = pixels / 4)
\r
130 sub ax,SQUARE_HEIGHT * VIRTUAL_WIDTH ; Take the same distance off the scroll position in AX
\r
131 mov cs:ScrollPosY,ax ; Store the adjusted scroll-y
\r
133 mov bp,MapInfo.OffY1 ; BP = MapInfo.OffY1
\r
134 mov dx,MapInfo.Extent ; DX = MapInfo.Extent; NOTE(review): its use (listing lines 135-138) is not in this extract
\r
139 wrap_b1_ok: mov MapInfo.OffY1,bp ; Write OffY1 back
\r
141 mov bp,MapInfo.OffY2 ; BP = MapInfo.OffY2; NOTE(review): listing lines 142-145 are not in this extract
\r
146 wrap_b2_ok: mov MapInfo.OffY2,bp ; Write OffY2 back
\r
149 mov bp,MapInfo.WrapY ; BP = MapInfo.WrapY; NOTE(review): listing lines 150-152 are not in this extract
\r
153 wrap_b3_ok: mov MapInfo.WrapY,bp ; Write WrapY back
\r
156 mov ax,cs:ScrollPosY ; Reload AX: calculate expects AX = ScrollPosY
\r
157 jmp calculate ; Jump down to calc new offsets
\r
159 EVEN ; Same for top (EVEN aligns wrap_t on an even address)
\r
160 wrap_t: sub cs:upper_left,(SQUARE_HEIGHT * VIRTUAL_WIDTH) / 4 ; Move the origin corner one square up (byte address = pixels / 4)
\r
161 add ax,SQUARE_HEIGHT * VIRTUAL_WIDTH ; Add the same distance back onto the scroll position in AX
\r
162 mov cs:ScrollPosY,ax ; Store the adjusted scroll-y
\r
164 mov bp,MapInfo.OffY1 ; BP = MapInfo.OffY1
\r
165 mov dx,MapInfo.Extent ; DX = MapInfo.Extent; NOTE(review): its use (listing lines 166-169) is not in this extract
\r
170 wrap_t1_ok: mov MapInfo.OffY1,bp ; Write OffY1 back
\r
172 mov bp,MapInfo.OffY2 ; BP = MapInfo.OffY2; NOTE(review): listing lines 173-176 are not in this extract
\r
177 wrap_t2_ok: mov MapInfo.OffY2,bp ; Write OffY2 back
\r
179 mov bp,MapInfo.WrapY ; BP = MapInfo.WrapY; NOTE(review): listing lines 180-184 are not in this extract
\r
185 wrap_t3_ok: mov MapInfo.WrapY,bp ; Write WrapY back
\r
188 mov ax,cs:ScrollPosY ; Reload AX: calculate expects AX = ScrollPosY
\r
189 jmp calculate ; Jump down to calc new offsets
\r
192 align_mask_table DB 11h,22h,44h,88h ; Four one-byte masks, same bit set in both nibbles; the only visible use (indexed by BX for DrawPage.AlignmentMask) is commented out below
\r
194 ; Calculate the scroll offset
\r
195 ; AX already = ScrollPosY
\r
196 add ax,cs:ScrollPosX ; Now AX = scroll offset (ScrollPosY + ScrollPosX, still at pixel level)
\r
198 ; Calculate the plane alignment
\r
201 mov cs:DrawPage.Alignment,bl ; Store the alignment; NOTE(review): BL is computed on listing lines 199-200, not in this extract
\r
202 ; mov bl,cs:align_mask_table[bx]
\r
203 ; mov cs:DrawPage.AlignmentMask,bl
\r
205 ; Now we don't need Scroll Offset on a pixel level any more,
\r
206 ; so shift it to a byte level (/4) and store it away.
\r
208 mov cs:DrawPage.ScrollOffset,ax ; Store the offset; NOTE(review): the /4 shift itself (listing line 207) is not in this extract
\r
210 ; Calculate the actual upper left corner address
\r
211 mov si,cs:DrawPage.Address ; SI = base address of the draw page
\r
212 add si,cs:upper_left ; SI += current origin-corner offset
\r
213 mov cs:DrawPage.UpperLeftAddress,si ; Remember it; SI keeps this value for the code below
\r
215 ; And the map offset:
\r
216 mov bx,MapInfo.WrapX ; BX = current map X position
\r
217 mov cs:DrawPage.MapPosX,bx ; Record it for the draw page
\r
218 mov di,MapInfo.WrapY ; DI = current map Y position
\r
219 mov cs:DrawPage.MapPosY,di ; Record it for the draw page
\r
221 mov cs:DrawPage.Valid,1 ; Mark the draw page as valid
\r
222 cmp cs:BlankPage.Valid,0 ; Is the blank page valid? NOTE(review): the branch that uses this test is not in this extract
\r
225 ; Lastly, update dirty area (if any) on blank page.
\r
226 ; BX and DI still contain the draw page's map offset (X and Y).
\r
227 sub bx,cs:BlankPage.MapPosX ; BX = draw-page map X - blank-page map X
\r
228 sub di,cs:BlankPage.MapPosY ; DI = draw-page map Y - blank-page map Y
\r
232 ; No catchup necessary -- return.
\r
235 ;; Okay, this stuff is a mess. I've registerized everything except
\r
236 ;; for the video data itself. I'll try to comment it best I can.
\r
239 ; First, switch into full-copy mode. This means latching the
\r
240 ; bit mask as coming entirely from the local 32-bit registers
\r
241 ; and then setting the map mask to write to all 4 planes. This
\r
242 ; is Mode X's greatest advantage, when you can do it! It
\r
243 ; provides a 2x speedup or so...
\r
244 mov dx,SC_INDEX ; Select Sequencer input (DX = port used by the OUT below)
\r
246 out dx,ax ; set map mask = all bits; NOTE(review): AX is loaded on listing line 245, not in this extract
\r
249 mov ax,ALL_COPY_BITS ; AX = full-copy setting; NOTE(review): the OUT that sends it is not in this extract
\r
255 ; Next, calculate the amount to catch up the top/bottom rows
\r
256 ; If we just wrapped over the edge, it is possible that the
\r
257 ; distance traveled will be as high as MapInfo.Ht - 1. So,
\r
258 ; in the fashion of signed numbers, if the number is greater
\r
259 ; than MapInfo.Ht / 2, we take it to mean negative. To convert
\r
260 ; it to signed, we have to shift it into the proper range. But
\r
261 ; if it's less than MapInfo.Ht / 2, then it's okay as it is.
\r
267 cwd ; DX = -1 or 0 based on AX's sign (sign-extends AX into DX).
\r
268 and dx,cx ; DX = Ht or 0 (presumably CX = MapInfo.Ht, loaded on lines not in this extract)
\r
269 add ax,dx ; AX = 0 ... Ht (unsigned): a negative AX has Ht added
\r
278 ; Find DI MOD MapInfo.Wid, and then convert it into virtual
\r
279 ; coordinates from map offset coordinates.
\r
280 ; This routine also calculates BP, which will be used as a loop
\r
281 ; counter to determine how many rows to draw on the left/right
\r
287 shl bp,3 ; BP = 8 * BP = (SQUARE_HEIGHT / 2) * delta (assumes SQUARE_HEIGHT = 16 -- TODO confirm)
\r
288 mov di,cs:MultVirtWidth[bp] ; Use multiplication table (indexed by BP)
\r
289 add di,dx ; to calculate new DI, then
\r
290 xor di,dx ; restore the sign: with DX = -1 this pair negates DI, with DX = 0 it leaves DI alone (assumes DX is 0 or -1 here)
\r
291 sub bp,VIRTUAL_HEIGHT / 2 ; BP -= VIRTUAL_HEIGHT / 2; copy_rl_loop later counts BP upward with INC
\r
292 ; Out: DI = # of pixels traveled,
\r
293 ; BP = (# of rows - VIRTUAL_HEIGHT) / 2 (the SUB above leaves it negative for copy_rl_loop)
\r
295 ; Change BX (delta-x) to signed from unsigned, store in AX
\r
299 and dx,cx ; DX = Wid or 0 (presumably CX = MapInfo.Wid and DX = 0 or -1, set on lines not in this extract)
\r
300 add ax,dx ; AX = 0 ... Wid: AX has Wid added when DX is nonzero
\r
309 ; The following is an optimization which would slow down on
\r
310 ; normal memory, but I believe it will be okay on VGA memory,
\r
311 ; which is so incredibly slow. Basically, I've replaced all
\r
312 ; "rep movsb"'s with a loop that first calculates "bx = di - si",
\r
313 ; and then loops performing "mov es:[si+bx],es:[si]". Why?
\r
314 ; Because of several reasons, none of which I'm sure actually
\r
315 ; help out, but they do make for smaller code. 1) It means that
\r
316 ; I only have to maintain SI, and "DI" is maintained automatically
\r
317 ; (because DI - SI should remain constant). 2) Don't have to
\r
318 ; calculate DS. Not much gain here. 3) Because I'd already
\r
319 ; unrolled the loops, and the "rep movsb"'s had become instead
\r
320 ; "mov al, ds:[si] / mov es:[di], al / mov al, ds:[si + 1] /
\r
321 ; mov es:[di + 1],al ... etc ... add si, 4 / add di, 4". In
\r
322 ; other words, I wasn't using MOVSB anyway. The only advantage
\r
323 ; I can see in MOVSB is that it doesn't have to store the answer
\r
324 ; in AL so it could be slightly faster. By unrolling the loops,
\r
325 ; I'd already made up for that, I think. 4) Normally, using
\r
326 ; [SI + BX + 1] would incur a penalty of an additional clock
\r
327 ; cycle (because it has to add two indexes + an offset). But
\r
328 ; the VGA memory and the '86 CPU can multi-task, and the VGA
\r
329 ; is very slow. So by the time the VGA is ready to write the
\r
330 ; next byte, the one extra clock cycle has already passed.
\r
332 ; Am I right? Does this make things faster? I have no idea.
\r
333 ; I haven't bothered to check both ways. Please let me know
\r
334 ; if I've missed something important...
\r
336 ; Here's the calculation of BX. SI is already set.
\r
337 ; si already = DrawPage.UpperLeftAddress
\r
338 mov bx,cs:BlankPage.Address ; BX = base address of the blank page
\r
339 sub bx,cs:DrawPage.Address ; BX = blank base - draw base, so [si+bx] is the blank-page byte matching draw-page [si]
\r
341 ; Now, converts SI into "1/4" units. I do all the calculations
\r
342 ; in "1/4" scale and then scale back up, mostly because it saved
\r
343 ; me some instructions elsewhere.
\r
345 ; Stores this value of SI. This will be restored after doing
\r
346 ; the top/bottom copying.
\r
349 ; Check if it's necessary to catch up the top or bottom.
\r
350 catchup_tb: cmp di,0 ; Test the vertical distance in DI; NOTE(review): the branches that choose catchup_b or catchup_t are not in this extract
\r
353 catchup_b: ; COPY BOTTOM
\r
354 ; Move SI to point at the bottom of the screen - # of rows
\r
356 add si,((VIRTUAL_WIDTH * VIRTUAL_HEIGHT) / 4) / 4 ; SI += bytes in the whole virtual screen, divided by 4 again for the "1/4" scale
\r
359 catchup_t: ; COPY_TOP
\r
360 ; Leave SI, but add to the "pushed" value of SI the number of
\r
361 ; rows that will be drawn. This prevents overlap between top
\r
362 ; and right/left when moving diagonally. Also, DI = |DI|
\r
366 ; Now do the actual copying. Shifts SI back into scale "1",
\r
367 ; then performs an unrolled loop to copy the entire virtual
\r
368 ; width * # of pixel rows. Since DI is already in "1/4" scale,
\r
369 ; it is only decremented once for each four pixels drawn.
\r
371 copy_tb_loop: mov cl,es:[si] ; CL = source byte at es:[si]; NOTE(review): the interleaved loads and stores on listing lines 372-377 are not in this extract
\r
374 mov es:[si+bx+1],cl ; Store CL at offset +1 on the other page (address shifted by BX)
\r
376 mov es:[si+bx+2],cl ; Store CL at offset +2 on the other page
\r
378 mov es:[si+bx+3],cl ; Store CL at offset +3 on the other page
\r
384 ; Next, check to see if it's necessary to draw the right or left.
\r
386 catchup_rl: cmp ax,0 ; Test the horizontal distance in AX; NOTE(review): the branches that choose catchup_r or catchup_l are not in this extract
\r
389 catchup_r: ; COPY RIGHT
\r
390 ; Adds to the "pushed" SI the width of the screen, minus
\r
391 ; the number of rows to be drawn.
\r
393 add dx,(VIRTUAL_WIDTH / 4) / 4 ; DX += bytes per virtual row, divided by 4 again for the "1/4" scale
\r
395 catchup_l: ; COPY LEFT (or nothing)
\r
397 ; Does the actual copying. First pops SI from its stored value
\r
398 ; and shifts it back into scale "1"
\r
402 ; This is a loop over BP -- which has already been set as
\r
403 ; VIRTUAL_HEIGHT - (# of bytes drawn in vertical update)
\r
404 ; Again, this loop is unrolled such that it does two rows @
\r
405 ; 4 bytes each with every iteration.
\r
406 ; This LEA instruction is just a quick MOV DI, SI + 2*(VIRTUAL_WIDTH/4)
\r
407 ; DI is used to push the next value of SI for each iteration
\r
409 copy_rl_loop: lea di,[si + 2*(VIRTUAL_WIDTH/4)] ; DI = SI + two rows of bytes; becomes the next SI at the bottom of the loop
\r
411 copy_rl_col: mov dl,es:[si] ; DL = first-row source byte; NOTE(review): the interleaved loads and stores on listing lines 412-417 are not in this extract
\r
414 mov es:[si+bx+1],dl ; Store DL at offset +1 on the other page (address shifted by BX)
\r
416 mov es:[si+bx+2],dl ; Store DL at offset +2 on the other page
\r
418 mov es:[si+bx+3],dl ; Store DL at offset +3 on the other page
\r
419 mov dl,es:[si+VIRTUAL_WIDTH/4] ; Second row (one row of bytes further on), byte 0: load
\r
420 mov es:[si+bx+VIRTUAL_WIDTH/4],dl ; ... and store on the other page
\r
421 mov dl,es:[si+VIRTUAL_WIDTH/4+1] ; Second row, byte 1: load
\r
422 mov es:[si+bx+VIRTUAL_WIDTH/4+1],dl ; ... and store
\r
423 mov dl,es:[si+VIRTUAL_WIDTH/4+2] ; Second row, byte 2: load
\r
424 mov es:[si+bx+VIRTUAL_WIDTH/4+2],dl ; ... and store
\r
425 mov dl,es:[si+VIRTUAL_WIDTH/4+3] ; Second row, byte 3: load
\r
426 mov es:[si+bx+VIRTUAL_WIDTH/4+3],dl ; ... and store
\r
430 mov si,di ; SI = the DI saved at copy_rl_loop (old SI + 2*(VIRTUAL_WIDTH/4)): move down two rows
\r
431 inc bp ; (BP is negative, so INC it)
\r
435 ; Switch back to all-draw mode.
\r
437 mov ax,ALL_DRAW_BITS ; AX = all-draw setting; NOTE(review): the OUT that sends it is not in this extract
\r