2 ; NTS: We use NASM to achieve our goals here because WASM sucks donkey balls
3 ; Maybe when they bother to implement a proper conditional macro system, I'll consider it...
7 %error You must specify MMODE variable (memory model) for 16-bit real mode code
; Memory-model dependent settings. Each alternative below belongs to a different
; conditional branch; the directives that select between them are only partly
; visible in this excerpt.
;   retnative          - return instruction matching the model's call type
;                        (RETF in the branches visible here)
;   cdecl_param_offset - byte distance from [BP] to the first cdecl stack
;                        argument: size of the return address plus the saved
;                        BP/EBP pushed by the function prologue
11 %if TARGET_MSDOS == 16
13 %define retnative retf
14 %define cdecl_param_offset 6 ; RETF addr + PUSH BP (4 + 2 bytes)
17 %define retnative retf
18 %define cdecl_param_offset 6 ; RETF addr + PUSH BP (4 + 2 bytes)
21 %define cdecl_param_offset 4 ; RET addr + PUSH BP (2 + 2 bytes)
26 %define cdecl_param_offset 8 ; RET addr + PUSH EBP (4 + 4 bytes)
29 ; NTS: Associate our data with Watcom's data segment
30 segment .data public align=4 class=data
; The extern symbols in this section are only referenced by this file; their
; storage lives in another module (presumably the C code whose declarations
; are quoted in the comments below -- confirm). The leading underscore on the
; assembly names is the C-level name with '_' prepended.
32 %if TARGET_MSDOS == 16
34 ; uint32_t far* near llmemcpy_gdt = NULL;
36 ; uint16_t near llmemcpy_gdtr[4];
38 ; uint16_t near llmemcpy_idtr[4];
40 ; uint32_t near llmemcpy_vcpi[0x20];
42 ; uint32_t near llmemcpy_vcpi_return[2];
; The VCPI routines read the dword at +4 as the code segment and keep a copy
; on the stack, where +0 is used as the offset of the VCPI far-jump target.
43 extern _llmemcpy_vcpi_return
44 ; volatile void FAR* llmemcpy_pagetables = NULL;
; Far pointer: the VCPI routines load it with LES DI,[...].
45 extern _llmemcpy_pagetables
48 ; NTS: Help NASM put the code segment in the right place for Watcom to link it in properly
49 segment text public align=1 class=code
; The routines that follow appear to be guarded by this conditional (16-bit
; MS-DOS targets only); its matching %endif lies outside this excerpt.
51 %if TARGET_MSDOS == 16
; llmem_memcpy_16_inner_pae_
; Only fragments of this routine are visible in this excerpt. The surviving
; comments outline its phases: enter protected mode with paging enabled, load
; the data selectors, then leave protected mode again.
; NOTE(review): the trailing underscore in the name suggests Watcom's register
; calling convention -- confirm against the C prototype; the register inputs
; are not visible here.
53 global llmem_memcpy_16_inner_pae_
54 llmem_memcpy_16_inner_pae_:
; Self-modifying code: store AX into the 16-bit word located 3 bytes past the
; label exit_pae_patch, addressed through CS.
; NOTE(review): presumably this fills in the segment field of a far-jump
; instruction used when leaving protected mode (1 opcode byte + 2 offset bytes
; would precede it) -- confirm at exit_pae_patch, which is outside this view.
62 mov word [cs:exit_pae_patch+1+2],ax
64 ; jump into protected mode, with paging
71 ; load the data selectors
90 ; get out of protected mode
; llmem_memcpy_16_inner_pse_
; Only fragments of this routine are visible in this excerpt. It follows the
; same outline as llmem_memcpy_16_inner_pae_: enter protected mode with paging
; enabled, load the data selectors, then leave protected mode again.
; NOTE(review): the trailing underscore in the name suggests Watcom's register
; calling convention -- confirm against the C prototype; the register inputs
; are not visible here.
105 global llmem_memcpy_16_inner_pse_
106 llmem_memcpy_16_inner_pse_:
; Self-modifying code: store AX into the 16-bit word located 3 bytes past the
; label exit_pse_patch, addressed through CS.
; NOTE(review): presumably this fills in the segment field of a far-jump
; instruction used when leaving protected mode -- confirm at exit_pse_patch,
; which is outside this view.
114 mov word [cs:exit_pse_patch+1+2],ax
116 ; jump into protected mode, with paging
123 ; load the data selectors
142 ; get out of protected mode
157 ; alternate version to do PSE llmemcpy when VCPI/EMM386.EXE is active.
158 ; void __cdecl llmem_memcpy_16_inner_pse_vcpi(uint32_t dst,uint32_t src,uint32_t cpy);
;
; Stack arguments (32-bit each), as read further down:
;   [bp+cdecl_param_offset+extra+0] = dst
;   [bp+cdecl_param_offset+extra+4] = src
;   [bp+cdecl_param_offset+extra+8] = cpy
;
; Phases, as far as this excerpt shows them (many instructions are missing):
;   1. while still in v86 mode, read _llmemcpy_vcpi_return and the far
;      pointers _llmemcpy_pagetables and _llmemcpy_gdt
;   2. store .vcpi_entry into the _llmemcpy_vcpi structure and switch to
;      protected mode through the VCPI server, resuming at .vcpi_entry
;   3. switch PSE on, load cpy/src/dst into ECX/ESI/EDI for the copy (the copy
;      instruction itself is not in this excerpt), switch PSE off
;   4. build a return frame on the stack and far-jump to the VCPI entry point
;      with EAX=0xDE0C, resuming in v86 mode at .vcpi_exit
159 global _llmem_memcpy_16_inner_pse_vcpi
160 _llmem_memcpy_16_inner_pse_vcpi:
168 ; we need to store _llmemcpy_vcpi_return on the stack. once we're in protected mode
169 ; the FAR pointers given by Watcom are not usable.
; SI first receives the segment of the variable (the instruction that moves it
; into FS is not visible), then its offset, so FS:SI addresses the variable.
170 mov si,seg _llmemcpy_vcpi_return
172 mov si,_llmemcpy_vcpi_return
173 mov eax,[fs:si+4] ; segment
; 'extra' is the number of bytes that sit between [BP] and the normal cdecl
; frame in this routine; it is added to every argument offset below.
; [bp+0] and [bp+4] inside that area are later read as the VCPI offset and
; code segment. NOTE(review): the pushes that make up these 16 bytes are not
; all visible -- keep this value in sync with them.
178 %define extra 16 ; +16
182 mov ax,seg _llmemcpy_pagetables
185 les di,[fs:_llmemcpy_pagetables]
188 mov ax,seg _llmemcpy_gdt
191 lds si,[fs:_llmemcpy_gdt]
; NOTE: DS no longer addresses the program's data from here on; FS is used
; for variable access in the lines that follow.
194 ; so: DS:SI = First GDT available to VCPI server
195 ; ES:DI = Page dir 0 page 0 4KB page
204 .info_ok: ; we need EBX, the return entry point offset
207 ; now enter VCPI protected mode
208 mov bx,seg _llmemcpy_vcpi
; Store the offset of .vcpi_entry at byte offset 0x10 of _llmemcpy_vcpi.
; NOTE(review): in the VCPI mode-switch structure, +0x10 is the protected-mode
; entry EIP -- confirm against the VCPI specification.
210 mov dword [fs:_llmemcpy_vcpi+0x10],.vcpi_entry
; ESI is built from the segment and offset of _llmemcpy_vcpi; presumably the
; result is the structure's linear address (segment*16 + offset) -- the shift
; is not visible in this excerpt.
213 mov si,seg _llmemcpy_vcpi
215 add esi,_llmemcpy_vcpi
; Only reached if the mode switch returns here instead of continuing at
; .vcpi_entry.
219 hlt ; <- BRICK WALL in case of errant VCPI server
; Protected-mode entry point. 2 << 3 = 0x10, i.e. descriptor index 2 with
; RPL 0; presumably loaded into the data segment registers by lines not shown.
221 .vcpi_entry: mov ax,2 << 3
232 ; switch on PSE. note we couldn't do this from the real-mode side
233 ; since the v86 monitor would likely not allow that
238 mov ecx,[bp+cdecl_param_offset+extra+8] ; cpy
239 mov esi,[bp+cdecl_param_offset+extra+4] ; src
240 mov edi,[bp+cdecl_param_offset+extra+0] ; dst
250 ; switch off PSE. once we're back in v86 mode we can't touch control regs
255 ; set up return to v86 mode
256 and esp,0xFFFF ; <--- THIS IS VERY IMPORTANT ON RETURN FROM VCPI, UPPER BITS OF ESP CAN BE NONZERO
; Build the frame the VCPI server consumes when switching back to v86 mode.
; The offsets in the comments are relative to the final ESP, i.e. after the
; last push (the VCPI offset at +0x00; that push is not visible here). The
; value placed in EAX before each push is set by lines missing from this
; excerpt.
259 push eax ; SS:ESP+0x28 GS
260 push eax ; SS:ESP+0x24 FS
261 push eax ; SS:ESP+0x20 DS
263 push eax ; SS:ESP+0x1C ES
265 push eax ; SS:ESP+0x18 SS
267 push eax ; SS:ESP+0x14 ESP
268 pushfd ; SS:ESP+0x10 EFLAGS
270 push eax ; SS:ESP+0x0C CS
271 push dword .vcpi_exit; SS:ESP+0x08 EIP
; [bp+4]/[bp+0] are the stack copy of _llmemcpy_vcpi_return made at entry.
272 mov eax,[bp+4] ; VCPI code segment
273 push eax ; SS:ESP+0x04 VCPI code segment
274 mov eax,[bp+0] ; VCPI offset
276 mov eax,0xDE0C ; switch back to v86
; Indirect far jump through the offset/segment pair just pushed at the top of
; the stack, i.e. into the VCPI server with EAX holding the function code.
277 jmp far dword [esp] ; <--- 32-bit address mode required for direct use of SP, but only if we refer to ESP
285 ; alternate version to do PAE llmemcpy when VCPI/EMM386.EXE is active.
286 ; void __cdecl llmem_memcpy_16_inner_pae_vcpi(uint32_t dst,uint32_t src,uint32_t cpy);
;
; Stack arguments (32-bit each), as read further down:
;   [bp+cdecl_param_offset+extra+0] = dst
;   [bp+cdecl_param_offset+extra+4] = src
;   [bp+cdecl_param_offset+extra+8] = cpy
;
; Offsets into the _llmemcpy_pagetables buffer used by the visible code:
;   +0x0000  loaded into CR3 while PAE is enabled
;   +0x5000  destination of the 32-bit -> 64-bit page table translation (8KB)
;   +0x7000  loaded into CR3 when going back to 32-bit paging
;   +0x8000  4KB page handed to the VCPI server (32-bit page table, page 0)
;
; Phases, as far as this excerpt shows them (many instructions are missing):
;   1. while still in v86 mode, read _llmemcpy_pagetables and
;      _llmemcpy_vcpi_return and keep copies on the stack
;   2. switch to protected mode through the VCPI server (-> .vcpi_entry)
;   3. translate the VCPI page table to 64-bit entries, then with paging
;      temporarily off load CR3 and set PSE+PAE in CR4
;   4. load cpy/src/dst into ECX/ESI/EDI for the copy (the copy instruction
;      itself is not in this excerpt)
;   5. with paging temporarily off again, restore the 32-bit tables in CR3 and
;      clear PSE+PAE, then return to v86 mode through the VCPI server
287 global _llmem_memcpy_16_inner_pae_vcpi
288 _llmem_memcpy_16_inner_pae_vcpi:
; Read both halves of the far pointer: segment word at +2, offset word at +0.
294 mov si,seg _llmemcpy_pagetables
296 mov si,[fs:_llmemcpy_pagetables+2]
298 mov di,[fs:_llmemcpy_pagetables]
307 ; we need to store _llmemcpy_vcpi_return on the stack. once we're in protected mode
308 ; the FAR pointers given by Watcom are not usable.
309 mov si,seg _llmemcpy_vcpi_return
311 mov si,_llmemcpy_vcpi_return
312 mov eax,[fs:si+4] ; segment
; 'extra' is the number of bytes that sit between [BP] and the normal cdecl
; frame in this routine (this redefines the value used by the PSE routine).
; Within that area the code below reads [bp+0] = VCPI offset, [bp+4] = VCPI
; code segment and [bp+16] = _llmemcpy_pagetables.
; NOTE(review): the pushes that make up these 20 bytes are not all visible --
; keep this value and the hard-coded [bp+16] in sync with them.
317 %define extra 20 ; +20
321 ; we're going to give the VCPI server the last 4KB page in the 36KB
322 ; buffer allocated by the function, set aside for that purpose.
323 ; what we're then going to do is copy and translate that page table
324 ; to the 64-bit form required by PAE, initially starting from the
325 ; 32-bit form in the last 8KB
326 mov ax,seg _llmemcpy_pagetables
329 les di,[fs:_llmemcpy_pagetables]
; NOTE(review): LES only loads the low 16 bits (DI) but this is a 32-bit add;
; confirm the upper half of EDI is known here and that offset+0x8000 cannot
; carry out of DI.
330 add edi,0x8000 ; +32KB
332 mov ax,seg _llmemcpy_gdt
335 lds si,[fs:_llmemcpy_gdt]
; NOTE: DS no longer addresses the program's data from here on; FS is used
; for variable access in the lines that follow.
338 ; so: DS:SI = First GDT available to VCPI server
339 ; ES:DI = Page dir 0 page 0 4KB page
348 .info_ok: ; we need EBX, the return entry point offset
351 ; now enter VCPI protected mode
352 mov bx,seg _llmemcpy_vcpi
; Store the offset of .vcpi_entry at byte offset 0x10 of _llmemcpy_vcpi.
; NOTE(review): in the VCPI mode-switch structure, +0x10 is the protected-mode
; entry EIP -- confirm against the VCPI specification.
354 mov dword [fs:_llmemcpy_vcpi+0x10],.vcpi_entry
; ESI is built from the segment and offset of _llmemcpy_vcpi; presumably the
; result is the structure's linear address (segment*16 + offset) -- the shift
; is not visible in this excerpt.
357 mov si,seg _llmemcpy_vcpi
359 add esi,_llmemcpy_vcpi
; Only reached if the mode switch returns here instead of continuing at
; .vcpi_entry.
363 hlt ; <- BRICK WALL in case of errant VCPI server
; Protected-mode entry point. 2 << 3 = 0x10, i.e. descriptor index 2 with
; RPL 0; presumably loaded into the data segment registers by lines not shown.
365 .vcpi_entry: mov ax,2 << 3
376 ; switch on PSE. note we couldn't do this from the real-mode side
377 ; since the v86 monitor would likely not allow that
382 ; copy the first 4MB of 32-bit page tables and translate to 64-bit
; [bp+16] is the copy of _llmemcpy_pagetables kept on the stack (presumably
; converted to a linear address by lines not shown -- confirm).
383 mov eax,[bp+16] ; _llmemcpy_pagetables
384 lea esi,[eax+0x8000] ; source: 32-bit VCPI page zero
385 lea edi,[eax+0x5000] ; dest: 64-bit page zero and one
386 mov ecx,1024 ; 1024 x 32-bit -> 1024 x 64-bit (4KB -> 8KB)
; a32 = 32-bit addressing (ESI/EDI), ES override applies to the source
; operand. The store of the zero upper dword and the loop branch are not
; visible in this excerpt.
388 .xlate_loop: a32 es movsd ; lower 32 bits -> 64 bits with upper bits zero
393 ; switch on PAE, reload CR3. Temporarily shut down paging to accomplish that.
394 ; most likely: as a DOS program in the 1MB area we're not remapped and it won't affect us.
; NOTE(review): the two CR0 comments below say "PE", but the description above
; is about turning paging off and on (CR0.PG); the masks applied to ECX are
; not visible here -- confirm and correct the wording.
397 mov cr0,ecx ; CR0=Disable PE
398 mov ebx,[bp+16] ; _llmemcpy_pagetables
399 mov cr3,ebx ; CR3=new 64-bit page table
402 mov cr4,eax ; CR4=PSE and PAE
404 mov cr0,ecx ; CR0=Enable PE
406 mov ecx,[bp+cdecl_param_offset+extra+8] ; cpy
407 mov esi,[bp+cdecl_param_offset+extra+4] ; src
408 mov edi,[bp+cdecl_param_offset+extra+0] ; dst
418 ; switch off PAE and PSE, reload CR3 with the 32-bit tables. Temporarily shut down paging to accomplish that.
419 ; most likely: as a DOS program in the 1MB area we're not remapped and it won't affect us.
; NOTE(review): as above, "PE" in the CR0 comments presumably means paging
; (CR0.PG) -- confirm against the masks, which are not visible here.
422 mov cr0,ecx ; CR0=Disable PE
423 mov ebx,[bp+16] ; _llmemcpy_pagetables
424 add ebx,0x7000 ; point at 32-bit tables
425 mov cr3,ebx ; CR3=32-bit page tables at +0x7000
428 mov cr4,eax ; CR4=Disable PSE and PAE
430 mov cr0,ecx ; CR0=Enable PE
432 ; set up return to v86 mode
433 and esp,0xFFFF ; <--- THIS IS VERY IMPORTANT ON RETURN FROM VCPI, UPPER BITS OF ESP CAN BE NONZERO
; Build the frame the VCPI server consumes when switching back to v86 mode.
; The offsets in the comments are relative to the final ESP, i.e. after the
; last push (the VCPI offset at +0x00; that push is not visible here). The
; value placed in EAX before each push is set by lines missing from this
; excerpt.
436 push eax ; SS:ESP+0x28 GS
437 push eax ; SS:ESP+0x24 FS
438 push eax ; SS:ESP+0x20 DS
440 push eax ; SS:ESP+0x1C ES
442 push eax ; SS:ESP+0x18 SS
444 push eax ; SS:ESP+0x14 ESP
445 pushfd ; SS:ESP+0x10 EFLAGS
447 push eax ; SS:ESP+0x0C CS
448 push dword .vcpi_exit; SS:ESP+0x08 EIP
; [bp+4]/[bp+0] are the stack copy of _llmemcpy_vcpi_return made at entry.
449 mov eax,[bp+4] ; VCPI code segment
450 push eax ; SS:ESP+0x04 VCPI code segment
451 mov eax,[bp+0] ; VCPI offset
453 mov eax,0xDE0C ; switch back to v86
; Indirect far jump through the offset/segment pair just pushed at the top of
; the stack, i.e. into the VCPI server with EAX holding the function code.
454 jmp far dword [esp] ; <--- 32-bit address mode required for direct use of SP, but only if we refer to ESP