;.486P
.686
.XMM
.MODEL FLAT,C
.CODE

;---------------------------------------------------------------------------------------
; ASW_DPND_PrefetchASM
;
; Summary:
;
;   Issues one prefetch hint at the data pointer and then one more hint for every
;   whole 32 bytes of the requested amount (1 + bytes/32 hints in total), stepping
;   the address by 32 bytes each time.
;
;   Original author's note: I was not well informed about how prefetching works at
;   the time I wrote this routine, so it probably doesn't do what I'd imagined. At
;   any rate, benchmarks did not show any improvement so it is no longer used.
;
; Arguments (C calling convention; offsets are at entry, where [esp] holds the
; return address):
;   DWORD PTR [esp + 4]  = data pointer
;   DWORD PTR [esp + 8]  = data amount (in bytes)
;   DWORD PTR [esp + 12] = 0: non-temporal - !0: temporal
;
; Registers:
;   eax = bytes / 32 on exit (left as the original code left it)
;   ecx = remaining hint count, edx = current address; both are clobbered
;
; Changes from the original implementation:
;   - the non-temporal path now really uses PREFETCHNTA (it used PREFETCHT0, which
;     made both paths identical)
;   - the loops use DEC/JNZ instead of LOOPNE, which also terminated on ZF=1 and
;     therefore depended on the flags left behind by "add edx, 32"
;   - the division by 32 is a shift; the redundant "cmp ecx, 0" before JECXZ is gone
;---------------------------------------------------------------------------------------
ASW_DPND_PrefetchASM PROC NEAR

        ; eax = ecx = number of 32-byte steps after the first hint
        mov     eax, DWORD PTR [esp + 8]        ; byte count
        shr     eax, 5                          ; unsigned divide by 32
        mov     ecx, eax

        ; edx = address of the first byte to prefetch
        mov     edx, DWORD PTR [esp + 4]

        ; Non-zero third argument selects the temporal variant
        cmp     DWORD PTR [esp + 12], 0
        jne     ASW_DPND_PrefetchASM_TEMPORAL

        ; We are prefetching non-temporal data
        prefetchnta [edx]
        jecxz   ASW_DPND_PrefetchASM_END        ; less than 32 bytes: one hint only

ASW_DPND_PrefetchASM_NON_TEMPORAL_LOOP:
        add     edx, 32
        prefetchnta [edx]
        dec     ecx
        jnz     ASW_DPND_PrefetchASM_NON_TEMPORAL_LOOP
        ret

ASW_DPND_PrefetchASM_TEMPORAL:
        ; We are prefetching temporal data
        prefetcht0 [edx]
        jecxz   ASW_DPND_PrefetchASM_END        ; less than 32 bytes: one hint only

ASW_DPND_PrefetchASM_TEMPORAL_LOOP:
        add     edx, 32
        prefetcht0 [edx]
        dec     ecx
        jnz     ASW_DPND_PrefetchASM_TEMPORAL_LOOP
        ret

ASW_DPND_PrefetchASM_END:
        ret

ASW_DPND_PrefetchASM ENDP

;***************************************************************************************
;***************************************************************************************
;***************************************************************************************

; Summary:
;
; - Computes position and normals using the Transform structures
;   and corresponding weight values (Transforms must be set already).
; - Upon calculation of transformed point and each normal, they are written
;   to each Vertex that is referenced by the point (no color or texture data is calculated/written)
;
; Arguments (C calling convention; first offset is at entry, where [esp] holds the
; return address; the number in parentheses is the offset from esp right after PUSHAD):
;   DWORD PTR [esp + 4]  (36) = array of WeightPoint structures (must be aligned on 16-byte boundary)
;   DWORD PTR [esp + 8]  (40) = # of WeightPoint structures
;   DWORD PTR [esp + 12] (44) = vertex buffer offset to add to each vertex buffer index
;   DWORD PTR [esp + 16] (48) = current point array (4d vectors aligned on 16-byte boundary)
;
; WeightPoint fields as used by this routine (64 bytes per entry):
;   +0, +16, +32 : three 16-byte vectors loaded as "original X/Y/Z"
;                  (presumably each coordinate replicated in all four lanes - confirm)
;   +48          : pointer to the normal array (one 16-byte normal per referenced vertex)
;   +52          : pointer to the weight array (one float per transform reference)
;   +56          : pointer to an array of Transform pointers, terminated by 0
;   +60          : pointer to an array of vertex references, terminated by 0FFFFFFFFh
;
; Transform fields as used by this routine: four 16-byte vectors at +0, +16, +32, +48.
; The first three are multiplied by X, Y and Z; the fourth is added (points only).
;
; Assumptions carried over from the original code: every WeightPoint has at least one
; transform reference and at least one vertex reference (both inner loops test the
; terminator only after processing the first entry).
;
; All general registers are preserved (PUSHAD/POPAD); xmm0-xmm7 are clobbered.
;
; NOTE(review): results are written with non-temporal stores (MOVNTDQ/MOVNTI) and no
; SFENCE is issued here; confirm the caller fences if another agent reads the buffers.
;
; Changes from the original implementation:
;   - the stack pointer is adjusted with 32-bit arithmetic; the original used
;     "sub sp, dx" / "add sp, ax", which only touch the low 16 bits of ESP
;   - a WeightPoint count of zero now returns immediately instead of processing
;     one entry
ASW_DPND_ComputePositionAndNormalASM PROC NEAR

        pushad

        ; Build the alignment frame.
        ; eax = (esp mod 16) + 2. After dropping esp by that amount and pushing the
        ; 16-bit value, esp sits 4 bytes below a 16-byte boundary, so the three
        ; 32-bit pushes done before the vertex loop land exactly on the boundary.
        ; The pushed word is the only record of the adjustment; it is re-read
        ; whenever an argument is needed and again in the epilogue.
        mov     eax, esp
        and     eax, 15
        add     eax, 2
        sub     esp, eax
        push    ax

        ; GENERAL registers will hold the following through main loop:
        ;   edx: Current point array
        ;   edi: Current WeightPoint
        ;   esi: End WeightPoint
        ;   eax, ebx, ecx, ebp: scratch [see TRANSFORM and VERTEX loops]

        mov     edi, DWORD PTR [esp + eax + 38]         ; WeightPoint array
        mov     esi, DWORD PTR [esp + eax + 42]         ; # of WeightPoints
        mov     edx, DWORD PTR [esp + eax + 50]         ; current point array
        shl     esi, 6                                  ; * 64 = sizeof(WeightPoint)
        add     esi, edi                                ; esi = end of WeightPoint array

        ; Nothing to do for an empty array
        cmp     edi, esi
        jae     ASW_DPND_ComputePositionAndNormal_DONE

        ; GENERAL registers will hold the following through TRANSFORM loop:
        ;   eax: Transform pointer array
        ;   ecx: Transform reference (from eax)
        ;   ebp: Weight array
        ;   edx, edi, esi: as in main loop
        ; XMM registers will hold the following during TRANSFORM loop:
        ;   0: Original X
        ;   1: Original Y
        ;   2: Original Z
        ;   3: Original X (mult. by transform vector at +0)
        ;   4: Original Y (mult. by transform vector at +16), then the running sum
        ;   5: Original Z (mult. by transform vector at +32)
        ;   6: Weight
        ;   7: Accumulation

ASW_DPND_ComputePositionAndNormal_OUTER_LOOP:

        ; Get the original point
        movaps  xmm0, [edi + 0]                         ; Original X
        movaps  xmm1, [edi + 16]                        ; Original Y
        movaps  xmm2, [edi + 32]                        ; Original Z

        mov     ebp, DWORD PTR [edi + 52]               ; weights
        mov     eax, DWORD PTR [edi + 56]               ; Transform references

        ; Initialize the accumulation to zero
        xorps   xmm7, xmm7

        ; Get the first transform reference
        mov     ecx, DWORD PTR [eax]

ASW_DPND_ComputePositionAndNormal_TRANSFORM_LOOP:

        ; Copy the original X, Y, and Z
        movaps  xmm3, xmm0
        movaps  xmm4, xmm1
        movaps  xmm5, xmm2

        ; Get the weight value
        movss   xmm6, DWORD PTR [ebp]

        ; Multiply by the transform matrix
        mulps   xmm3, [ecx]
        mulps   xmm4, [ecx + 16]
        mulps   xmm5, [ecx + 32]

        ; Sum the three products and add the fourth transform vector
        addps   xmm4, xmm3
        addps   xmm4, xmm5
        addps   xmm4, [ecx + 48]

        ; Distribute the weight value to all four lanes and apply it
        shufps  xmm6, xmm6, 0
        mulps   xmm4, xmm6

        ; Add to the accumulation register
        addps   xmm7, xmm4

        ; Advance to the next transform reference / weight; 0 ends the list
        add     eax, 4
        mov     ecx, DWORD PTR [eax]
        add     ebp, 4
        test    ecx, ecx
        jnz     ASW_DPND_ComputePositionAndNormal_TRANSFORM_LOOP

        ; Copy the 'new' point over the old point
        movntdq [edx], xmm7

        ; ****************************************************************************
        ; By here, we have the transformed point... loop through all vertices that
        ; use this point
        ; ****************************************************************************

        ; GENERAL registers will hold the following during VERTEX loop:
        ;   eax: Vertex reference array
        ;   ebx: Vertex buffer offset
        ;   ecx: Vertex reference (from eax), then vertex address
        ;   edx: Normal array (saved at [esp + 32] while edx is reused)
        ;   edi: Current WeightPoint
        ;   esi, ebp: scratch

        ; Get the vertex reference array
        mov     eax, DWORD PTR [edi + 60]

        ; Get the vertex buffer offset argument
        movzx   ebx, WORD PTR [esp]
        mov     ebx, DWORD PTR [esp + ebx + 46]

        ; Save the main-loop registers. These three pushes (12 bytes) also bring
        ; esp onto a 16-byte boundary for the MOVAPS stores below.
        push    esi
        push    edx
        push    ebp

        ; Get the normal array
        mov     edx, DWORD PTR [edi + 48]

        ; Scratch area: [esp] = point, [esp + 16] = normal, [esp + 32] = normal pointer
        sub     esp, 64
        movaps  [esp], xmm7

        ; Get the first vertex reference
        mov     ecx, DWORD PTR [eax]

ASW_DPND_ComputePositionAndNormal_VERTEX_LOOP:

        ; Get the original normal for this vertex
        movss   xmm0, DWORD PTR [edx + 0]
        movss   xmm1, DWORD PTR [edx + 4]
        movss   xmm2, DWORD PTR [edx + 8]

        ; CHANGE THIS IF WE ONLY USE XYZ FOR NORMALS INSTEAD OF XYZW!!!
        add     edx, 16                                 ; 12

        ; Broadcast each component to all four lanes
        shufps  xmm0, xmm0, 0                           ; XXXX
        shufps  xmm1, xmm1, 0                           ; YYYY
        shufps  xmm2, xmm2, 0                           ; ZZZZ

        ; Save the normal array pointer
        mov     DWORD PTR [esp + 32], edx

        mov     edx, DWORD PTR [edi + 56]               ; Transform references
        mov     ebp, DWORD PTR [edi + 52]               ; weights

        ; Initialize the accumulation to zero
        xorps   xmm7, xmm7

        ; Get the first transform reference
        mov     esi, DWORD PTR [edx]

ASW_DPND_ComputePositionAndNormal_NORMAL_LOOP:

        ; Copy the original X, Y, and Z
        movaps  xmm3, xmm0
        movaps  xmm4, xmm1
        movaps  xmm5, xmm2

        ; Get the weight value
        movss   xmm6, DWORD PTR [ebp]

        ; Multiply by the transform matrix (the vector at +48 is not applied here)
        mulps   xmm3, [esi + 0]
        mulps   xmm4, [esi + 16]
        mulps   xmm5, [esi + 32]

        ; Sum the three products
        addps   xmm4, xmm3
        addps   xmm4, xmm5

        ; Distribute the weight value to all four lanes and apply it
        shufps  xmm6, xmm6, 0
        mulps   xmm4, xmm6

        ; Add to the accumulation register
        addps   xmm7, xmm4

        ; Advance to the next transform reference / weight; 0 ends the list
        add     edx, 4
        mov     esi, DWORD PTR [edx]
        add     ebp, 4
        test    esi, esi
        jnz     ASW_DPND_ComputePositionAndNormal_NORMAL_LOOP

        ; NORMALIZE??? (the accumulated normal is written out without renormalizing)

        ; Copy the transformed normal to the stack
        movaps  [esp + 16], xmm7

        ; Copy the point into registers
        mov     edx, DWORD PTR [esp]
        mov     ebp, DWORD PTR [esp + 4]
        mov     esi, DWORD PTR [esp + 8]

        ; Add the vertex buffer offset
        add     ecx, ebx

        ; Move the vertex array pointer
        add     eax, 4

        ; Copy the point from the registers to the vertex (bytes 0..11)
        movnti  [ecx + 0], edx
        movnti  [ecx + 4], ebp
        movnti  [ecx + 8], esi

        ; Copy the normal from the stack into registers
        mov     edx, DWORD PTR [esp + 16]
        mov     ebp, DWORD PTR [esp + 20]
        mov     esi, DWORD PTR [esp + 24]

        ; Copy the normal from the registers to the vertex (bytes 16..27)
        movnti  [ecx + 16], edx
        movnti  [ecx + 20], ebp
        movnti  [ecx + 24], esi

        ; Get the next vertex reference
        mov     ecx, DWORD PTR [eax]

        ; Restore the normal array pointer
        mov     edx, DWORD PTR [esp + 32]

        ; 0FFFFFFFFh ends the vertex reference list
        cmp     ecx, 0FFFFFFFFh
        jnz     ASW_DPND_ComputePositionAndNormal_VERTEX_LOOP

        ; Release the scratch area and restore the main-loop registers
        add     esp, 64
        pop     ebp
        pop     edx
        pop     esi

        ; Next WeightPoint, next output point
        add     edi, 64
        add     edx, 16
        cmp     edi, esi
        jb      ASW_DPND_ComputePositionAndNormal_OUTER_LOOP

ASW_DPND_ComputePositionAndNormal_DONE:

        ; Undo the alignment frame: skip the saved word (2 bytes) plus the
        ; adjustment it records, using full 32-bit arithmetic.
        movzx   eax, WORD PTR [esp]
        lea     esp, [esp + eax + 2]

        popad

;       emms

        ret

ASW_DPND_ComputePositionAndNormalASM ENDP

;***************************************************************************************
;***************************************************************************************
;***************************************************************************************

; Summary:
;
; - Computes position and delta position (new position minus last position) using the Transform structures
;   and corresponding weight values (Transforms must be set already).
; - Upon calculation of transformed point and delta position "color", points and colors are written
;   to each Vertex that is referenced by the point (no normal or texture data is calculated/written)
;
; Arguments (C calling convention; first offset is at entry, where [esp] holds the
; return address; the number in parentheses is the offset from esp right after PUSHAD):
;   DWORD PTR [esp + 4]  (36) = array of WeightPoint structures (must be aligned on 16-byte boundary)
;   DWORD PTR [esp + 8]  (40) = # of WeightPoint structures
;   DWORD PTR [esp + 12] (44) = oriented velocity of observer (must be a 4d vector aligned on a 16-byte boundary)
;   DWORD PTR [esp + 16] (48) = minimization/maximization of resulting color (must be a 4d vector aligned on a 16-byte boundary)
;   DWORD PTR [esp + 20] (52) = bias of resulting color (must be a 4d vector aligned on a 16-byte boundary)
;   DWORD PTR [esp + 24] (56) = vertex buffer offset to add to each vertex buffer index
;   DWORD PTR [esp + 28] (60) = current point array (4d vectors aligned on 16-byte boundary)
;
; WeightPoint fields as used by this routine (64 bytes per entry):
;   +0, +16, +32 : three 16-byte vectors loaded as "original X/Y/Z"
;                  (presumably each coordinate replicated in all four lanes - confirm)
;   +52          : pointer to the weight array (one float per transform reference)
;   +56          : pointer to an array of Transform pointers, terminated by 0
;   +60          : pointer to an array of vertex references, terminated by 0FFFFFFFFh
;
; Transform fields as used by this routine: four 16-byte vectors at +0, +16, +32, +48.
; The first three are multiplied by X, Y and Z; the fourth is added.
;
; Assumptions carried over from the original code: every WeightPoint has at least one
; transform reference and at least one vertex reference (both inner loops test the
; terminator only after processing the first entry).
;
; All general registers are preserved (PUSHAD/POPAD); xmm0-xmm7 are clobbered.
;
; NOTE(review): results are written with non-temporal stores (MOVNTDQ/MOVNTI) and no
; SFENCE is issued here; confirm the caller fences if another agent reads the buffers.
;
; Changes from the original implementation:
;   - the epilogue restores the stack pointer with 32-bit arithmetic; the original
;     used "add sp, ax", which only touches the low 16 bits of ESP
;   - a WeightPoint count of zero now returns immediately instead of processing
;     one entry
ASW_DPND_ComputePositionAndDeltaASM PROC NEAR

        pushad

        ; Build the alignment frame.
        ; eax = (esp mod 16) + 2. After dropping esp by that amount and pushing the
        ; 16-bit value, esp sits 4 bytes below a 16-byte boundary, so the three
        ; 32-bit pushes done before the vertex loop land exactly on the boundary.
        ; The pushed word is the only record of the adjustment; it is re-read
        ; whenever an argument is needed and again in the epilogue.
        mov     eax, esp
        and     eax, 15
        add     eax, 2
        sub     esp, eax
        push    ax

        ; GENERAL registers will hold the following through main loop:
        ;   edx: Current point array
        ;   edi: Current WeightPoint
        ;   esi: End WeightPoint
        ;   eax, ebx, ecx, ebp: scratch [see TRANSFORM and VERTEX loops]

        mov     edi, DWORD PTR [esp + eax + 38]         ; WeightPoint array
        mov     esi, DWORD PTR [esp + eax + 42]         ; # of WeightPoints
        mov     edx, DWORD PTR [esp + eax + 62]         ; current point array
        shl     esi, 6                                  ; * 64 = sizeof(WeightPoint)
        add     esi, edi                                ; esi = end of WeightPoint array

        ; Nothing to do for an empty array
        cmp     edi, esi
        jae     ASW_DPND_ComputePositionAndDelta_DONE

        ; GENERAL registers will hold the following through TRANSFORM loop:
        ;   eax: Transform pointer array
        ;   ecx: Transform reference (from eax)
        ;   ebp: Weight array
        ;   edx, edi, esi: as in main loop
        ; XMM registers will hold the following during TRANSFORM loop:
        ;   0: Original X
        ;   1: Original Y
        ;   2: Original Z
        ;   3: Original X (mult. by transform vector at +0)
        ;   4: Original Y (mult. by transform vector at +16), then the running sum
        ;   5: Original Z (mult. by transform vector at +32)
        ;   6: Weight
        ;   7: Accumulation

ASW_DPND_ComputePositionAndDelta_OUTER_LOOP:

        ; Prefetch the current point
;       prefetchnta [edx]

        ; Get the original point
        movaps  xmm0, [edi + 0]                         ; Original X
        movaps  xmm1, [edi + 16]                        ; Original Y
        movaps  xmm2, [edi + 32]                        ; Original Z

        mov     ebp, DWORD PTR [edi + 52]               ; weights
        mov     eax, DWORD PTR [edi + 56]               ; Transform references

        ; Initialize the accumulation to zero
        xorps   xmm7, xmm7

        ; Get the first transform reference
        mov     ecx, DWORD PTR [eax]

ASW_DPND_ComputePositionAndDelta_TRANSFORM_LOOP:

        ; Copy the original X, Y, and Z
        movaps  xmm3, xmm0
        movaps  xmm4, xmm1
        movaps  xmm5, xmm2

        ; Get the weight value
        movss   xmm6, DWORD PTR [ebp]

        ; Multiply by the transform matrix
        mulps   xmm3, [ecx]
        mulps   xmm4, [ecx + 16]
        mulps   xmm5, [ecx + 32]

        ; Sum the three products and add the fourth transform vector
        addps   xmm4, xmm3
        addps   xmm4, xmm5
        addps   xmm4, [ecx + 48]

        ; Distribute the weight value to all four lanes and apply it
        shufps  xmm6, xmm6, 0
        mulps   xmm4, xmm6

        ; Add to the accumulation register
        addps   xmm7, xmm4

        ; Advance to the next transform reference / weight; 0 ends the list
        add     eax, 4
        mov     ecx, DWORD PTR [eax]
        add     ebp, 4
        test    ecx, ecx
        jnz     ASW_DPND_ComputePositionAndDelta_TRANSFORM_LOOP

        ; ****************************************************************************
        ; By here, we have the transformed point... compute the difference from the
        ; current point and store it
        ; ****************************************************************************

        ; Get the viewer's velocity, color factor, and color bias
        movzx   ecx, WORD PTR [esp]
        mov     eax, DWORD PTR [esp + ecx + 46]         ; oriented velocity
        mov     ebx, DWORD PTR [esp + ecx + 50]         ; color factor
        mov     ecx, DWORD PTR [esp + ecx + 54]         ; color bias

        ; Copy the new point
        movaps  xmm0, xmm7

        ; Subtract the 'old' point from the 'new' point to get the displacement
        subps   xmm0, [edx]

        ; Subtract the velocity (to put things in perspective for the viewer)
        subps   xmm0, [eax]

        ; Set the W component to x*x + y*y + z*z of the displacement
        ; (the squared length; no square root is taken)
        movaps  xmm1, xmm0
        shufps  xmm0, xmm0, 39                          ; swap lanes 0 and 3 (X <-> W)
        mulps   xmm1, xmm1                              ; square every component
        movaps  xmm2, xmm1
        movaps  xmm3, xmm1
        shufps  xmm2, xmm2, 201                         ; lane 0 = y*y
        shufps  xmm3, xmm3, 230                         ; lane 0 = z*z
        addss   xmm2, xmm1                              ; + x*x
        addss   xmm2, xmm3                              ; + z*z
        movss   xmm0, xmm2                              ; drop the sum into lane 0
        shufps  xmm0, xmm0, 39                          ; swap back: sum ends up in W

        ; Multiply by the color factor
        mulps   xmm0, [ebx]

        ; Copy the 'new' point over the old point
        movntdq [edx], xmm7

        ; Add the color bias
        addps   xmm0, [ecx]

        ; Convert from 32 to 16 bits, then from 16 to 8 bits (disabled: the color is
        ; written below as four 32-bit values)
;       packssdw xmm0, xmm0
;       packuswb xmm0, xmm0

        ; ****************************************************************************
        ; By here, we have the transformed point and the color... loop through all
        ; vertices that use this point
        ; ****************************************************************************

        ; GENERAL registers will hold the following during VERTEX loop:
        ;   eax: Vertex reference array
        ;   ebx: Vertex buffer offset
        ;   ecx: Vertex reference (from eax), then vertex address
        ;   edx, edi, esi, ebp: scratch for copying the point and color components

        ; Get the vertex reference array
        mov     eax, DWORD PTR [edi + 60]

        ; Get the vertex buffer offset argument
        movzx   ebx, WORD PTR [esp]
        mov     ebx, DWORD PTR [esp + ebx + 58]

        ; Get the first vertex reference
        mov     ecx, DWORD PTR [eax]

        ; Save the main-loop registers. These three pushes (12 bytes) also bring
        ; esp onto a 16-byte boundary for the MOVAPS stores below.
        push    esi
        push    edx
        push    edi

        ; Scratch area: [esp] = point, [esp + 16] = color
        sub     esp, 64
        movaps  [esp], xmm7
        movaps  [esp + 16], xmm0

ASW_DPND_ComputePositionAndDelta_VERTEX_LOOP:

        ; Move the vertex array pointer
        add     eax, 4

        ; Add the vertex buffer offset
        add     ecx, ebx

        ; Copy the point to the vertex (bytes 0..11)
        mov     edx, DWORD PTR [esp]
        mov     edi, DWORD PTR [esp + 4]
        mov     esi, DWORD PTR [esp + 8]
        movnti  [ecx], edx
        movnti  [ecx + 4], edi
        movnti  [ecx + 8], esi

        ; Copy the color to the vertex (bytes 36..51)
        mov     edx, DWORD PTR [esp + 16]
        mov     edi, DWORD PTR [esp + 20]
        mov     esi, DWORD PTR [esp + 24]
        mov     ebp, DWORD PTR [esp + 28]
        movnti  [ecx + 36], edx
        movnti  [ecx + 40], edi
        movnti  [ecx + 44], esi
        movnti  [ecx + 48], ebp

        ; 0FFFFFFFFh ends the vertex reference list
        mov     ecx, DWORD PTR [eax]
        cmp     ecx, 0FFFFFFFFh
        jnz     ASW_DPND_ComputePositionAndDelta_VERTEX_LOOP

        ; Release the scratch area and restore the main-loop registers
        add     esp, 64
        pop     edi
        pop     edx
        pop     esi

        ; Next WeightPoint, next output point
        add     edi, 64
        add     edx, 16
        cmp     edi, esi
        jb      ASW_DPND_ComputePositionAndDelta_OUTER_LOOP

ASW_DPND_ComputePositionAndDelta_DONE:

        ; Undo the alignment frame: skip the saved word (2 bytes) plus the
        ; adjustment it records, using full 32-bit arithmetic.
        movzx   eax, WORD PTR [esp]
        lea     esp, [esp + eax + 2]

        popad

        ; NOTE(review): no MMX instruction is used in this routine; EMMS is kept
        ; only so the routine leaves the x87 tag word as the original did.
        emms

        ret

ASW_DPND_ComputePositionAndDeltaASM ENDP

END