version 1.4 | | version 1.5 |
---|---|---|
| | |
* Routines to draw the texture mapped scanlines. | | * Routines to draw the texture mapped scanlines. |
* | | * |
* $Log$ | | * $Log$ |
| | * Revision 1.5 1999/12/08 01:08:10 donut |
| | * Falk Hueffner's updated fp_tmap, plus my own code to allow runtime selection of tmap routines |
| | * |
* Revision 1.4 1999/10/18 00:31:01 donut | | * Revision 1.4 1999/10/18 00:31:01 donut |
* more alpha fixes from Falk Hueffner | | * more alpha fixes from Falk Hueffner |
* | | * |
| | |
#include "texmap.h" | | #include "texmap.h" |
#include "texmapl.h" | | #include "texmapl.h" |
#include "scanline.h" | | #include "scanline.h" |
| | #include "strutil.h" |
| | |
void c_tmap_scanline_flat() | | void c_tmap_scanline_flat() |
{ | | { |
| | |
} | | } |
#endif | | #endif |
| | |
#ifdef FP_TMAP | | // Used for energy centers. See comments for c_tmap_scanline_per(). |
void c_tmap_scanline_per_nolight() | | void c_fp_tmap_scanline_per_nolight() |
{ | | { |
ubyte *dest; | | ubyte *dest; |
uint c; | | ubyte c; |
int x, j; | | int x; |
double u, v, z, dudx, dvdx, dzdx, rec_z; | | double u, v, z, dudx, dvdx, dzdx, rec_z; |
u_int64_t destlong; | | double ubyz, vbyz, ubyz0, vbyz0, ubyz8, vbyz8, du1, dv1; |
| | double dudx8, dvdx8, dzdx8; |
| | u_int64_t destlong;//, destmask; |
| | |
| | ubyte *texmap = pixptr;//, *fadetable = gr_fade_table; |
| | |
u = f2db(fx_u); | | u = f2db(fx_u); |
v = f2db(fx_v) * 64.0; | | v = f2db(fx_v) * 64.0; |
z = f2db(fx_z); | | z = f2db(fx_z); |
| | |
dudx = f2db(fx_du_dx); | | dudx = f2db(fx_du_dx); |
dvdx = f2db(fx_dv_dx) * 64.0; | | dvdx = f2db(fx_dv_dx) * 64.0; |
dzdx = f2db(fx_dz_dx); | | dzdx = f2db(fx_dz_dx); |
| | |
| | dudx8 = dudx * 8.0; |
| | dvdx8 = dvdx * 8.0; |
| | dzdx8 = dzdx * 8.0; |
| | |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
| | |
dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y)); | | dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y)); |
| | |
x = fx_xright - fx_xleft + 1; | | x = fx_xright - fx_xleft + 1; |
if (!Transparency_on) { | | |
if (x >= 8) { | | |
if ((j = (size_t) dest & 7) != 0) { | | |
j = 8 - j; | | |
| | |
while (j > 0) { | | if (!Transparency_on) { // I'm not sure this is ever used (energy texture is transparent) |
*dest++ = | | if (x >= 8) { |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | for ( ; (size_t) dest & 7; --x) { |
| | *dest++ = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + |
(((int) (u * rec_z)) & 63)]; | | (((int) (u * rec_z)) & 63)]; |
u += dudx; | | u += dudx; |
v += dvdx; | | v += dvdx; |
z += dzdx; | | z += dzdx; |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
x--; | | |
j--; | | |
} | | |
} | | } |
| | |
while (j >= 8) { | | ubyz0 = u * rec_z; |
destlong = | | vbyz0 = v * rec_z; |
(u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)]; | | u += dudx8; |
u += dudx; | | v += dvdx8; |
v += dvdx; | | z += dzdx8; |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | |
(u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)] << 8; | | |
u += dudx; | | |
v += dvdx; | | |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | |
(u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)] << 16; | | |
u += dudx; | | |
v += dvdx; | | |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | |
(u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)] << 24; | | |
u += dudx; | | |
v += dvdx; | | |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | |
(u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)] << 32; | | |
u += dudx; | | |
v += dvdx; | | |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | |
(u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)] << 40; | | |
u += dudx; | | |
v += dvdx; | | |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | |
(u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)] << 48; | | |
u += dudx; | | |
v += dvdx; | | |
z += dzdx; | | |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
destlong |= | | |
(u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | ubyz8 = u * rec_z; |
(((int) (u * rec_z)) & 63)] << 56; | | vbyz8 = v * rec_z; |
u += dudx; | | |
v += dvdx; | | du1 = (ubyz8 - ubyz0) / 8.0; |
z += dzdx; | | dv1 = (vbyz8 - vbyz0) / 8.0; |
| | ubyz = ubyz0; |
| | vbyz = vbyz0; |
| | |
| | for ( ; x >= 8; x -= 8) { |
| | destlong = (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + |
| | (((int) ubyz) & 63)]; |
| | ubyz += du1; |
| | vbyz += dv1; |
| | |
| | destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + |
| | (((int) ubyz) & 63)] << 8; |
| | ubyz += du1; |
| | vbyz += dv1; |
| | |
| | destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + |
| | (((int) ubyz) & 63)] << 16; |
| | ubyz += du1; |
| | vbyz += dv1; |
| | |
| | destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + |
| | (((int) ubyz) & 63)] << 24; |
| | ubyz += du1; |
| | vbyz += dv1; |
| | |
| | destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + |
| | (((int) ubyz) & 63)] << 32; |
| | ubyz += du1; |
| | vbyz += dv1; |
| | |
| | destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + |
| | (((int) ubyz) & 63)] << 40; |
| | ubyz += du1; |
| | vbyz += dv1; |
| | |
| | destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + |
| | (((int) ubyz) & 63)] << 48; |
| | ubyz += du1; |
| | vbyz += dv1; |
| | |
| | destlong |= (u_int64_t) texmap[(((int) vbyz) & (64 * 63)) + |
| | (((int) ubyz) & 63)] << 56; |
| | |
| | ubyz0 = ubyz8; |
| | vbyz0 = vbyz8; |
| | |
| | u += dudx8; |
| | v += dvdx8; |
| | z += dzdx8; |
| | |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
| | |
| | ubyz8 = u * rec_z; |
| | vbyz8 = v * rec_z; |
| | |
| | du1 = (ubyz8 - ubyz0) / 8.0; |
| | dv1 = (vbyz8 - vbyz0) / 8.0; |
| | ubyz = ubyz0; |
| | vbyz = vbyz0; |
| | |
*((u_int64_t *) dest) = destlong; | | *((u_int64_t *) dest) = destlong; |
dest += 8; | | dest += 8; |
x -= 8; | | |
j -= 8; | | |
} | | } |
| | u -= dudx8; |
| | v -= dvdx8; |
| | z -= dzdx8; |
} | | } |
while (x-- > 0) { | | |
*dest++ = | | rec_z = 1.0 / z; |
(u_int64_t) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | for ( ; x > 0; x--) { |
(((int) (u * rec_z)) & 63)]; | | *dest++ = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; |
u += dudx; | | u += dudx; |
v += dvdx; | | v += dvdx; |
z += dzdx; | | z += dzdx; |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
} | | } |
} else { | | } else { // Transparency_on |
x = fx_xright - fx_xleft + 1; | | |
| | |
if (x >= 8) { | | if (x >= 8) { |
if ((j = (size_t) dest & 7) != 0) { | | for ( ; (size_t) dest & 7; --x) { |
j = 8 - j; | | c = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; |
| | if (c != TRANSPARENCY_COLOR) |
while (j > 0) { | | |
c = | | |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)]; | | |
if (c != 255) | | |
*dest = c; | | *dest = c; |
dest++; | | dest++; |
u += dudx; | | u += dudx; |
v += dvdx; | | v += dvdx; |
z += dzdx; | | z += dzdx; |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
x--; | | |
j--; | | |
} | | |
} | | } |
| | |
j = x; | | ubyz0 = u * rec_z; |
while (j >= 8) { | | vbyz0 = v * rec_z; |
| | |
| | u += dudx8; |
| | v += dvdx8; |
| | z += dzdx8; |
| | rec_z = 1.0 / z; |
| | ubyz8 = u * rec_z; |
| | vbyz8 = v * rec_z; |
| | du1 = (ubyz8 - ubyz0) / 8.0; |
| | dv1 = (vbyz8 - vbyz0) / 8.0; |
| | ubyz = ubyz0; |
| | vbyz = vbyz0; |
| | for ( ; x >= 8; x -= 8) { |
destlong = *((u_int64_t *) dest); | | destlong = *((u_int64_t *) dest); |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)]; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
if (c != 255) { | | if (c != TRANSPARENCY_COLOR) { |
destlong &= ~(u_int64_t)0xFF; | | destlong &= ~((u_int64_t) 0xFF); |
destlong |= (u_int64_t) c; | | destlong |= (u_int64_t) c; |
} | | } |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | if (c != TRANSPARENCY_COLOR) { |
(((int) (u * rec_z)) & 63)]; | | |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 8); | | destlong &= ~((u_int64_t)0xFF << 8); |
destlong |= (u_int64_t) c << 8; | | destlong |= (u_int64_t) c << 8; |
} | | } |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | if (c != TRANSPARENCY_COLOR) { |
(((int) (u * rec_z)) & 63)]; | | |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 16); | | destlong &= ~((u_int64_t)0xFF << 16); |
destlong |= (u_int64_t) c << 16; | | destlong |= (u_int64_t) c << 16; |
} | | } |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | if (c != TRANSPARENCY_COLOR) { |
(((int) (u * rec_z)) & 63)]; | | |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 24); | | destlong &= ~((u_int64_t)0xFF << 24); |
destlong |= (u_int64_t) c << 24; | | destlong |= (u_int64_t) c << 24; |
} | | } |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | if (c != TRANSPARENCY_COLOR) { |
(((int) (u * rec_z)) & 63)]; | | |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 32); | | destlong &= ~((u_int64_t)0xFF << 32); |
destlong |= (u_int64_t) c << 32; | | destlong |= (u_int64_t) c << 32; |
} | | } |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | if (c != TRANSPARENCY_COLOR) { |
(((int) (u * rec_z)) & 63)]; | | |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 40); | | destlong &= ~((u_int64_t)0xFF << 40); |
destlong |= (u_int64_t) c << 40; | | destlong |= (u_int64_t) c << 40; |
} | | } |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | if (c != TRANSPARENCY_COLOR) { |
(((int) (u * rec_z)) & 63)]; | | |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 48); | | destlong &= ~((u_int64_t)0xFF << 48); |
destlong |= (u_int64_t) c << 48; | | destlong |= (u_int64_t) c << 48; |
} | | } |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | if (c != TRANSPARENCY_COLOR) { |
(((int) (u * rec_z)) & 63)]; | | |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 56); | | destlong &= ~((u_int64_t)0xFF << 56); |
destlong |= (u_int64_t) c << 56; | | destlong |= (u_int64_t) c << 56; |
} | | } |
u += dudx; | | |
v += dvdx; | | |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
| | |
*((u_int64_t *) dest) = destlong; | | *((u_int64_t *) dest) = destlong; |
dest += 8; | | dest += 8; |
x -= 8; | | |
j -= 8; | | ubyz0 = ubyz8; |
| | vbyz0 = vbyz8; |
| | |
| | u += dudx8; |
| | v += dvdx8; |
| | z += dzdx8; |
| | rec_z = 1.0 / z; |
| | ubyz8 = u * rec_z; |
| | vbyz8 = v * rec_z; |
| | du1 = (ubyz8 - ubyz0) / 8.0; |
| | dv1 = (vbyz8 - vbyz0) / 8.0; |
| | ubyz = ubyz0; |
| | vbyz = vbyz0; |
| | |
} | | } |
| | u -= dudx8; |
| | v -= dvdx8; |
| | z -= dzdx8; |
} | | } |
while (x-- > 0) { | | rec_z = 1.0 / z; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | for ( ; x > 0; x--) { |
(((int) (u * rec_z)) & 63)]; | | c = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; |
if (c != 255) | | if (c != TRANSPARENCY_COLOR) |
*dest = c; | | *dest = c; |
dest++; | | dest++; |
u += dudx; | | u += dudx; |
| | |
} | | } |
} | | } |
} | | } |
#else | | |
void c_tmap_scanline_per_nolight() | | void c_tmap_scanline_per_nolight() |
{ | | { |
ubyte *dest; | | ubyte *dest; |
| | |
} | | } |
} | | } |
} | | } |
#endif | | |
| | |
#ifdef FP_TMAP | | // This texture mapper uses floating point extensively and writes 8 pixels at once, so it likely works |
void c_tmap_scanline_per() | | // best on 64 bit RISC processors. |
| | // WARNING: it is not endian clean. For big endian, reverse the shift counts in the unrolled loops. I |
| | // have no means to test that, so I didn't try it. Please tell me if you get this to work on a big |
| | // endian machine. |
| | // If you're using an Alpha, compile this file with the Compaq compiler to gain quite a few more fps. |
| | // Unfortunately, it won't compile the whole source, so simply compile everything, change the |
| | // compiler to ccc, remove scanline.o and compile again. |
| | // Please send comments/suggestions to falk.hueffner@student.uni-tuebingen.de. |
| | void c_fp_tmap_scanline_per() |
{ | | { |
ubyte *dest; | | ubyte *dest; |
uint c; | | ubyte c; |
int x, j; | | int x; |
double u, v, z, l, dudx, dvdx, dzdx, dldx, rec_z; | | double u, v, z, dudx, dvdx, dzdx, rec_z; |
u_int64_t destlong; | | double ubyz, vbyz, ubyz0, vbyz0, ubyz8, vbyz8, du1, dv1; |
| | double dudx8, dvdx8, dzdx8; |
| | fix l, dldx; |
| | u_int64_t destlong;//, destmask; |
| | |
| | // give dumb compilers a chance to put these global pointers into registers or at least have |
| | // nicer names :) |
| | ubyte *texmap = pixptr, *fadetable = gr_fade_table; |
| | |
| | #ifdef CYCLECOUNT |
| | unsigned long start, stop, time; |
| | static unsigned long sum, count; |
| | #endif |
| | |
| | // v is pre-scaled by 64 to avoid the multiplication when accessing the 64x64 texture array |
u = f2db(fx_u); | | u = f2db(fx_u); |
v = f2db(fx_v) * 64.0; | | v = f2db(fx_v) * 64.0; |
z = f2db(fx_z); | | z = f2db(fx_z); |
l = f2db(fx_l); | | l = fx_l >> 8; |
| | |
dudx = f2db(fx_du_dx); | | dudx = f2db(fx_du_dx); |
dvdx = f2db(fx_dv_dx) * 64.0; | | dvdx = f2db(fx_dv_dx) * 64.0; |
dzdx = f2db(fx_dz_dx); | | dzdx = f2db(fx_dz_dx); |
dldx = f2db(fx_dl_dx); | | dldx = fx_dl_dx >> 8; |
| | |
| | dudx8 = dudx * 8.0; |
| | dvdx8 = dvdx * 8.0; |
| | dzdx8 = dzdx * 8.0; |
| | |
rec_z = 1.0 / z; // gcc 2.95.2 won't do this optimization itself | | rec_z = 1.0 / z; // multiplication is often faster than division |
| | |
dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y)); | | dest = (ubyte *) (write_buffer + fx_xleft + (bytes_per_row * fx_y)); |
x = fx_xright - fx_xleft + 1; | | x = fx_xright - fx_xleft + 1; |
| | |
if (!Transparency_on) { | | if (!Transparency_on) { |
if (x >= 8) { | | if (x >= 8) { |
if ((j = (size_t) dest & 7) != 0) { | | // draw till we are on a 8-byte aligned address |
j = 8 - j; | | for ( ; (size_t) dest & 7; --x) { |
| | *dest++ = fadetable[(l & 0x7f00) + |
while (j > 0) { | | (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + |
*dest++ = | | |
gr_fade_table[((int) fabs(l)) * 256 + | | |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | |
(((int) (u * rec_z)) & 63)]]; | | (((int) (u * rec_z)) & 63)]]; |
l += dldx; | | l += dldx; |
u += dudx; | | u += dudx; |
v += dvdx; | | v += dvdx; |
z += dzdx; | | z += dzdx; |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
x--; | | |
j--; | | |
} | | |
} | | } |
| | |
j = x; | | // Now draw 8 pixels at once, interpolating 1/z linearly. Artifacts of the |
while (j >= 8) { | | // interpolation aren't really noticeable; many games even interpolate over 16 |
| | // pixels. |
| | |
| | // We do these calculations once before and then at the end of the loop instead |
| | // of simply at the start of the loop, because the scheduler can then interleave |
| | // them with the texture accesses. Silly, but gains a few fps. |
| | ubyz0 = u * rec_z; |
| | vbyz0 = v * rec_z; |
| | |
| | u += dudx8; |
| | v += dvdx8; |
| | z += dzdx8; |
| | |
| | rec_z = 1.0 / z; |
| | |
| | ubyz8 = u * rec_z; |
| | vbyz8 = v * rec_z; |
| | |
| | du1 = (ubyz8 - ubyz0) / 8.0; |
| | dv1 = (vbyz8 - vbyz0) / 8.0; |
| | ubyz = ubyz0; |
| | vbyz = vbyz0; |
| | |
| | // This loop is the "hot spot" of the game; it takes about 70% of the time. The |
| | // major weak points are the many integer casts, which have to go through memory |
| | // on processors < 21264. But when using integers, one needs to compensate for |
| | // inexactness, and the code ends up being not really faster. |
| | for ( ; x >= 8; x -= 8) { |
| | #ifdef CYCLECOUNT |
| | start = virtcc(); |
| | #endif |
destlong = | | destlong = |
(u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + | | (u_int64_t) fadetable[(l & 0x7f00) + |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | (uint) texmap[(((int) vbyz) & (64 * 63)) + |
(((int) (u * rec_z)) & 63)]]; | | (((int) ubyz) & 63)]]; |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | destlong |= |
(u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + | | (u_int64_t) fadetable[(l & 0x7f00) + |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | (uint) texmap[(((int) vbyz) & (64 * 63)) + |
(((int) (u * rec_z)) & 63)]] << 8; | | (((int) ubyz) & 63)]] << 8; |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | destlong |= |
(u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + | | (u_int64_t) fadetable[(l & 0x7f00) + |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | (uint) texmap[(((int) vbyz) & (64 * 63)) + |
(((int) (u * rec_z)) & 63)]] << 16; | | (((int) ubyz) & 63)]] << 16; |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | destlong |= |
(u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + | | (u_int64_t) fadetable[(l & 0x7f00) + |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | (uint) texmap[(((int) vbyz) & (64 * 63)) + |
(((int) (u * rec_z)) & 63)]] << 24; | | (((int) ubyz) & 63)]] << 24; |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | destlong |= |
(u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + | | (u_int64_t) fadetable[(l & 0x7f00) + |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | (uint) texmap[(((int) vbyz) & (64 * 63)) + |
(((int) (u * rec_z)) & 63)]] << 32; | | (((int) ubyz) & 63)]] << 32; |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | destlong |= |
(u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + | | (u_int64_t) fadetable[(l & 0x7f00) + |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | (uint) texmap[(((int) vbyz) & (64 * 63)) + |
(((int) (u * rec_z)) & 63)]] << 40; | | (((int) ubyz) & 63)]] << 40; |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | destlong |= |
(u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + | | (u_int64_t) fadetable[(l & 0x7f00) + |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | (uint) texmap[(((int) vbyz) & (64 * 63)) + |
(((int) (u * rec_z)) & 63)]] << 48; | | (((int) ubyz) & 63)]] << 48; |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
destlong |= | | destlong |= |
(u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + | | (u_int64_t) fadetable[(l & 0x7f00) + |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + | | (uint) texmap[(((int) vbyz) & (64 * 63)) + |
(((int) (u * rec_z)) & 63)]] << 56; | | (((int) ubyz) & 63)]] << 56; |
l += dldx; | | l += dldx; |
u += dudx; | | |
v += dvdx; | | ubyz0 = ubyz8; |
z += dzdx; | | vbyz0 = vbyz8; |
| | |
| | u += dudx8; |
| | v += dvdx8; |
| | z += dzdx8; |
| | |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
| | |
| | ubyz8 = u * rec_z; |
| | vbyz8 = v * rec_z; |
| | |
| | du1 = (ubyz8 - ubyz0) / 8.0; |
| | dv1 = (vbyz8 - vbyz0) / 8.0; |
| | ubyz = ubyz0; |
| | vbyz = vbyz0; |
| | |
*((u_int64_t *) dest) = destlong; | | *((u_int64_t *) dest) = destlong; |
dest += 8; | | dest += 8; |
x -= 8; | | #ifdef CYCLECOUNT |
j -= 8; | | stop = virtcc(); |
| | #endif |
} | | } |
| | // compensate for being calculated once too often |
| | u -= dudx8; |
| | v -= dvdx8; |
| | z -= dzdx8; |
| | #ifdef CYCLECOUNT |
| | time = stop - start; |
| | if (time > 10 && time < 900) { |
| | sum += time; |
| | ++count; |
| | if (count % 10000 == 1) |
| | printf("%f %d\n", (double) sum / (double) count, time); |
} | | } |
while (x-- > 0) { | | #endif |
| | } |
| | |
| | // Draw the last few (<8) pixels. |
| | rec_z = 1.0 / z; |
| | for ( ; x > 0; x--) { |
*dest++ = | | *dest++ = |
gr_fade_table[((int) fabs(l)) * 256 + | | fadetable[(l & 0x7f00) + |
(uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]]; | | (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]]; |
l += dldx; | | l += dldx; |
u += dudx; | | u += dudx; |
v += dvdx; | | v += dvdx; |
z += dzdx; | | z += dzdx; |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
} | | } |
} else { | | } else { // Transparency_on |
if (x >= 8) { | | if (x >= 8) { |
if ((j = (size_t) dest & 7) != 0) { | | for ( ; (size_t) dest & 7; --x) { |
j = 8 - j; | | c = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; |
| | if (c != TRANSPARENCY_COLOR) |
while (j > 0) { | | *dest = fadetable[(l & 0x7f00) + c]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | |
if (c != 255) | | |
*dest = gr_fade_table[((int) fabs(l)) * 256 + c]; | | |
dest++; | | dest++; |
l += dldx; | | l += dldx; |
u += dudx; | | u += dudx; |
v += dvdx; | | v += dvdx; |
z += dzdx; | | z += dzdx; |
rec_z = 1.0 / z; | | rec_z = 1.0 / z; |
x--; | | |
j--; | | |
} | | |
} | | } |
| | |
j = x; | | ubyz0 = u * rec_z; |
while (j >= 8) { | | vbyz0 = v * rec_z; |
| | |
| | u += dudx8; |
| | v += dvdx8; |
| | z += dzdx8; |
| | rec_z = 1.0 / z; |
| | ubyz8 = u * rec_z; |
| | vbyz8 = v * rec_z; |
| | du1 = (ubyz8 - ubyz0) / 8.0; |
| | dv1 = (vbyz8 - vbyz0) / 8.0; |
| | ubyz = ubyz0; |
| | vbyz = vbyz0; |
| | for ( ; x >= 8; x -= 8) { |
destlong = *((u_int64_t *) dest); | | destlong = *((u_int64_t *) dest); |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | |
if (c != 255) { | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
destlong &= ~(u_int64_t)0xFF; | | if (c != TRANSPARENCY_COLOR) { |
destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c]; | | destlong &= ~((u_int64_t) 0xFF); |
| | destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c]; |
} | | } |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | if (c != TRANSPARENCY_COLOR) { |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 8); | | destlong &= ~((u_int64_t)0xFF << 8); |
destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 8; | | destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 8; |
} | | } |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | if (c != TRANSPARENCY_COLOR) { |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 16); | | destlong &= ~((u_int64_t)0xFF << 16); |
destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 16; | | destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 16; |
} | | } |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | if (c != TRANSPARENCY_COLOR) { |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 24); | | destlong &= ~((u_int64_t)0xFF << 24); |
destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 24; | | destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 24; |
} | | } |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | if (c != TRANSPARENCY_COLOR) { |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 32); | | destlong &= ~((u_int64_t)0xFF << 32); |
destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 32; | | destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 32; |
} | | } |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | if (c != TRANSPARENCY_COLOR) { |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 40); | | destlong &= ~((u_int64_t)0xFF << 40); |
destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 40; | | destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 40; |
} | | } |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | if (c != TRANSPARENCY_COLOR) { |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 48); | | destlong &= ~((u_int64_t)0xFF << 48); |
destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 48; | | destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 48; |
} | | } |
l += dldx; | | l += dldx; |
u += dudx; | | ubyz += du1; |
v += dvdx; | | vbyz += dv1; |
z += dzdx; | | |
rec_z = 1.0 / z; | | c = texmap[(((int) vbyz) & (64 * 63)) + (((int) ubyz) & 63)]; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | if (c != TRANSPARENCY_COLOR) { |
if (c != 255) { | | |
destlong &= ~((u_int64_t)0xFF << 56); | | destlong &= ~((u_int64_t)0xFF << 56); |
destlong |= (u_int64_t) gr_fade_table[((int) fabs(l)) * 256 + c] << 56; | | destlong |= (u_int64_t) fadetable[(l & 0x7f00) + c] << 56; |
} | | } |
l += dldx; | | l += dldx; |
u += dudx; | | |
v += dvdx; | | |
z += dzdx; | | |
rec_z = 1.0 / z; | | |
| | |
*((u_int64_t *) dest) = destlong; | | *((u_int64_t *) dest) = destlong; |
dest += 8; | | dest += 8; |
x -= 8; | | |
j -= 8; | | ubyz0 = ubyz8; |
| | vbyz0 = vbyz8; |
| | |
| | u += dudx8; |
| | v += dvdx8; |
| | z += dzdx8; |
| | rec_z = 1.0 / z; |
| | ubyz8 = u * rec_z; |
| | vbyz8 = v * rec_z; |
| | du1 = (ubyz8 - ubyz0) / 8.0; |
| | dv1 = (vbyz8 - vbyz0) / 8.0; |
| | ubyz = ubyz0; |
| | vbyz = vbyz0; |
| | |
} | | } |
| | u -= dudx8; |
| | v -= dvdx8; |
| | z -= dzdx8; |
} | | } |
while (x-- > 0) { | | rec_z = 1.0 / z; |
c = (uint) pixptr[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; | | for ( ; x > 0; x--) { |
if (c != 255) | | c = (uint) texmap[(((int) (v * rec_z)) & (64 * 63)) + (((int) (u * rec_z)) & 63)]; |
*dest = gr_fade_table[((int) fabs(l)) * 256 + c]; | | if (c != TRANSPARENCY_COLOR) |
| | *dest = fadetable[(l & 0x7f00) + c]; |
dest++; | | dest++; |
l += dldx; | | l += dldx; |
u += dudx; | | u += dudx; |
| | |
} | | } |
} | | } |
| | |
#elif 1 | | #if 1 |
// note the unrolling loop is broken. It is never called, and uses big endian. -- FH | | // note the unrolling loop is broken. It is never called, and uses big endian. -- FH |
void c_tmap_scanline_per() | | void c_tmap_scanline_per() |
{ | | { |
| | |
} | | } |
} | | } |
} | | } |
| | #endif |
| | |
| | void (*cur_tmap_scanline_per)(void); |
| | void (*cur_tmap_scanline_per_nolight)(void); |
| | void (*cur_tmap_scanline_lin)(void); |
| | void (*cur_tmap_scanline_lin_nolight)(void); |
| | void (*cur_tmap_scanline_flat)(void); |
| | void (*cur_tmap_scanline_shaded)(void); |
| | |
| | //runtime selection of optimized tmappers. 12/07/99 Matthew Mueller |
| | //the reason I did it this way rather than having a *tmap_funcs that then points to a c_tmap or fp_tmap struct that's already filled in, is to avoid a second pointer dereference. |
| | void select_tmap(char *type){ |
| | if (!type){ |
| | #ifndef NO_ASM |
| | select_tmap("i386"); |
| | #else |
| | select_tmap("c"); |
| | #endif |
| | return; |
| | } |
| | #ifndef NO_ASM |
| | if (stricmp(type,"i386")==0){ |
| | cur_tmap_scanline_per=asm_tmap_scanline_per; |
| | cur_tmap_scanline_per_nolight=asm_tmap_scanline_per; |
| | cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted; |
| | cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin; |
| | cur_tmap_scanline_flat=asm_tmap_scanline_flat; |
| | cur_tmap_scanline_shaded=asm_tmap_scanline_shaded; |
| | } |
| | else if (stricmp(type,"pent")==0){ |
| | cur_tmap_scanline_per=asm_pent_tmap_scanline_per; |
| | cur_tmap_scanline_per_nolight=asm_pent_tmap_scanline_per; |
| | cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted; |
| | cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin; |
| | cur_tmap_scanline_flat=asm_tmap_scanline_flat; |
| | cur_tmap_scanline_shaded=asm_tmap_scanline_shaded; |
| | } |
| | else if (stricmp(type,"ppro")==0){ |
| | cur_tmap_scanline_per=asm_ppro_tmap_scanline_per; |
| | cur_tmap_scanline_per_nolight=asm_ppro_tmap_scanline_per; |
| | cur_tmap_scanline_lin=asm_tmap_scanline_lin_lighted; |
| | cur_tmap_scanline_lin_nolight=asm_tmap_scanline_lin; |
| | cur_tmap_scanline_flat=asm_tmap_scanline_flat; |
| | cur_tmap_scanline_shaded=asm_tmap_scanline_shaded; |
| | } |
| | else |
#endif | | #endif |
| | if (stricmp(type,"fp")==0){ |
| | cur_tmap_scanline_per=c_fp_tmap_scanline_per; |
| | cur_tmap_scanline_per_nolight=c_fp_tmap_scanline_per_nolight; |
| | cur_tmap_scanline_lin=c_tmap_scanline_lin; |
| | cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight; |
| | cur_tmap_scanline_flat=c_tmap_scanline_flat; |
| | cur_tmap_scanline_shaded=c_tmap_scanline_shaded; |
| | } |
| | else { |
| | if (stricmp(type,"c")!=0) |
| | printf("unknown tmap requested, using c tmap\n"); |
| | cur_tmap_scanline_per=c_tmap_scanline_per; |
| | cur_tmap_scanline_per_nolight=c_tmap_scanline_per_nolight; |
| | cur_tmap_scanline_lin=c_tmap_scanline_lin; |
| | cur_tmap_scanline_lin_nolight=c_tmap_scanline_lin_nolight; |
| | cur_tmap_scanline_flat=c_tmap_scanline_flat; |
| | cur_tmap_scanline_shaded=c_tmap_scanline_shaded; |
| | } |
| | } |
| | |