(view as text)
diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
index 0de9034..2bec618 100644
--- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
+++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
@@ -451,11 +451,15 @@ void ProgramShaderCache::CreateHeader ( void )
// Precision defines for GLSLES3
"%s\n"
+ "%s\n"
// Silly differences
"#define float2 vec2\n"
"#define float3 vec3\n"
"#define float4 vec4\n"
+ "#define uint2 uvec2\n"
+ "#define uint3 uvec3\n"
+ "#define uint4 uvec4\n"
"#define int2 ivec2\n"
"#define int3 ivec3\n"
"#define int4 ivec4\n"
@@ -474,6 +478,7 @@ void ProgramShaderCache::CreateHeader ( void )
, g_ActiveConfig.backend_info.bSupportShadingLanguage420pack ? "#extension GL_ARB_shading_language_420pack : enable" : ""
, v==GLSLES3 ? "precision highp float;" : ""
+ , v==GLSLES3 ? "precision highp int;" : ""
, DriverDetails::HasBug(DriverDetails::BUG_BROKENTEXTURESIZE) ? "#define textureSize(x, y) ivec2(1, 1)" : ""
, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "#define centroid" : ""
diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h
index 8db3eb7..2f1b333 100644
--- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h
+++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h
@@ -38,9 +38,6 @@ public:
};
-const int NUM_UNIFORMS = 19;
-extern const char *UniformNames[NUM_UNIFORMS];
-
struct SHADER
{
SHADER() : glprogid(0) { }
@@ -52,8 +49,6 @@ struct SHADER
GLuint glprogid; // opengl program id
std::string strvprog, strpprog;
- GLint UniformLocations[NUM_UNIFORMS];
- u32 UniformSize[NUM_UNIFORMS];
void SetProgramVariables();
void SetProgramBindings();
diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp
index 5ebdcd7..0b0baa6 100644
--- a/Source/Core/VideoBackends/Software/Tev.cpp
+++ b/Source/Core/VideoBackends/Software/Tev.cpp
@@ -470,7 +470,7 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
case ITBA_OFF:
AlphaBump = 0;
break;
- case ITBA_S:
+ case ITBA_S:
AlphaBump = indmap[TextureSampler::ALP_SMP];
break;
case ITBA_T:
@@ -520,9 +520,9 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
return;
}
- s64 indtevtrans[2] = { 0,0 };
+ s32 indtevtrans[2] = { 0,0 };
- // matrix multiply
+ // matrix multiply - results might overflow, but we don't care since we only use the lower 24 bits of the result.
int indmtxid = indirect.mid & 3;
if (indmtxid)
{
@@ -536,19 +536,21 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
switch (indirect.mid & 12)
{
case 0:
- shift = 3 + (17 - scale);
- indtevtrans[0] = indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2];
- indtevtrans[1] = indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2];
+ // matrix values are S0.10, output format is S17.7, so divide by 8
+ shift = (17 - scale);
+ indtevtrans[0] = (indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2]) >> 3;
+ indtevtrans[1] = (indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2]) >> 3;
break;
case 4: // s matrix
- shift = 8 + (17 - scale);
- indtevtrans[0] = s * indcoord[0];
- indtevtrans[1] = t * indcoord[0];
+ // s is S17.7 and the matrix elements are divided by 256; the output is S17.7, so divide by 256. TODO: since s is actually stored as S24, should we divide by 256*64 instead?
+ shift = (17 - scale);
+ indtevtrans[0] = s * indcoord[0] / 256;
+ indtevtrans[1] = t * indcoord[0] / 256;
break;
case 8: // t matrix
- shift = 8 + (17 - scale);
- indtevtrans[0] = s * indcoord[1];
- indtevtrans[1] = t * indcoord[1];
+ shift = (17 - scale);
+ indtevtrans[0] = s * indcoord[1] / 256;
+ indtevtrans[1] = t * indcoord[1] / 256;
break;
default:
return;
diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h
index 35f1e79..88f5186 100644
--- a/Source/Core/VideoCommon/BPMemory.h
+++ b/Source/Core/VideoCommon/BPMemory.h
@@ -411,9 +411,6 @@ union TEXSCALE
u32 rid : 8;
};
u32 hex;
-
- float getScaleS(int i){return 1.0f/(float)(1<<(i?ss1:ss0));}
- float getScaleT(int i){return 1.0f/(float)(1<<(i?ts1:ts0));}
};
union RAS1_IREF
diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h
index bca1c5c..8563096 100644
--- a/Source/Core/VideoCommon/ConstantManager.h
+++ b/Source/Core/VideoCommon/ConstantManager.h
@@ -11,26 +11,30 @@ typedef s32 int4[4];
struct PixelShaderConstants
{
- float4 colors[4];
- float4 kcolors[4];
- float4 alpha;
+ int4 colors[4];
+ int4 kcolors[4];
+ int4 alpha;
float4 texdims[8];
- float4 zbias[2];
- float4 indtexscale[2];
- float4 indtexmtx[6];
- float4 fog[3];
+ int4 zbias[2];
+ int4 indtexscale[2];
+ int4 indtexmtx[6];
+ int4 fogcolor;
+ int4 fogi[1];
+ float4 fogf[2];
// For pixel lighting
- float4 plights[40];
- float4 pmaterials[4];
+ int4 plight_colors[8];
+ float4 plights[32];
+ int4 pmaterials[4];
};
struct VertexShaderConstants
{
float4 posnormalmatrix[6];
float4 projection[4];
- float4 materials[4];
- float4 lights[40];
+ int4 materials[4];
+ int4 light_colors[8]; // 8 lights
+ float4 lights[32]; // 8 lights * 4 parameters
float4 texmatrices[24];
float4 transformmatrices[64];
float4 normalmatrices[32];
diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h
index 9379f65..c48a9dc 100644
--- a/Source/Core/VideoCommon/LightingShaderGen.h
+++ b/Source/Core/VideoCommon/LightingShaderGen.h
@@ -9,19 +9,19 @@
#include "VideoCommon/XFMemory.h"
-#define LIGHT_COL "%s[5*%d].%s"
-#define LIGHT_COL_PARAMS(lightsName, index, swizzle) (lightsName), (index), (swizzle)
+#define LIGHT_COL "%s[%d].%s"
+#define LIGHT_COL_PARAMS(lightsColName, index, swizzle) (lightsColName), (index), (swizzle)
-#define LIGHT_COSATT "%s[5*%d+1]"
+#define LIGHT_COSATT "%s[4*%d]"
#define LIGHT_COSATT_PARAMS(lightsName, index) (lightsName), (index)
-#define LIGHT_DISTATT "%s[5*%d+2]"
+#define LIGHT_DISTATT "%s[4*%d+1]"
#define LIGHT_DISTATT_PARAMS(lightsName, index) (lightsName), (index)
-#define LIGHT_POS "%s[5*%d+3]"
+#define LIGHT_POS "%s[4*%d+2]"
#define LIGHT_POS_PARAMS(lightsName, index) (lightsName), (index)
-#define LIGHT_DIR "%s[5*%d+4]"
+#define LIGHT_DIR "%s[4*%d+3]"
#define LIGHT_DIR_PARAMS(lightsName, index) (lightsName), (index)
/**
@@ -39,14 +39,11 @@ struct LightingUidData
template<class T>
-static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha)
+static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsColName, const char* lightsName, int coloralpha)
{
const LitChannel& chan = (litchan_index > 1) ? xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index];
- const char* swizzle = "xyzw";
- if (coloralpha == 1)
- swizzle = "xyz";
- else if (coloralpha == 2)
- swizzle = "w";
+ const char* swizzle = (coloralpha == 1) ? "xyz" : (coloralpha == 2) ? "w" : "xyzw";
+ const char* swizzle_components = (coloralpha == 1) ? "3" : (coloralpha == 2) ? "" : "4";
uid_data.attnfunc |= chan.attnfunc << (2*litchan_index);
uid_data.diffusefunc |= chan.diffusefunc << (2*litchan_index);
@@ -56,13 +53,14 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
switch (chan.diffusefunc)
{
case LIGHTDIF_NONE:
- object.Write("lacc.%s += " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsName, index, swizzle));
+ object.Write("lacc.%s += " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsColName, index, swizzle));
break;
case LIGHTDIF_SIGN:
case LIGHTDIF_CLAMP:
object.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(lightsName, index));
- object.Write("lacc.%s += %sdot(ldir, _norm0)) * " LIGHT_COL";\n",
- swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", LIGHT_COL_PARAMS(lightsName, index, swizzle));
+ object.Write("lacc.%s += int%s(round(%sdot(ldir, _norm0)) * float%s(" LIGHT_COL")));\n",
+ swizzle, swizzle_components, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(",
+ swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle));
break;
default: _assert_(0);
}
@@ -94,14 +92,16 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
switch (chan.diffusefunc)
{
case LIGHTDIF_NONE:
- object.Write("lacc.%s += attn * " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsName, index, swizzle));
+ object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL")));\n",
+ swizzle, swizzle_components,
+ swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle));
break;
case LIGHTDIF_SIGN:
case LIGHTDIF_CLAMP:
- object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * " LIGHT_COL";\n",
- swizzle,
+ object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL")));\n",
+ swizzle, swizzle_components,
chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(",
- LIGHT_COL_PARAMS(lightsName, index, swizzle));
+ swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle));
break;
default: _assert_(0);
}
@@ -115,7 +115,7 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
// inColorName is color in vs and colors_ in ps
// dest is o.colors_ in vs and colors_ in ps
template<class T>
-static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest)
+static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* materialsName, const char* lightsColName, const char* lightsName, const char* inColorName, const char* dest)
{
for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++)
{
@@ -128,15 +128,15 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
if (color.matsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
- object.Write("mat = %s%d;\n", inColorName, j);
+ object.Write("int4 mat = int4(round(%s%d * 255.0f));\n", inColorName, j);
else if (components & VB_HAS_COL0)
- object.Write("mat = %s0;\n", inColorName);
+ object.Write("int4 mat = int4(round(%s0 * 255.0f));\n", inColorName);
else
- object.Write("mat = float4(1.0, 1.0, 1.0, 1.0);\n");
+ object.Write("int4 mat = int4(255, 255, 255, 255);\n");
}
else // from color
{
- object.Write("mat = %s[%d];\n", materialsName, j+2);
+ object.Write("int4 mat = %s[%d];\n", materialsName, j+2);
}
uid_data.enablelighting |= xfregs.color[j].enablelighting << j;
@@ -146,14 +146,14 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
if (color.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
- object.Write("lacc = %s%d;\n", inColorName, j);
+ object.Write("lacc = int4(round(%s%d * 255.0f));\n", inColorName, j);
else if (components & VB_HAS_COL0 )
- object.Write("lacc = %s0;\n", inColorName);
+ object.Write("lacc = int4(round(%s0 * 255.0f));\n", inColorName);
else
// TODO: this isn't verified. Here we want to read the ambient from the vertex,
// but the vertex itself has no color. So we don't know which value to read.
// Returing 1.0 is the same as disabled lightning, so this could be fine
- object.Write("lacc = float4(1.0, 1.0, 1.0, 1.0);\n");
+ object.Write("lacc = int4(255, 255, 255, 255);\n");
}
else // from color
{
@@ -162,7 +162,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
}
else
{
- object.Write("lacc = float4(1.0, 1.0, 1.0, 1.0);\n");
+ object.Write("lacc = int4(255, 255, 255, 255);\n");
}
// check if alpha is different
@@ -172,10 +172,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
if (alpha.matsource) // from vertex
{
if (components & (VB_HAS_COL0<<j))
- object.Write("mat.w = %s%d.w;\n", inColorName, j);
+ object.Write("mat.w = int(round(%s%d.w * 255.0f));\n", inColorName, j);
else if (components & VB_HAS_COL0)
- object.Write("mat.w = %s0.w;\n", inColorName);
- else object.Write("mat.w = 1.0;\n");
+ object.Write("mat.w = int(round(%s0.w * 255.0f));\n", inColorName);
+ else object.Write("mat.w = 255;\n");
}
else // from color
{
@@ -190,12 +190,12 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
if (alpha.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
- object.Write("lacc.w = %s%d.w;\n", inColorName, j);
+ object.Write("lacc.w = int(round(%s%d.w * 255.0f));\n", inColorName, j);
else if (components & VB_HAS_COL0 )
- object.Write("lacc.w = %s0.w;\n", inColorName);
+ object.Write("lacc.w = int(round(%s0.w * 255.0f));\n", inColorName);
else
// TODO: The same for alpha: We want to read from vertex, but the vertex has no color
- object.Write("lacc.w = 1.0;\n");
+ object.Write("lacc.w = 255;\n");
}
else // from color
{
@@ -204,7 +204,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
}
else
{
- object.Write("lacc.w = 1.0;\n");
+ object.Write("lacc.w = 255;\n");
}
if(color.enablelighting && alpha.enablelighting)
@@ -226,7 +226,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
{
if (mask & (1<<i))
{
- GenerateLightShader<T>(object, uid_data, i, j, lightsName, 3);
+ GenerateLightShader<T>(object, uid_data, i, j, lightsColName, lightsName, 3);
}
}
}
@@ -236,9 +236,9 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
for (int i = 0; i < 8; ++i)
{
if (!(mask&(1<<i)) && (color.GetFullLightMask() & (1<<i)))
- GenerateLightShader<T>(object, uid_data, i, j, lightsName, 1);
+ GenerateLightShader<T>(object, uid_data, i, j, lightsColName, lightsName, 1);
if (!(mask&(1<<i)) && (alpha.GetFullLightMask() & (1<<i)))
- GenerateLightShader<T>(object, uid_data, i, j+2, lightsName, 2);
+ GenerateLightShader<T>(object, uid_data, i, j+2, lightsColName, lightsName, 2);
}
}
else if (color.enablelighting || alpha.enablelighting)
@@ -252,10 +252,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
for (int i = 0; i < 8; ++i)
{
if (workingchannel.GetFullLightMask() & (1<<i))
- GenerateLightShader<T>(object, uid_data, i, lit_index, lightsName, coloralpha);
+ GenerateLightShader<T>(object, uid_data, i, lit_index, lightsColName, lightsName, coloralpha);
}
}
- object.Write("%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j);
+ object.Write("%s%d = float4(mat * clamp(lacc, 0, 255) / 255) / 255.0f;\n", dest, j);
object.Write("}\n");
}
}
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index e51294e..6667698 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -18,25 +18,16 @@
#include "VideoCommon/XFMemory.h" // for texture projection mode
-// old tev->pixelshader notes
-//
-// color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0
-// konstant for this stage (alpha, color) is given by bpmem.tevksel
-// inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current channel color
-// according to GXTevColorArg table above
-// output is given by .outreg
-// tevtemp is set according to swapmodetables and
-
-static const char *tevKSelTableC[] = // KCSEL
+static const char *tevKSelTableC[] =
{
- "1.0,1.0,1.0", // 1 = 0x00
- "0.875,0.875,0.875", // 7_8 = 0x01
- "0.75,0.75,0.75", // 3_4 = 0x02
- "0.625,0.625,0.625", // 5_8 = 0x03
- "0.5,0.5,0.5", // 1_2 = 0x04
- "0.375,0.375,0.375", // 3_8 = 0x05
- "0.25,0.25,0.25", // 1_4 = 0x06
- "0.125,0.125,0.125", // 1_8 = 0x07
+ "255,255,255", // 1 = 0x00
+ "223,223,223", // 7_8 = 0x01
+ "191,191,191", // 3_4 = 0x02
+ "159,159,159", // 5_8 = 0x03
+ "127,127,127", // 1_2 = 0x04
+ "95,95,95", // 3_8 = 0x05
+ "63,63,63", // 1_4 = 0x06
+ "31,31,31", // 1_8 = 0x07
"ERROR1", // 0x08
"ERROR2", // 0x09
"ERROR3", // 0x0a
@@ -63,16 +54,16 @@ static const char *tevKSelTableC[] = // KCSEL
I_KCOLORS"[3].aaa", // K3_A = 0x1F
};
-static const char *tevKSelTableA[] = // KASEL
+static const char *tevKSelTableA[] =
{
- "1.0", // 1 = 0x00
- "0.875",// 7_8 = 0x01
- "0.75", // 3_4 = 0x02
- "0.625",// 5_8 = 0x03
- "0.5", // 1_2 = 0x04
- "0.375",// 3_8 = 0x05
- "0.25", // 1_4 = 0x06
- "0.125",// 1_8 = 0x07
+ "255", // 1 = 0x00
+ "223", // 7_8 = 0x01
+ "191", // 3_4 = 0x02
+ "159", // 5_8 = 0x03
+ "127", // 1_2 = 0x04
+ "95", // 3_8 = 0x05
+ "63", // 1_4 = 0x06
+ "31", // 1_8 = 0x07
"ERROR5", // 0x08
"ERROR6", // 0x09
"ERROR7", // 0x0a
@@ -99,122 +90,77 @@ static const char *tevKSelTableA[] = // KASEL
I_KCOLORS"[3].a", // K3_A = 0x1F
};
-static const char *tevScaleTable[] = // CS
+static const char *tevScaleTable[] =
{
- "1.0", // SCALE_1
- "2.0", // SCALE_2
- "4.0", // SCALE_4
- "0.5", // DIVIDE_2
+ "", // SCALE_1: multiply by 1, no shift
+ "<< 1", // SCALE_2: multiply by 2
+ "<< 2", // SCALE_4: multiply by 4
+ ">> 1", // DIVIDE_2: divide by 2
};
-static const char *tevBiasTable[] = // TB
+static const char *tevBiasTable[] =
{
"", // ZERO,
- "+0.5", // ADDHALF,
- "-0.5", // SUBHALF,
+ "+ 128", // ADDHALF,
+ "- 128", // SUBHALF,
"",
};
-static const char *tevOpTable[] = { // TEV
+static const char *tevOpTable[] = {
"+", // TEVOP_ADD = 0,
"-", // TEVOP_SUB = 1,
};
-static const char *tevCInputTable[] = // CC
+static const char *tevCInputTable[] =
{
- "(prev.rgb)", // CPREV,
- "(prev.aaa)", // APREV,
- "(c0.rgb)", // C0,
- "(c0.aaa)", // A0,
- "(c1.rgb)", // C1,
- "(c1.aaa)", // A1,
- "(c2.rgb)", // C2,
- "(c2.aaa)", // A2,
- "(textemp.rgb)", // TEXC,
- "(textemp.aaa)", // TEXA,
- "(rastemp.rgb)", // RASC,
- "(rastemp.aaa)", // RASA,
- "float3(1.0, 1.0, 1.0)", // ONE
- "float3(0.5, 0.5, 0.5)", // HALF
- "(konsttemp.rgb)", //"konsttemp.rgb", // KONST
- "float3(0.0, 0.0, 0.0)", // ZERO
- ///added extra values to map clamped values
- "(cprev.rgb)", // CPREV,
- "(cprev.aaa)", // APREV,
- "(cc0.rgb)", // C0,
- "(cc0.aaa)", // A0,
- "(cc1.rgb)", // C1,
- "(cc1.aaa)", // A1,
- "(cc2.rgb)", // C2,
- "(cc2.aaa)", // A2,
- "(textemp.rgb)", // TEXC,
- "(textemp.aaa)", // TEXA,
- "(crastemp.rgb)", // RASC,
- "(crastemp.aaa)", // RASA,
- "float3(1.0, 1.0, 1.0)", // ONE
- "float3(0.5, 0.5, 0.5)", // HALF
- "(ckonsttemp.rgb)", //"konsttemp.rgb", // KONST
- "float3(0.0, 0.0, 0.0)", // ZERO
- "PADERROR1", "PADERROR2", "PADERROR3", "PADERROR4"
+ "iprev.rgb", // CPREV,
+ "iprev.aaa", // APREV,
+ "ic0.rgb", // C0,
+ "ic0.aaa", // A0,
+ "ic1.rgb", // C1,
+ "ic1.aaa", // A1,
+ "ic2.rgb", // C2,
+ "ic2.aaa", // A2,
+ "itextemp.rgb", // TEXC,
+ "itextemp.aaa", // TEXA,
+ "irastemp.rgb", // RASC,
+ "irastemp.aaa", // RASA,
+ "int3(255,255,255)", // ONE
+ "int3(127,127,127)", // HALF
+ "ikonsttemp.rgb", // KONST
+ "int3(0,0,0)", // ZERO
};
-static const char *tevAInputTable[] = // CA
+static const char *tevAInputTable[] =
{
- "prev", // APREV,
- "c0", // A0,
- "c1", // A1,
- "c2", // A2,
- "textemp", // TEXA,
- "rastemp", // RASA,
- "konsttemp", // KONST, (hw1 had quarter)
- "float4(0.0, 0.0, 0.0, 0.0)", // ZERO
- ///added extra values to map clamped values
- "cprev", // APREV,
- "cc0", // A0,
- "cc1", // A1,
- "cc2", // A2,
- "textemp", // TEXA,
- "crastemp", // RASA,
- "ckonsttemp", // KONST, (hw1 had quarter)
- "float4(0.0, 0.0, 0.0, 0.0)", // ZERO
- "PADERROR5", "PADERROR6", "PADERROR7", "PADERROR8",
- "PADERROR9", "PADERROR10", "PADERROR11", "PADERROR12",
+ "iprev", // APREV,
+ "ic0", // A0,
+ "ic1", // A1,
+ "ic2", // A2,
+ "itextemp", // TEXA,
+ "irastemp", // RASA,
+ "ikonsttemp", // KONST, (hw1 had quarter)
+ "int4(0,0,0,0)", // ZERO
};
static const char *tevRasTable[] =
{
- "colors_0",
- "colors_1",
+ "int4(round(colors_0 * 255.0f))",
+ "int4(round(colors_1 * 255.0f))",
"ERROR13", //2
"ERROR14", //3
"ERROR15", //4
- "float4(alphabump,alphabump,alphabump,alphabump)", // use bump alpha
- "(float4(alphabump,alphabump,alphabump,alphabump)*(255.0/248.0))", //normalized
- "float4(0.0, 0.0, 0.0, 0.0)", // zero
+ "(int4(1, 1, 1, 1) * alphabump)", // bump alpha (0..248)
+ "(int4(1, 1, 1, 1) * (alphabump | (alphabump >> 5)))", // normalized bump alpha (0..255)
+ "int4(0, 0, 0, 0)", // zero
};
-//static const char *tevTexFunc[] = { "tex2D", "texRECT" };
-
-static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
-static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" };
-static const char *tevIndAlphaSel[] = {"", "x", "y", "z"};
-//static const char *tevIndAlphaScale[] = {"", "*32", "*16", "*8"};
-static const char *tevIndAlphaScale[] = {"*(248.0/255.0)", "*(224.0/255.0)", "*(240.0/255.0)", "*(248.0/255.0)"};
-static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
-static const char *tevIndBiasAdd[] = {"-128.0", "1.0", "1.0", "1.0" }; // indexed by fmt
-static const char *tevIndWrapStart[] = {"0.0", "256.0", "128.0", "64.0", "32.0", "16.0", "0.001" };
-static const char *tevIndFmtScale[] = {"255.0", "31.0", "15.0", "7.0" };
-
-struct RegisterState
-{
- bool ColorNeedOverflowControl;
- bool AlphaNeedOverflowControl;
- bool AuxStored;
-};
+static const char *tevCOutputTable[] = { "iprev.rgb", "ic0.rgb", "ic1.rgb", "ic2.rgb" };
+static const char *tevAOutputTable[] = { "iprev.a", "ic0.a", "ic1.a", "ic2.a" };
static char text[16384];
-template<class T> static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4], const char swapModeTable[4][5]);
+template<class T> static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, const char swapModeTable[4][5]);
template<class T> static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
template<class T> static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
template<class T> static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data);
@@ -257,6 +203,19 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
uid_data.genMode_numtevstages = bpmem.genMode.numtevstages;
uid_data.genMode_numtexgens = bpmem.genMode.numtexgens;
+ // dot product for integer vectors
+ out.Write( "int idot(int3 x, int3 y)\n"
+ "{\n"
+ "\tint3 tmp = x * y;\n"
+ "\treturn tmp.x + tmp.y + tmp.z;\n"
+ "}\n");
+
+ out.Write( "int idot(int4 x, int4 y)\n"
+ "{\n"
+ "\tint4 tmp = x * y;\n"
+ "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n"
+ "}\n");
+
if (ApiType == API_OPENGL)
{
// Fmod implementation gleaned from Nvidia
@@ -286,18 +245,21 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
if (ApiType == API_OPENGL)
out.Write("layout(std140%s) uniform PSBlock {\n", g_ActiveConfig.backend_info.bSupportShadingLanguage420pack ? ", binding = 1" : "");
- DeclareUniform(out, ApiType, C_COLORS, "float4", I_COLORS"[4]");
- DeclareUniform(out, ApiType, C_KCOLORS, "float4", I_KCOLORS"[4]");
- DeclareUniform(out, ApiType, C_ALPHA, "float4", I_ALPHA"[1]"); // TODO: Why is this an array...-.-
+ DeclareUniform(out, ApiType, C_COLORS, "int4", I_COLORS"[4]");
+ DeclareUniform(out, ApiType, C_KCOLORS, "int4", I_KCOLORS"[4]");
+ DeclareUniform(out, ApiType, C_ALPHA, "int4", I_ALPHA);
DeclareUniform(out, ApiType, C_TEXDIMS, "float4", I_TEXDIMS"[8]");
- DeclareUniform(out, ApiType, C_ZBIAS, "float4", I_ZBIAS"[2]");
- DeclareUniform(out, ApiType, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]");
- DeclareUniform(out, ApiType, C_INDTEXMTX, "float4", I_INDTEXMTX"[6]");
- DeclareUniform(out, ApiType, C_FOG, "float4", I_FOG"[3]");
+ DeclareUniform(out, ApiType, C_ZBIAS, "int4", I_ZBIAS"[2]");
+ DeclareUniform(out, ApiType, C_INDTEXSCALE, "int4", I_INDTEXSCALE"[2]");
+ DeclareUniform(out, ApiType, C_INDTEXMTX, "int4", I_INDTEXMTX"[6]");
+ DeclareUniform(out, ApiType, C_FOGCOLOR, "int4", I_FOGCOLOR);
+ DeclareUniform(out, ApiType, C_FOGI, "int4", I_FOGI"[1]");
+ DeclareUniform(out, ApiType, C_FOGF, "float4", I_FOGF"[2]");
// For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled!
- DeclareUniform(out, ApiType, C_PLIGHTS, "float4", I_PLIGHTS"[40]");
- DeclareUniform(out, ApiType, C_PMATERIALS, "float4", I_PMATERIALS"[4]");
+ DeclareUniform(out, ApiType, C_PLIGHT_COLORS, "int4", I_PLIGHT_COLORS"[8]");
+ DeclareUniform(out, ApiType, C_PLIGHTS, "float4", I_PLIGHTS"[32]");
+ DeclareUniform(out, ApiType, C_PMATERIALS, "int4", I_PMATERIALS"[4]");
if (ApiType == API_OPENGL)
out.Write("};\n");
@@ -376,14 +338,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
out.Write(" ) {\n");
}
- out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0), textemp = float4(0.0, 0.0, 0.0, 0.0), rastemp = float4(0.0, 0.0, 0.0, 0.0), konsttemp = float4(0.0, 0.0, 0.0, 0.0);\n"
- " float3 comp16 = float3(1.0, 255.0, 0.0), comp24 = float3(1.0, 255.0, 255.0*255.0);\n"
- " float alphabump=0.0;\n"
- " float3 tevcoord=float3(0.0, 0.0, 0.0);\n"
- " float2 wrappedcoord=float2(0.0,0.0), tempcoord=float2(0.0,0.0);\n"
- " float4 cc0=float4(0.0,0.0,0.0,0.0), cc1=float4(0.0,0.0,0.0,0.0);\n"
- " float4 cc2=float4(0.0,0.0,0.0,0.0), cprev=float4(0.0,0.0,0.0,0.0);\n"
- " float4 crastemp=float4(0.0,0.0,0.0,0.0),ckonsttemp=float4(0.0,0.0,0.0,0.0);\n\n");
+ out.Write(" int4 ic0 = " I_COLORS"[1], ic1 = " I_COLORS"[2], ic2 = " I_COLORS"[3], iprev = " I_COLORS"[0];\n"
+ " int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n"
+ " int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n"
+ " int alphabump=0;\n"
+ " int3 tevcoord=int3(0, 0, 0);\n"
+ " int2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n\n");
if (ApiType == API_OPENGL)
{
@@ -413,14 +373,15 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n");
out.Write("\tfloat3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n");
- out.Write("\tfloat4 mat, lacc;\n"
+ out.Write("\tint4 lacc;\n"
"\tfloat3 ldir, h;\n"
"\tfloat dist, dist2, attn;\n");
- out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+39); // TODO: Can be optimized further
+ out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further
+ out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further
out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3);
uid_data.components = components;
- GenerateLightingShader<T>(out, uid_data.lighting, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_");
+ GenerateLightingShader<T>(out, uid_data.lighting, components, I_PMATERIALS, I_PLIGHT_COLORS, I_PLIGHTS, "colors_", "colors_");
}
out.Write("\tclipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n");
@@ -428,7 +389,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
// HACK to handle cases where the tex gen is not enabled
if (numTexgen == 0)
{
- out.Write("\tfloat3 uv0 = float3(0.0, 0.0, 0.0);\n");
+ out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n");
}
else
{
@@ -443,7 +404,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
out.Write("\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i);
}
- out.Write("uv%d.xy = uv%d.xy * " I_TEXDIMS"[%d].zw;\n", i, i, i);
+ out.Write("int2 fixpoint_uv%d = int2(round(uv%d.xy * " I_TEXDIMS"[%d].zw * 128.0));\n", i, i, i);
+ // TODO: S24 overflows here?
}
}
@@ -470,27 +432,16 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
if (texcoord < numTexgen)
{
out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2);
- out.Write("\ttempcoord = uv%d.xy * " I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy");
+ out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
}
else
- out.Write("\ttempcoord = float2(0.0, 0.0);\n");
+ out.Write("\ttempcoord = int2(0, 0);\n");
- out.Write("float3 indtex%d = ", i);
- SampleTexture<T>(out, "tempcoord", "abg", texmap, ApiType);
+ out.Write("\tint3 iindtex%d = ", i);
+ SampleTexture<T>(out, "(float2(tempcoord)/128.0)", "abg", texmap, ApiType);
}
}
- RegisterState RegisterStates[4];
- RegisterStates[0].AlphaNeedOverflowControl = false;
- RegisterStates[0].ColorNeedOverflowControl = false;
- RegisterStates[0].AuxStored = false;
- for(int i = 1; i < 4; i++)
- {
- RegisterStates[i].AlphaNeedOverflowControl = true;
- RegisterStates[i].ColorNeedOverflowControl = true;
- RegisterStates[i].AuxStored = false;
- }
-
// Uid fields for BuildSwapModeTable are set in WriteStage
char swapModeTable[4][5];
const char* swapColors = "rgba";
@@ -504,7 +455,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
}
for (unsigned int i = 0; i < numStages; i++)
- WriteStage<T>(out, uid_data, i, ApiType, RegisterStates, swapModeTable); // build the equation for this stage
+ WriteStage<T>(out, uid_data, i, ApiType, swapModeTable); // build the equation for this stage
#define MY_STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str)))
bool enable_pl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
@@ -517,20 +468,14 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
// regardless of the used destination register
if(bpmem.combiners[numStages - 1].colorC.dest != 0)
{
- bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored;
- out.Write("\tprev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]);
- RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl;
+ out.Write("\tiprev.rgb = %s;\n", tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]);
}
if(bpmem.combiners[numStages - 1].alphaC.dest != 0)
{
- bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored;
- out.Write("\tprev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]);
- RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl;
+ out.Write("\tiprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]);
}
}
- // emulation of unsigned 8 overflow when casting if needed
- if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
- out.Write("\tprev = frac(prev * (255.0/256.0)) * (256.0/255.0);\n");
+ out.Write("\tiprev = iprev & 255;\n");
AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
uid_data.Pretest = Pretest;
@@ -546,12 +491,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
// The performance impact of this additional calculation doesn't matter, but it prevents
// the host GPU driver from performing any early depth test optimizations.
if (g_ActiveConfig.bFastDepthCalc)
- out.Write("float zCoord = rawpos.z;\n");
+ out.Write("int zCoord = int(round(rawpos.z * 16777215.0));\n");
else
{
out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1);
// the screen space depth value = far z + (clip z / clip w) * z range
- out.Write("float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n");
+ out.Write("int zCoord = " I_ZBIAS"[1].x + int(round((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y)));\n");
}
// depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either
@@ -566,35 +511,31 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
// Note: z-textures are not written to depth buffer if early depth test is used
if (per_pixel_depth && bpmem.UseEarlyDepthTest())
- out.Write("depth = zCoord;\n");
+ out.Write("depth = float(zCoord) / 16777215.0;\n");
// Note: depth texture output is only written to depth buffer if late depth test is used
// theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway
if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture)
{
- // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
+ // use the texture input of the last texture stage (itextemp), hopefully this has been read and is in correct format...
out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1);
- out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n",
+ out.Write("zCoord = idot(" I_ZBIAS"[0].xyzw, itextemp.xyzw) + " I_ZBIAS"[1].w %s;\n",
(bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : "");
-
- // U24 overflow emulation
- out.Write("zCoord = zCoord * (16777215.0/16777216.0);\n");
- out.Write("zCoord = frac(zCoord);\n");
- out.Write("zCoord = zCoord * (16777216.0/16777215.0);\n");
+ out.Write("zCoord = zCoord & 16777215;\n");
}
if (per_pixel_depth && bpmem.UseLateDepthTest())
- out.Write("depth = zCoord;\n");
+ out.Write("depth = float(zCoord) / 16777215.0;\n");
if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
{
out.SetConstantsUsed(C_ALPHA, C_ALPHA);
- out.Write("\tocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n");
+ out.Write("\tocol0 = float4(float3(iprev.rgb), float(" I_ALPHA".a)) / 255.0;\n");
}
else
{
WriteFog<T>(out, uid_data);
- out.Write("\tocol0 = prev;\n");
+ out.Write("\tocol0 = float4(iprev) / 255.0f;\n");
}
// Use dual-source color blending to perform dst alpha in a single pass
@@ -604,8 +545,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
// Colors will be blended against the alpha from ocol1 and
// the alpha from ocol0 will be written to the framebuffer.
- out.Write("\tocol1 = prev;\n");
- out.Write("\tocol0.a = " I_ALPHA"[0].a;\n");
+ out.Write("\tocol1 = float4(iprev) / 255.0f;\n");
+ out.Write("\tocol0.a = float(" I_ALPHA".a) / 255.0;\n"); // explicit cast, as in the DSTALPHA_ALPHA_PASS path: GLSL ES has no implicit int->float conversion
}
out.Write("}\n");
@@ -623,51 +564,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
}
-
-//table with the color compare operations
-static const char *TEVCMPColorOPTable[16] =
-{
- "float3(0.0, 0.0, 0.0)",//0
- "float3(0.0, 0.0, 0.0)",//1
- "float3(0.0, 0.0, 0.0)",//2
- "float3(0.0, 0.0, 0.0)",//3
- "float3(0.0, 0.0, 0.0)",//4
- "float3(0.0, 0.0, 0.0)",//5
- "float3(0.0, 0.0, 0.0)",//6
- "float3(0.0, 0.0, 0.0)",//7
- " %s + ((%s.r >= %s.r + (0.25/255.0)) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_R8_GT 8
- " %s + ((abs(%s.r - %s.r) < (0.5/255.0)) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_R8_EQ 9
- " %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25/255.0))) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_GR16_GT 10
- " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5/255.0) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_GR16_EQ 11
- " %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25/255.0))) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_BGR24_GT 12
- " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5/255.0) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_BGR24_EQ 13
- " %s + (max(sign(%s.rgb - %s.rgb - (0.25/255.0)), float3(0.0, 0.0, 0.0)) * %s)",//#define TEVCMP_RGB8_GT 14
- " %s + ((float3(1.0, 1.0, 1.0) - max(sign(abs(%s.rgb - %s.rgb) - (0.5/255.0)), float3(0.0, 0.0, 0.0))) * %s)"//#define TEVCMP_RGB8_EQ 15
-};
-
-//table with the alpha compare operations
-static const char *TEVCMPAlphaOPTable[16] =
-{
- "0.0",//0
- "0.0",//1
- "0.0",//2
- "0.0",//3
- "0.0",//4
- "0.0",//5
- "0.0",//6
- "0.0",//7
- " %s.a + ((%s.r >= (%s.r + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_R8_GT 8
- " %s.a + (abs(%s.r - %s.r) < (0.5/255.0) ? %s.a : 0.0)",//#define TEVCMP_R8_EQ 9
- " %s.a + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_GR16_GT 10
- " %s.a + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5/255.0) ? %s.a : 0.0)",//#define TEVCMP_GR16_EQ 11
- " %s.a + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_BGR24_GT 12
- " %s.a + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5/255.0) ? %s.a : 0.0)",//#define TEVCMP_BGR24_EQ 13
- " %s.a + ((%s.a >= (%s.a + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_A8_GT 14
- " %s.a + (abs(%s.a - %s.a) < (0.5/255.0) ? %s.a : 0.0)"//#define TEVCMP_A8_EQ 15
-};
-
template<class T>
-static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4], const char swapModeTable[4][5])
+static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, const char swapModeTable[4][5])
{
int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1);
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
@@ -685,79 +583,110 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
uid_data.stagehash[n].tevind = bpmem.tevind[n].hex & 0x7FFFFF;
out.Write("// indirect op\n");
- // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
+ // perform the indirect op on the incoming regular coordinates using iindtex%d as the offset coords
if (bpmem.tevind[n].bs != ITBA_OFF)
{
- out.Write("alphabump = indtex%d.%s %s;\n",
+ const char *tevIndAlphaSel[] = {"", "x", "y", "z"};
+ const char *tevIndAlphaMask[] = {"248", "224", "240", "248"};
+ out.Write("alphabump = iindtex%d.%s & %s;\n",
bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs],
- tevIndAlphaScale[bpmem.tevind[n].fmt]);
+ tevIndAlphaMask[bpmem.tevind[n].fmt]);
+ }
+ else
+ {
+ // TODO: Should we reset alphabump to 0 here?
}
- // format
- out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]);
-
- // bias
- if (bpmem.tevind[n].bias != ITB_NONE )
- out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]);
- // multiply by offset matrix and scale
+ // format
+ const char *tevIndFmtMask[] = {"255", "31", "15", "7" };
+ out.Write("int3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]);
+
+ // bias - TODO: Check if this needs to be this complicated..
+ const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
+ const char *tevIndBiasAdd[] = {"-128", "1", "1", "1" }; // indexed by fmt
+ if (bpmem.tevind[n].bias == ITB_S || bpmem.tevind[n].bias == ITB_T || bpmem.tevind[n].bias == ITB_U)
+ out.Write("iindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]);
+ else if (bpmem.tevind[n].bias == ITB_ST || bpmem.tevind[n].bias == ITB_SU || bpmem.tevind[n].bias == ITB_TU)
+ out.Write("iindtevcrd%d.%s += int2(%s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]);
+ else if (bpmem.tevind[n].bias == ITB_STU)
+ out.Write("iindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]);
+
+ // multiply by offset matrix and scale - calculations are likely to overflow badly,
+ // yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result
if (bpmem.tevind[n].mid != 0)
{
if (bpmem.tevind[n].mid <= 3)
{
int mtxidx = 2*(bpmem.tevind[n].mid-1);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
- out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n",
- n, mtxidx, n, mtxidx+1, n);
+
+ out.Write("int2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n);
+
+ // TODO: should use a shader uid branch for this for better performance
+ out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
+ out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
{ // s matrix
_assert_(bpmem.tevind[n].mid >= 5);
int mtxidx = 2*(bpmem.tevind[n].mid-5);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
- out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
+ out.Write("int2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n);
+
+ out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
+ out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
{ // t matrix
_assert_(bpmem.tevind[n].mid >= 9);
int mtxidx = 2*(bpmem.tevind[n].mid-9);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
- out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);
+ out.Write("int2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n);
+
+ out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
+ out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
else
{
- out.Write("float2 indtevtrans%d = float2(0.0, 0.0);\n", n);
+ out.Write("int2 indtevtrans%d = int2(0, 0);\n", n);
}
}
else
{
- out.Write("float2 indtevtrans%d = float2(0.0, 0.0);\n", n);
+ out.Write("int2 indtevtrans%d = int2(0, 0);\n", n);
}
// ---------
// Wrapping
// ---------
+ const char *tevIndWrapStart[] = {"0", "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", "1" }; // TODO: Should the last one be 1 or (1<<7)?
// wrap S
if (bpmem.tevind[n].sw == ITW_OFF)
- out.Write("wrappedcoord.x = uv%d.x;\n", texcoord);
+ out.Write("wrappedcoord.x = fixpoint_uv%d.x;\n", texcoord);
else if (bpmem.tevind[n].sw == ITW_0)
- out.Write("wrappedcoord.x = 0.0;\n");
+ out.Write("wrappedcoord.x = 0;\n");
else
- out.Write("wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]);
+ out.Write("wrappedcoord.x = fixpoint_uv%d.x %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]);
// wrap T
if (bpmem.tevind[n].tw == ITW_OFF)
- out.Write("wrappedcoord.y = uv%d.y;\n", texcoord);
+ out.Write("wrappedcoord.y = fixpoint_uv%d.y;\n", texcoord);
else if (bpmem.tevind[n].tw == ITW_0)
- out.Write("wrappedcoord.y = 0.0;\n");
+ out.Write("wrappedcoord.y = 0;\n");
else
- out.Write("wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]);
+ out.Write("wrappedcoord.y = fixpoint_uv%d.y %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]);
+ // Casting uint to int preserves bit pattern
+ // comex says this works and I (neobrain) didn't find anything wrong about it...
if (bpmem.tevind[n].fb_addprev) // add previous tevcoord
out.Write("tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n);
else
out.Write("tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n);
+
+ // Emulate s24 overflows
+ out.Write("tevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
}
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
@@ -782,20 +711,20 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
uid_data.stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1);
const char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
- out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
- out.Write("crastemp = frac(rastemp * (255.0/256.0)) * (256.0/255.0);\n");
+ out.Write("irastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
}
uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
if (bpmem.tevorders[n/2].getEnable(n&1))
{
+ int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
if (!bHasIndStage)
{
// calc tevcord
if(bHasTexCoord)
- out.Write("tevcoord.xy = uv%d.xy;\n", texcoord);
+ out.Write("tevcoord.xy = fixpoint_uv%d;\n", texcoord);
else
- out.Write("tevcoord.xy = float2(0.0, 0.0);\n");
+ out.Write("tevcoord.xy = int2(0, 0);\n");
}
const int i = bpmem.combiners[n].alphaC.tswap;
@@ -808,15 +737,14 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
uid_data.stagehash[n].tevorders_texmap= bpmem.tevorders[n/2].getTexMap(n&1);
const char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
- int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
uid_data.SetTevindrefTexmap(i, texmap);
- out.Write("textemp = ");
- SampleTexture<T>(out, "tevcoord", texswap, texmap, ApiType);
+ out.Write("itextemp = ");
+ SampleTexture<T>(out, "(float2(tevcoord.xy)/128.0)", texswap, texmap, ApiType);
}
else
{
- out.Write("textemp = float4(1.0, 1.0, 1.0, 1.0);\n");
+ out.Write("itextemp = int4(255, 255, 255, 255);\n");
}
@@ -827,99 +755,14 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
int ka = bpmem.tevksel[n / 2].getKA(n & 1);
uid_data.stagehash[n].tevksel_kc = kc;
uid_data.stagehash[n].tevksel_ka = ka;
- out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
- if(kc > 7 || ka > 7)
- {
- out.Write("ckonsttemp = frac(konsttemp * (255.0/256.0)) * (256.0/255.0);\n");
- }
- else
- {
- out.Write("ckonsttemp = konsttemp;\n");
- }
+ out.Write("ikonsttemp = int4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
+
if (kc > 7)
out.SetConstantsUsed(C_KCOLORS+((kc-0xc)%4),C_KCOLORS+((kc-0xc)%4));
if (ka > 7)
out.SetConstantsUsed(C_KCOLORS+((ka-0xc)%4),C_KCOLORS+((ka-0xc)%4));
}
- if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV
- || cc.b == TEVCOLORARG_CPREV || cc.b == TEVCOLORARG_APREV
- || cc.c == TEVCOLORARG_CPREV || cc.c == TEVCOLORARG_APREV
- || ac.a == TEVALPHAARG_APREV || ac.b == TEVALPHAARG_APREV || ac.c == TEVALPHAARG_APREV)
- {
- if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
- {
- out.Write("cprev = frac(prev * (255.0/256.0)) * (256.0/255.0);\n");
- RegisterStates[0].AlphaNeedOverflowControl = false;
- RegisterStates[0].ColorNeedOverflowControl = false;
- }
- else
- {
- out.Write("cprev = prev;\n");
- }
- RegisterStates[0].AuxStored = true;
- }
-
- if(cc.a == TEVCOLORARG_C0 || cc.a == TEVCOLORARG_A0
- || cc.b == TEVCOLORARG_C0 || cc.b == TEVCOLORARG_A0
- || cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0
- || ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0)
- {
- out.SetConstantsUsed(C_COLORS+1,C_COLORS+1);
- if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl)
- {
- out.Write("cc0 = frac(c0 * (255.0/256.0)) * (256.0/255.0);\n");
- RegisterStates[1].AlphaNeedOverflowControl = false;
- RegisterStates[1].ColorNeedOverflowControl = false;
- }
- else
- {
- out.Write("cc0 = c0;\n");
- }
- RegisterStates[1].AuxStored = true;
- }
-
- if(cc.a == TEVCOLORARG_C1 || cc.a == TEVCOLORARG_A1
- || cc.b == TEVCOLORARG_C1 || cc.b == TEVCOLORARG_A1
- || cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1
- || ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1)
- {
- out.SetConstantsUsed(C_COLORS+2,C_COLORS+2);
- if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl)
- {
- out.Write("cc1 = frac(c1 * (255.0/256.0)) * (256.0/255.0);\n");
- RegisterStates[2].AlphaNeedOverflowControl = false;
- RegisterStates[2].ColorNeedOverflowControl = false;
- }
- else
- {
- out.Write("cc1 = c1;\n");
- }
- RegisterStates[2].AuxStored = true;
- }
-
- if(cc.a == TEVCOLORARG_C2 || cc.a == TEVCOLORARG_A2
- || cc.b == TEVCOLORARG_C2 || cc.b == TEVCOLORARG_A2
- || cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2
- || ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2)
- {
- out.SetConstantsUsed(C_COLORS+3,C_COLORS+3);
- if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl)
- {
- out.Write("cc2 = frac(c2 * (255.0/256.0)) * (256.0/255.0);\n");
- RegisterStates[3].AlphaNeedOverflowControl = false;
- RegisterStates[3].ColorNeedOverflowControl = false;
- }
- else
- {
- out.Write("cc2 = c2;\n");
- }
- RegisterStates[3].AuxStored = true;
- }
-
- RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0);
- RegisterStates[cc.dest].AuxStored = false;
-
if (cc.d == TEVCOLORARG_C0 || cc.d == TEVCOLORARG_A0 || ac.d == TEVALPHAARG_A0)
out.SetConstantsUsed(C_COLORS+1,C_COLORS+1);
@@ -936,55 +779,50 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest);
out.Write("// color combine\n");
- if (cc.clamp)
- out.Write("%s = clamp(", tevCOutputTable[cc.dest]);
- else
- out.Write("%s = ", tevCOutputTable[cc.dest]);
+ out.Write("%s = clamp(", tevCOutputTable[cc.dest]);
// combine the color channel
if (cc.bias != TevBias_COMPARE) // if not compare
{
//normal color combiner goes here
if (cc.shift > TEVSCALE_1)
- out.Write("%s*(", tevScaleTable[cc.shift]);
+ out.Write("(");
if(!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD))
- out.Write("%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]);
-
- if (cc.a == cc.b)
- out.Write("%s", tevCInputTable[cc.a + 16]);
- else if (cc.c == TEVCOLORARG_ZERO)
- out.Write("%s", tevCInputTable[cc.a + 16]);
- else if (cc.c == TEVCOLORARG_ONE)
- out.Write("%s", tevCInputTable[cc.b + 16]);
- else if (cc.a == TEVCOLORARG_ZERO)
- out.Write("%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]);
- else if (cc.b == TEVCOLORARG_ZERO)
- out.Write("%s*(float3(1.0, 1.0, 1.0)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]);
- else
- out.Write("lerp(%s, %s, %s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]);
+ out.Write("%s %s ", tevCInputTable[cc.d], tevOpTable[cc.op]); // TODO: Clamp d...
+
+ out.Write("((%s&255) * (int3(255,255,255) - (%s&255)) + (%s&255) * (%s&255)) / 255", tevCInputTable[cc.a], tevCInputTable[cc.c], tevCInputTable[cc.b], tevCInputTable[cc.c]);
- out.Write("%s", tevBiasTable[cc.bias]);
+ out.Write(" %s", tevBiasTable[cc.bias]);
if (cc.shift > TEVSCALE_1)
- out.Write(")");
+ out.Write(") %s", tevScaleTable[cc.shift]);
}
else
{
- int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here
- out.Write(TEVCMPColorOPTable[cmp],//lookup the function from the op table
- tevCInputTable[cc.d],
- tevCInputTable[cc.a + 16],
- tevCInputTable[cc.b + 16],
- tevCInputTable[cc.c + 16]);
+ static const char *function_table[] = // color compare-mode ops; the three %s receive inputs a, b, c in that order
+ { // each entry evaluates to c (low 8 bits) where the a/b comparison holds, otherwise 0; the caller adds it to d
+ "(((%s.r&255) > (%s.r&255)) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_GT (b masked to 8 bits like a)
+ "(((%s.r&255) == (%s.r&255)) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_EQ (b masked to 8 bits like a)
+ "((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_GT
+ "((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_EQ
+ "((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_GT
+ "((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_EQ
+ "int3(max(sign(int3((%s.rgb&255)) - int3((%s.rgb&255))), int3(0,0,0)) * (%s&255))", // TEVCMP_RGB8_GT (per-channel 0/1 mask times c)
+ "int3((int3(1,1,1) - max(sign(abs(int3((%s.rgb&255)) - int3((%s.rgb&255)))), int3(0,0,0))) * (%s&255))" // TEVCMP_RGB8_EQ (mask is 1 - sign(|a-b|), so it must start from 1, not 255)
+ };
+
+ int mode = (cc.shift<<1)|cc.op;
+ out.Write(" (%s&255) + ", tevCInputTable[cc.d]);
+ out.Write(function_table[mode], tevCInputTable[cc.a],
+ tevCInputTable[cc.b], tevCInputTable[cc.c]);
}
if (cc.clamp)
- out.Write(", 0.0, 1.0)");
+ out.Write(", int3(0,0,0), int3(255,255,255))");
+ else
+ out.Write(", int3(-1024,-1024,-1024), int3(1023,1023,1023))");
out.Write(";\n");
- RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0);
- RegisterStates[ac.dest].AuxStored = false;
-
out.Write("// alpha combine\n");
if (ac.clamp)
out.Write("%s = clamp(", tevAOutputTable[ac.dest]);
@@ -994,66 +832,65 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
if (ac.bias != TevBias_COMPARE) // if not compare
{
//normal alpha combiner goes here
- if (ac.shift > TEVSCALE_1)
- out.Write("%s*(", tevScaleTable[ac.shift]);
+ if (ac.shift > 0)
+ out.Write("(");
if(!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD))
- out.Write("%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]);
-
- if (ac.a == ac.b)
- out.Write("%s.a", tevAInputTable[ac.a + 8]);
- else if (ac.c == TEVALPHAARG_ZERO)
- out.Write("%s.a", tevAInputTable[ac.a + 8]);
- else if (ac.a == TEVALPHAARG_ZERO)
- out.Write("%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]);
- else if (ac.b == TEVALPHAARG_ZERO)
- out.Write("%s.a*(1.0-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]);
- else
- out.Write("lerp(%s.a, %s.a, %s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]);
+ out.Write("%s.a %s ", tevAInputTable[ac.d], tevOpTable[ac.op]);
- out.Write("%s",tevBiasTable[ac.bias]);
+ out.Write("((%s.a&255) * (255 - (%s.a&255)) + (%s.a&255) * (%s.a&255)) / 255", tevAInputTable[ac.a], tevAInputTable[ac.c], tevAInputTable[ac.b], tevAInputTable[ac.c]);
- if (ac.shift>0)
- out.Write(")");
+ out.Write(" %s",tevBiasTable[ac.bias]);
+ if (ac.shift>0)
+ out.Write(") %s", tevScaleTable[ac.shift]);
}
else
{
- //compare alpha combiner goes here
- int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here
- out.Write(TEVCMPAlphaOPTable[cmp],
- tevAInputTable[ac.d],
- tevAInputTable[ac.a + 8],
- tevAInputTable[ac.b + 8],
- tevAInputTable[ac.c + 8]);
+ static const char *function_table[] = // alpha compare-mode ops; the three %s receive inputs a, b, c in that order
+ { // each entry evaluates to c.a (low 8 bits) when the a/b comparison holds, otherwise 0; the caller adds it to d.a
+ "(((%s.r&255) > (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_GT
+ "(((%s.r&255) == (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_EQ
+ "((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_GT
+ "((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_EQ
+ "((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_GT
+ "((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_EQ
+ "(((%s.a&255) > (%s.a&255)) ? (%s.a&255) : 0)", // TEVCMP_A8_GT
+ "(((%s.a&255) == (%s.a&255)) ? (%s.a&255) : 0)" // TEVCMP_A8_EQ
+ };
+
+ int mode = (ac.shift<<1)|ac.op;
+ out.Write(" (%s.a&255) + ", tevAInputTable[ac.d]);
+ out.Write(function_table[mode], tevAInputTable[ac.a],
+ tevAInputTable[ac.b], tevAInputTable[ac.c]);
}
if (ac.clamp)
- out.Write(", 0.0, 1.0)");
+ out.Write(", 0, 255)");
out.Write(";\n\n");
out.Write("// TEV done\n");
}
template<class T>
-void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
+static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
{
out.SetConstantsUsed(C_TEXDIMS+texmap,C_TEXDIMS+texmap);
if (ApiType == API_D3D)
- out.Write("Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", texmap,texmap, texcoords, texmap, texswap);
- else // OGL
- out.Write("texture(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", texmap, texcoords, texmap, texswap);
+ out.Write("int4(255.0f * Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy)).%s;\n", texmap,texmap, texcoords, texmap, texswap);
+ else
+ out.Write("int4(255.0f * texture(samp%d,%s.xy * " I_TEXDIMS"[%d].xy)).%s;\n", texmap, texcoords, texmap, texswap);
}
static const char *tevAlphaFuncsTable[] =
{
- "(false)", // NEVER
- "(prev.a <= %s - (0.25/255.0))", // LESS
- "(abs( prev.a - %s ) < (0.5/255.0))", // EQUAL
- "(prev.a < %s + (0.25/255.0))", // LEQUAL
- "(prev.a >= %s + (0.25/255.0))", // GREATER
- "(abs( prev.a - %s ) >= (0.5/255.0))", // NEQUAL
- "(prev.a > %s - (0.25/255.0))", // GEQUAL
- "(true)" // ALWAYS
+ "(false)", // NEVER
+ "(iprev.a < %s)", // LESS
+ "(iprev.a == %s)", // EQUAL
+ "(iprev.a <= %s)", // LEQUAL
+ "(iprev.a > %s)", // GREATER
+ "(iprev.a != %s)", // NEQUAL
+ "(iprev.a >= %s)", // GEQUAL
+ "(true)" // ALWAYS
};
static const char *tevAlphaFunclogicTable[] =
@@ -1069,8 +906,8 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_T
{
static const char *alphaRef[2] =
{
- I_ALPHA"[0].r",
- I_ALPHA"[0].g"
+ I_ALPHA".r",
+ I_ALPHA".g"
};
out.SetConstantsUsed(C_ALPHA, C_ALPHA);
@@ -1085,7 +922,7 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_T
int compindex = bpmem.alpha_test.comp0;
out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]);
- out.Write("%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op
+ out.Write("%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]); // lookup the logic op
// Lookup the second component from the alpha function table
compindex = bpmem.alpha_test.comp1;
@@ -1143,33 +980,39 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data)
uid_data.fog_proj = bpmem.fog.c_proj_fsel.proj;
- out.SetConstantsUsed(C_FOG, C_FOG+1);
+ out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR);
+ out.SetConstantsUsed(C_FOGI, C_FOGI);
+ out.SetConstantsUsed(C_FOGF, C_FOGF+1);
if (bpmem.fog.c_proj_fsel.proj == 0)
{
// perspective
// ze = A/(B - (Zs >> B_SHF)
- out.Write("\tfloat ze = " I_FOG"[1].x / (" I_FOG"[1].y - (zCoord / " I_FOG"[1].w));\n");
+ // TODO: Verify that we want to drop lower bits here! (currently taken over from software renderer)
+ // Maybe we want to use "ze = (A << B_SHF)/((B << B_SHF) - Zs)" instead?
+ // That's equivalent, but keeps the lower bits of Zs.
+ out.Write("\tfloat ze = (" I_FOGF"[1].x * 16777215.0) / float(" I_FOGI"[0].y - (zCoord >> " I_FOGI"[0].w));\n");
}
else
{
// orthographic
// ze = a*Zs (here, no B_SHF)
- out.Write("\tfloat ze = " I_FOG"[1].x * zCoord;\n");
+ out.Write("\tfloat ze = " I_FOGF"[1].x * float(zCoord) / 16777215.0;\n");
}
// x_adjust = sqrt((x-center)^2 + k^2)/k
// ze *= x_adjust
- // this is completely theoretical as the real hardware seems to use a table intead of calculating the values.
+ // TODO Instead of this theoretical calculation, we should use the
+ // coefficient table given in the fog range BP registers!
uid_data.fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
if (bpmem.fogRange.Base.Enabled)
{
- out.SetConstantsUsed(C_FOG+2, C_FOG+2);
- out.Write("\tfloat x_adjust = (2.0 * (clipPos.x / " I_FOG"[2].y)) - 1.0 - " I_FOG"[2].x;\n");
- out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n");
+ out.SetConstantsUsed(C_FOGF, C_FOGF);
+ out.Write("\tfloat x_adjust = (2.0 * (clipPos.x / " I_FOGF"[0].y)) - 1.0 - " I_FOGF"[0].x;\n");
+ out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOGF"[0].z * " I_FOGF"[0].z) / " I_FOGF"[0].z;\n");
out.Write("\tze *= x_adjust;\n");
}
- out.Write("\tfloat fog = clamp(ze - " I_FOG"[1].z, 0.0, 1.0);\n");
+ out.Write("\tfloat fog = clamp(ze - " I_FOGF"[1].z, 0.0, 1.0);\n");
if (bpmem.fog.c_proj_fsel.fsel > 3)
{
@@ -1181,7 +1024,8 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data)
WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel);
}
- out.Write("\tprev.rgb = lerp(prev.rgb, " I_FOG"[0].rgb, fog);\n");
+ out.Write("\tint ifog = int(fog * 256.0);\n");
+ out.Write("\tiprev.rgb = (iprev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n");
}
void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h
index 6ed6960..5ec8424 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.h
+++ b/Source/Core/VideoCommon/PixelShaderGen.h
@@ -9,30 +9,36 @@
#include "VideoCommon/ShaderGenCommon.h"
#include "VideoCommon/VideoCommon.h"
-#define I_COLORS "color"
-#define I_KCOLORS "k"
-#define I_ALPHA "alphaRef"
-#define I_TEXDIMS "texdim"
-#define I_ZBIAS "czbias"
-#define I_INDTEXSCALE "cindscale"
-#define I_INDTEXMTX "cindmtx"
-#define I_FOG "cfog"
-#define I_PLIGHTS "cPLights"
-#define I_PMATERIALS "cPmtrl"
+#define I_COLORS "color"
+#define I_KCOLORS "k"
+#define I_ALPHA "alphaRef"
+#define I_TEXDIMS "texdim"
+#define I_ZBIAS "czbias"
+#define I_INDTEXSCALE "cindscale"
+#define I_INDTEXMTX "cindmtx"
+#define I_FOGCOLOR "cfogcolor"
+#define I_FOGI "cfogi"
+#define I_FOGF "cfogf"
+#define I_PLIGHT_COLORS "cPLightColors"
+#define I_PLIGHTS "cPLights"
+#define I_PMATERIALS "cPmtrl"
// TODO: get rid of them as they aren't used
-#define C_COLORMATRIX 0 // 0
-#define C_COLORS 0 // 0
-#define C_KCOLORS (C_COLORS + 4) // 4
-#define C_ALPHA (C_KCOLORS + 4) // 8
-#define C_TEXDIMS (C_ALPHA + 1) // 9
-#define C_ZBIAS (C_TEXDIMS + 8) //17
-#define C_INDTEXSCALE (C_ZBIAS + 2) //19
-#define C_INDTEXMTX (C_INDTEXSCALE + 2) //21
-#define C_FOG (C_INDTEXMTX + 6) //27
-
-#define C_PLIGHTS (C_FOG + 3)
-#define C_PMATERIALS (C_PLIGHTS + 40)
+#define C_COLORMATRIX 0 // 0
+#define C_COLORS 0 // 0
+#define C_KCOLORS (C_COLORS + 4) // 4
+#define C_ALPHA (C_KCOLORS + 4) // 8
+#define C_TEXDIMS (C_ALPHA + 1) // 9
+#define C_ZBIAS (C_TEXDIMS + 8) //17
+#define C_INDTEXSCALE (C_ZBIAS + 2) //19
+#define C_INDTEXMTX (C_INDTEXSCALE + 2) //21
+#define C_FOGCOLOR (C_INDTEXMTX + 6) //27
+#define C_FOGI (C_FOGCOLOR + 1) //28
+#define C_FOGF (C_FOGI + 1) //29
+
+#define C_PLIGHT_COLORS (C_FOGF + 2) //31
+#define C_PLIGHTS (C_PLIGHT_COLORS + 8) //39
+#define C_PMATERIALS (C_PLIGHTS + 32) //71
#define C_PENVCONST_END (C_PMATERIALS + 4)
// Different ways to achieve rendering with destination alpha
diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp
index 74cc5c1..c3bed39 100644
--- a/Source/Core/VideoCommon/PixelShaderManager.cpp
+++ b/Source/Core/VideoCommon/PixelShaderManager.cpp
@@ -84,15 +84,16 @@ void PixelShaderManager::SetConstants()
// they always seems to be larger than 256 so my theory is :
// they are the coefficients from the center to the border of the screen
// so to simplify I use the hi coefficient as K in the shader taking 256 as the scale
- constants.fog[2][0] = ScreenSpaceCenter;
- constants.fog[2][1] = (float)Renderer::EFBToScaledX((int)(2.0f * xfregs.viewport.wd));
- constants.fog[2][2] = bpmem.fogRange.K[4].HI / 256.0f;
+ // TODO: Shouldn't this be EFBToScaledXf?
+ constants.fogf[0][0] = ScreenSpaceCenter;
+ constants.fogf[0][1] = (float)Renderer::EFBToScaledX((int)(2.0f * xfregs.viewport.wd));
+ constants.fogf[0][2] = bpmem.fogRange.K[4].HI / 256.0f;
}
else
{
- constants.fog[2][0] = 0;
- constants.fog[2][1] = 1;
- constants.fog[2][2] = 1;
+ constants.fogf[0][0] = 0;
+ constants.fogf[0][1] = 1;
+ constants.fogf[0][2] = 1;
}
dirty = true;
@@ -103,6 +104,7 @@ void PixelShaderManager::SetConstants()
{
if (nLightsChanged[0] >= 0)
{
+ // TODO: Outdated comment
// lights don't have a 1 to 1 mapping, the color component needs to be converted to 4 floats
int istart = nLightsChanged[0] / 0x10;
int iend = (nLightsChanged[1] + 15) / 0x10;
@@ -111,10 +113,10 @@ void PixelShaderManager::SetConstants()
for (int i = istart; i < iend; ++i)
{
u32 color = *(const u32*)(xfmemptr + 3);
- constants.plights[5*i][0] = ((color >> 24) & 0xFF) / 255.0f;
- constants.plights[5*i][1] = ((color >> 16) & 0xFF) / 255.0f;
- constants.plights[5*i][2] = ((color >> 8) & 0xFF) / 255.0f;
- constants.plights[5*i][3] = ((color) & 0xFF) / 255.0f;
+ constants.plight_colors[i][0] = (color >> 24) & 0xFF;
+ constants.plight_colors[i][1] = (color >> 16) & 0xFF;
+ constants.plight_colors[i][2] = (color >> 8) & 0xFF;
+ constants.plight_colors[i][3] = (color) & 0xFF;
xfmemptr += 4;
for (int j = 0; j < 4; ++j, xfmemptr += 3)
@@ -124,11 +126,11 @@ void PixelShaderManager::SetConstants()
fabs(xfmemptr[1]) < 0.00001f &&
fabs(xfmemptr[2]) < 0.00001f)
// dist attenuation, make sure not equal to 0!!!
- constants.plights[5*i+j+1][0] = 0.00001f;
+ constants.plights[4*i+j][0] = 0.00001f;
else
- constants.plights[5*i+j+1][0] = xfmemptr[0];
- constants.plights[5*i+j+1][1] = xfmemptr[1];
- constants.plights[5*i+j+1][2] = xfmemptr[2];
+ constants.plights[4*i+j][0] = xfmemptr[0];
+ constants.plights[4*i+j][1] = xfmemptr[1];
+ constants.plights[4*i+j][2] = xfmemptr[2];
}
}
dirty = true;
@@ -139,8 +141,8 @@ void PixelShaderManager::SetConstants()
if(s_bViewPortChanged)
{
- constants.zbias[1][0] = xfregs.viewport.farZ / 16777216.0f;
- constants.zbias[1][1] = xfregs.viewport.zRange / 16777216.0f;
+ constants.zbias[1][0] = xfregs.viewport.farZ;
+ constants.zbias[1][1] = xfregs.viewport.zRange;
dirty = true;
s_bViewPortChanged = false;
}
@@ -152,26 +154,26 @@ void PixelShaderManager::SetConstants()
// TODO: Conversion should be checked in the context of tev_fixes..
void PixelShaderManager::SetColorChanged(int type, int num)
{
- float4* c = type ? constants.kcolors : constants.colors;
- c[num][0] = bpmem.tevregs[num].low.a / 255.0f;
- c[num][3] = bpmem.tevregs[num].low.b / 255.0f;
- c[num][2] = bpmem.tevregs[num].high.a / 255.0f;
- c[num][1] = bpmem.tevregs[num].high.b / 255.0f;
+ int4* c = type ? constants.kcolors : constants.colors;
+ c[num][0] = bpmem.tevregs[num].low.a;
+ c[num][3] = bpmem.tevregs[num].low.b;
+ c[num][2] = bpmem.tevregs[num].high.a;
+ c[num][1] = bpmem.tevregs[num].high.b;
dirty = true;
- PRIM_LOG("pixel %scolor%d: %f %f %f %f\n", type?"k":"", num, c[num][0], c[num][1], c[num][2], c[num][3]);
+ PRIM_LOG("pixel %scolor%d: %d %d %d %d\n", type?"k":"", num, c[num][0], c[num][1], c[num][2], c[num][3]);
}
void PixelShaderManager::SetAlpha()
{
- constants.alpha[0] = bpmem.alpha_test.ref0 / 255.0f;
- constants.alpha[1] = bpmem.alpha_test.ref1 / 255.0f;
+ constants.alpha[0] = bpmem.alpha_test.ref0;
+ constants.alpha[1] = bpmem.alpha_test.ref1;
dirty = true;
}
void PixelShaderManager::SetDestAlpha()
{
- constants.alpha[3] = bpmem.dstalpha.alpha / 255.0f;
+ constants.alpha[3] = bpmem.dstalpha.alpha;
dirty = true;
}
@@ -188,7 +190,7 @@ void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height, u32 wra
void PixelShaderManager::SetZTextureBias()
{
- constants.zbias[1][3] = bpmem.ztex1.bias/16777215.0f;
+ constants.zbias[1][3] = bpmem.ztex1.bias;
dirty = true;
}
@@ -200,37 +202,35 @@ void PixelShaderManager::SetViewportChanged()
void PixelShaderManager::SetIndTexScaleChanged(bool high)
{
- constants.indtexscale[high][0] = bpmem.texscale[high].getScaleS(0);
- constants.indtexscale[high][1] = bpmem.texscale[high].getScaleT(0);
- constants.indtexscale[high][2] = bpmem.texscale[high].getScaleS(1);
- constants.indtexscale[high][3] = bpmem.texscale[high].getScaleT(1);
+ constants.indtexscale[high][0] = bpmem.texscale[high].ss0;
+ constants.indtexscale[high][1] = bpmem.texscale[high].ts0;
+ constants.indtexscale[high][2] = bpmem.texscale[high].ss1;
+ constants.indtexscale[high][3] = bpmem.texscale[high].ts1;
dirty = true;
}
void PixelShaderManager::SetIndMatrixChanged(int matrixidx)
{
int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) |
- ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) |
- ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4);
- float fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f;
+ ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) |
+ ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4);
// xyz - static matrix
- // TODO w - dynamic matrix scale / 256...... somehow / 4 works better
- // rev 2972 - now using / 256.... verify that this works
- constants.indtexmtx[2*matrixidx][0] = bpmem.indmtx[matrixidx].col0.ma * fscale;
- constants.indtexmtx[2*matrixidx][1] = bpmem.indmtx[matrixidx].col1.mc * fscale;
- constants.indtexmtx[2*matrixidx][2] = bpmem.indmtx[matrixidx].col2.me * fscale;
- constants.indtexmtx[2*matrixidx][3] = fscale * 4.0f;
- constants.indtexmtx[2*matrixidx+1][0] = bpmem.indmtx[matrixidx].col0.mb * fscale;
- constants.indtexmtx[2*matrixidx+1][1] = bpmem.indmtx[matrixidx].col1.md * fscale;
- constants.indtexmtx[2*matrixidx+1][2] = bpmem.indmtx[matrixidx].col2.mf * fscale;
- constants.indtexmtx[2*matrixidx+1][3] = fscale * 4.0f;
+ // w - dynamic matrix scale / 128
+ constants.indtexmtx[2*matrixidx ][0] = bpmem.indmtx[matrixidx].col0.ma;
+ constants.indtexmtx[2*matrixidx ][1] = bpmem.indmtx[matrixidx].col1.mc;
+ constants.indtexmtx[2*matrixidx ][2] = bpmem.indmtx[matrixidx].col2.me;
+ constants.indtexmtx[2*matrixidx ][3] = 17 - scale;
+ constants.indtexmtx[2*matrixidx+1][0] = bpmem.indmtx[matrixidx].col0.mb;
+ constants.indtexmtx[2*matrixidx+1][1] = bpmem.indmtx[matrixidx].col1.md;
+ constants.indtexmtx[2*matrixidx+1][2] = bpmem.indmtx[matrixidx].col2.mf;
+ constants.indtexmtx[2*matrixidx+1][3] = 17 - scale;
dirty = true;
- PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n",
- matrixidx, 1024.0f*fscale,
- bpmem.indmtx[matrixidx].col0.ma * fscale, bpmem.indmtx[matrixidx].col1.mc * fscale, bpmem.indmtx[matrixidx].col2.me * fscale,
- bpmem.indmtx[matrixidx].col0.mb * fscale, bpmem.indmtx[matrixidx].col1.md * fscale, bpmem.indmtx[matrixidx].col2.mf * fscale);
+ PRIM_LOG("indmtx%d: scale=%d, mat=(%d %d %d; %d %d %d)\n",
+ matrixidx, scale,
+ bpmem.indmtx[matrixidx].col0.ma, bpmem.indmtx[matrixidx].col1.mc, bpmem.indmtx[matrixidx].col2.me,
+ bpmem.indmtx[matrixidx].col0.mb, bpmem.indmtx[matrixidx].col1.md, bpmem.indmtx[matrixidx].col2.mf);
}
@@ -242,18 +242,18 @@ void PixelShaderManager::SetZTextureTypeChanged()
constants.zbias[0][0] = 0;
constants.zbias[0][1] = 0;
constants.zbias[0][2] = 0;
- constants.zbias[0][3] = 255.0f/16777215.0f;
+ constants.zbias[0][3] = 1;
break;
case TEV_ZTEX_TYPE_U16:
- constants.zbias[0][0] = 255.0f/16777215.0f;
+ constants.zbias[0][0] = 1;
constants.zbias[0][1] = 0;
constants.zbias[0][2] = 0;
- constants.zbias[0][3] = 65280.0f/16777215.0f;
+ constants.zbias[0][3] = 256;
break;
case TEV_ZTEX_TYPE_U24:
- constants.zbias[0][0] = 16711680.0f/16777215.0f;
- constants.zbias[0][1] = 65280.0f/16777215.0f;
- constants.zbias[0][2] = 255.0f/16777215.0f;
+ constants.zbias[0][0] = 65536;
+ constants.zbias[0][1] = 256;
+ constants.zbias[0][2] = 1;
constants.zbias[0][3] = 0;
break;
default:
@@ -272,9 +272,9 @@ void PixelShaderManager::SetTexCoordChanged(u8 texmapid)
void PixelShaderManager::SetFogColorChanged()
{
- constants.fog[0][0] = bpmem.fog.color.r / 255.0f;
- constants.fog[0][1] = bpmem.fog.color.g / 255.0f;
- constants.fog[0][2] = bpmem.fog.color.b / 255.0f;
+ constants.fogcolor[0] = bpmem.fog.color.r;
+ constants.fogcolor[1] = bpmem.fog.color.g;
+ constants.fogcolor[2] = bpmem.fog.color.b;
dirty = true;
}
@@ -282,17 +282,17 @@ void PixelShaderManager::SetFogParamChanged()
{
if(!g_ActiveConfig.bDisableFog)
{
- constants.fog[1][0] = bpmem.fog.a.GetA();
- constants.fog[1][1] = (float)bpmem.fog.b_magnitude / 0xFFFFFF;
- constants.fog[1][2] = bpmem.fog.c_proj_fsel.GetC();
- constants.fog[1][3] = (float)(1 << bpmem.fog.b_shift);
+ constants.fogf[1][0] = bpmem.fog.a.GetA();
+ constants.fogi[0][1] = bpmem.fog.b_magnitude;
+ constants.fogf[1][2] = bpmem.fog.c_proj_fsel.GetC();
+ constants.fogi[0][3] = bpmem.fog.b_shift;
}
else
{
- constants.fog[1][0] = 0;
- constants.fog[1][1] = 1;
- constants.fog[1][2] = 0;
- constants.fog[1][3] = 1;
+ constants.fogf[1][0] = 0.f;
+ constants.fogi[0][1] = 1;
+ constants.fogf[1][2] = 0.f;
+ constants.fogi[0][3] = 1;
}
dirty = true;
}
@@ -326,10 +326,10 @@ void PixelShaderManager::SetMaterialColorChanged(int index, u32 color)
{
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
- constants.pmaterials[index][0] = ((color >> 24) & 0xFF) / 255.0f;
- constants.pmaterials[index][1] = ((color >> 16) & 0xFF) / 255.0f;
- constants.pmaterials[index][2] = ((color >> 8) & 0xFF) / 255.0f;
- constants.pmaterials[index][3] = ( color & 0xFF) / 255.0f;
+ constants.pmaterials[index][0] = (color >> 24) & 0xFF;
+ constants.pmaterials[index][1] = (color >> 16) & 0xFF;
+ constants.pmaterials[index][2] = (color >> 8) & 0xFF;
+ constants.pmaterials[index][3] = (color) & 0xFF;
dirty = true;
}
}
diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp
index 3e2e02e..358e2ee 100644
--- a/Source/Core/VideoCommon/VertexShaderGen.cpp
+++ b/Source/Core/VideoCommon/VertexShaderGen.cpp
@@ -87,8 +87,9 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
DeclareUniform(out, api_type, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]");
DeclareUniform(out, api_type, C_PROJECTION, "float4", I_PROJECTION"[4]");
- DeclareUniform(out, api_type, C_MATERIALS, "float4", I_MATERIALS"[4]");
- DeclareUniform(out, api_type, C_LIGHTS, "float4", I_LIGHTS"[40]");
+ DeclareUniform(out, api_type, C_MATERIALS, "int4", I_MATERIALS"[4]");
+ DeclareUniform(out, api_type, C_LIGHT_COLORS, "int4", I_LIGHT_COLORS"[8]");
+ DeclareUniform(out, api_type, C_LIGHTS, "float4", I_LIGHTS"[32]");
DeclareUniform(out, api_type, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]");
DeclareUniform(out, api_type, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]");
DeclareUniform(out, api_type, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]");
@@ -217,7 +218,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
- out.Write("float4 mat, lacc;\n"
+ out.Write("int4 lacc;\n"
"float3 ldir, h;\n"
"float dist, dist2, attn;\n");
@@ -230,7 +231,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
}
- GenerateLightingShader<T>(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_");
+ GenerateLightingShader<T>(out, uid_data.lighting, components, I_MATERIALS, I_LIGHT_COLORS, I_LIGHTS, "color", "o.colors_");
if (xfregs.numChan.numColorChans < 2)
{
diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h
index e705845..43d5273 100644
--- a/Source/Core/VideoCommon/VertexShaderGen.h
+++ b/Source/Core/VideoCommon/VertexShaderGen.h
@@ -33,6 +33,7 @@
#define I_POSNORMALMATRIX "cpnmtx"
#define I_PROJECTION "cproj"
#define I_MATERIALS "cmtrl"
+#define I_LIGHT_COLORS "clight_colors"
#define I_LIGHTS "clights"
#define I_TEXMATRICES "ctexmtx"
#define I_TRANSFORMMATRICES "ctrmtx"
@@ -44,8 +45,9 @@
#define C_POSNORMALMATRIX 0
#define C_PROJECTION (C_POSNORMALMATRIX + 6)
#define C_MATERIALS (C_PROJECTION + 4)
-#define C_LIGHTS (C_MATERIALS + 4)
-#define C_TEXMATRICES (C_LIGHTS + 40)
+#define C_LIGHT_COLORS (C_MATERIALS + 4)
+#define C_LIGHTS (C_LIGHT_COLORS + 8)
+#define C_TEXMATRICES (C_LIGHTS + 32)
#define C_TRANSFORMMATRICES (C_TEXMATRICES + 24)
#define C_NORMALMATRICES (C_TRANSFORMMATRICES + 64)
#define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32)
diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp
index a9a9f4d..4e7a119 100644
--- a/Source/Core/VideoCommon/VertexShaderManager.cpp
+++ b/Source/Core/VideoCommon/VertexShaderManager.cpp
@@ -252,6 +252,7 @@ void VertexShaderManager::SetConstants()
if (nLightsChanged[0] >= 0)
{
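+ // TODO: The comment below is outdated - the light color is now stored as raw 0-255 components and is no longer divided by 255.0f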
// lights don't have a 1 to 1 mapping, the color component needs to be converted to 4 floats
int istart = nLightsChanged[0] / 0x10;
int iend = (nLightsChanged[1] + 15) / 0x10;
@@ -260,10 +261,10 @@ void VertexShaderManager::SetConstants()
for (int i = istart; i < iend; ++i)
{
u32 color = *(const u32*)(xfmemptr + 3);
- constants.lights[5*i][0] = ((color >> 24) & 0xFF) / 255.0f;
- constants.lights[5*i][1] = ((color >> 16) & 0xFF) / 255.0f;
- constants.lights[5*i][2] = ((color >> 8) & 0xFF) / 255.0f;
- constants.lights[5*i][3] = ((color) & 0xFF) / 255.0f;
+ constants.light_colors[i][0] = (color >> 24) & 0xFF;
+ constants.light_colors[i][1] = (color >> 16) & 0xFF;
+ constants.light_colors[i][2] = (color >> 8) & 0xFF;
+ constants.light_colors[i][3] = (color) & 0xFF;
xfmemptr += 4;
for (int j = 0; j < 4; ++j, xfmemptr += 3)
@@ -274,12 +275,12 @@ void VertexShaderManager::SetConstants()
fabs(xfmemptr[2]) < 0.00001f)
{
// dist attenuation, make sure not equal to 0!!!
- constants.lights[5*i+j+1][0] = 0.00001f;
+ constants.lights[4*i+j][0] = 0.00001f;
}
else
- constants.lights[5*i+j+1][0] = xfmemptr[0];
- constants.lights[5*i+j+1][1] = xfmemptr[1];
- constants.lights[5*i+j+1][2] = xfmemptr[2];
+ constants.lights[4*i+j][0] = xfmemptr[0];
+ constants.lights[4*i+j][1] = xfmemptr[1];
+ constants.lights[4*i+j][2] = xfmemptr[2];
}
}
dirty = true;
@@ -294,10 +295,10 @@ void VertexShaderManager::SetConstants()
if (nMaterialsChanged & (1 << i))
{
u32 data = *(xfregs.ambColor + i);
- constants.materials[i][0] = ((data >> 24) & 0xFF) / 255.0f;
- constants.materials[i][1] = ((data >> 16) & 0xFF) / 255.0f;
- constants.materials[i][2] = ((data >> 8) & 0xFF) / 255.0f;
- constants.materials[i][3] = ( data & 0xFF) / 255.0f;
+ constants.materials[i][0] = (data >> 24) & 0xFF;
+ constants.materials[i][1] = (data >> 16) & 0xFF;
+ constants.materials[i][2] = (data >> 8) & 0xFF;
+ constants.materials[i][3] = data & 0xFF;
}
}
@@ -306,10 +307,10 @@ void VertexShaderManager::SetConstants()
if (nMaterialsChanged & (1 << (i + 2)))
{
u32 data = *(xfregs.matColor + i);
- constants.materials[i+2][0] = ((data >> 24) & 0xFF) / 255.0f;
- constants.materials[i+2][1] = ((data >> 16) & 0xFF) / 255.0f;
- constants.materials[i+2][2] = ((data >> 8) & 0xFF) / 255.0f;
- constants.materials[i+2][3] = ( data & 0xFF) / 255.0f;
+ constants.materials[i+2][0] = (data >> 24) & 0xFF;
+ constants.materials[i+2][1] = (data >> 16) & 0xFF;
+ constants.materials[i+2][2] = (data >> 8) & 0xFF;
+ constants.materials[i+2][3] = data & 0xFF;
}
}
dirty = true;