(view as text)
diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index 76e235e..09f9097 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -204,7 +204,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
{
// Oh, RIP addressing.
_offsetOrBaseReg = 5;
- emit->WriteModRM(0, _operandReg&7, 5);
+ emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
//TODO : add some checks
#ifdef _M_X64
u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
@@ -328,7 +328,6 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
}
}
-
// W = operand extended width (1 if 64-bit)
// R = register# upper bit
// X = scale amnt upper bit
@@ -1510,6 +1509,24 @@ void XEmitter::FWAIT()
Write8(0x9B);
}
+// Emits an x87 load/store on a memory operand: one opcode byte chosen by `bits`, then the operand via OpArg::WriteRest with `op` in the reg slot. TODO: make this more generic
+void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, OpArg arg)
+{
+ int mf = 0; // operand-size selector, shifted into the opcode byte below
+ switch (bits) {
+ case 32: mf = 0; break; // opcode byte 0xd9
+ case 64: mf = 2; break; // opcode byte 0xd9 | (2 << 1) == 0xdd
+ default: _assert_msg_(DYNA_REC, 0, "WriteFloatLoadStore: bits is not 32 or 64"); // mf stays 0 if the assert returns
+ }
+ Write8(0xd9 | (mf << 1));
+ // x87 instructions use the reg field of the ModR/M byte as opcode:
+ arg.WriteRest(this, 0, (X64Reg) op); // extraBytes = 0; the FloatOp value stands in for the operand register
+}
+
+void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, src);} // x87 FLD from src; bits must be 32 or 64
+void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, dest);} // x87 FST to dest; bits must be 32 or 64
+void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, dest);} // x87 FSTP (store, then pop the x87 stack) to dest; bits must be 32 or 64
+
void XEmitter::RTDSC() { Write8(0x0F); Write8(0x31); }
// helper routines for setting pointers
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 87e76ef..11581d0 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -98,6 +98,13 @@ enum NormalOp {
nrmXCHG,
};
+enum FloatOp { // values are passed as the ModR/M reg field by XEmitter::WriteFloatLoadStore, so the order is significant
+ floatLD, // 0
+ floatUnused, // 1: placeholder so that floatST lands on 2
+ floatST, // 2
+ floatSTP, // 3
+};
+
class XEmitter;
// RIP addressing does not benefit from micro op fusion on Core arch
@@ -116,6 +123,7 @@ struct OpArg
void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
void WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, X64Reg regOp2) const;
void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const;
+ void WriteFloatModRM(XEmitter *emit, FloatOp op); // NOTE(review): no definition or caller appears in this patch (WriteFloatLoadStore uses WriteRest instead) - confirm it is needed
void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
// This one is public - must be written to
u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available.
@@ -245,6 +253,7 @@ private:
void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
+ void WriteFloatLoadStore(int bits, FloatOp op, OpArg arg);
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
protected:
@@ -425,6 +434,10 @@ public:
void REP();
void REPNE();
+ // x87 loads/stores on a memory operand; `bits` selects the operand width and must be 32 or 64
+ void FLD(int bits, OpArg src);
+ void FST(int bits, OpArg dest);
+ void FSTP(int bits, OpArg dest);
void FWAIT();
// SSE/SSE2: Floating point arithmetic
diff --git a/Source/Core/Common/x64FPURoundMode.cpp b/Source/Core/Common/x64FPURoundMode.cpp
index 34438d1..f46c600 100644
--- a/Source/Core/Common/x64FPURoundMode.cpp
+++ b/Source/Core/Common/x64FPURoundMode.cpp
@@ -101,8 +101,7 @@ namespace FPURoundMode
FTZ, // flush-to-zero only
FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
};
- // FIXME: proper (?) non-IEEE mode emulation causes issues in lots of games
- if (nonIEEEMode && false)
+ if (nonIEEEMode)
{
csr |= denormalLUT[cpu_info.bFlushToZero];
}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index bc056e6..ef39a9e 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -49,8 +49,9 @@ void Jit64::lfs(UGeckoInstruction inst)
MOV(32, M(&temp32), R(EAX));
fpr.Lock(d);
fpr.BindToRegister(d, false);
- CVTSS2SD(fpr.RX(d), M(&temp32));
- MOVDDUP(fpr.RX(d), fpr.R(d));
+ FLD(32, M(&temp32));
+ FSTP(64, M(&temp64));
+ MOVDDUP(fpr.RX(d), M(&temp64));
MEMCHECK_END
@@ -235,13 +236,18 @@ void Jit64::stfs(UGeckoInstruction inst)
return;
}
+ fpr.BindToRegister(s, true, false);
+ MOVSD(M(&temp64), fpr.RX(s));
+ FLD(64, M(&temp64));
+ FSTP(32, M(&temp32));
+ MOVSS(XMM0, M(&temp32));
+
if (gpr.R(a).IsImm())
{
u32 addr = (u32)(gpr.R(a).offset + offset);
if (Memory::IsRAMAddress(addr))
{
if (cpu_info.bSSSE3) {
- CVTSD2SS(XMM0, fpr.R(s));
PSHUFB(XMM0, M((void *)bswapShuffle1x4));
WriteFloatToConstRamAddress(XMM0, addr);
return;
@@ -250,7 +256,6 @@ void Jit64::stfs(UGeckoInstruction inst)
else if (addr == 0xCC008000)
{
// Float directly to write gather pipe! Fun!
- CVTSD2SS(XMM0, fpr.R(s));
CALL((void*)asm_routines.fifoDirectWriteFloat);
// TODO
js.fifoBytesThisBlock += 4;
@@ -275,7 +280,6 @@ void Jit64::stfs(UGeckoInstruction inst)
MEMCHECK_END
}
- CVTSD2SS(XMM0, fpr.R(s));
SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
gpr.UnlockAll();
gpr.UnlockAllX();
@@ -294,8 +298,11 @@ void Jit64::stfsx(UGeckoInstruction inst)
MOV(32, R(ABI_PARAM1), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
- CVTSD2SS(XMM0, fpr.R(inst.RS));
- MOVD_xmm(R(EAX), XMM0);
+ fpr.BindToRegister(inst.RS, true, false);
+ MOVSD(M(&temp64), fpr.RX(inst.RS));
+ FLD(64, M(&temp64));
+ FSTP(32, M(&temp32));
+ MOV(32, R(EAX), M(&temp32));
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());
gpr.UnlockAllX();
@@ -336,7 +343,9 @@ void Jit64::lfsx(UGeckoInstruction inst)
MEMCHECK_START
MOV(32, M(&temp32), R(EAX));
- CVTSS2SD(XMM0, M(&temp32));
+ FLD(32, M(&temp32));
+ FSTP(64, M(&temp64));
+ MOVSD(XMM0, M(&temp64));
fpr.Lock(inst.RS);
fpr.BindToRegister(inst.RS, false, true);
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));