diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index 76e235e..09f9097 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -204,7 +204,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, { // Oh, RIP addressing. _offsetOrBaseReg = 5; - emit->WriteModRM(0, _operandReg&7, 5); + emit->WriteModRM(0, _operandReg, _offsetOrBaseReg); //TODO : add some checks #ifdef _M_X64 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes; @@ -328,7 +328,6 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, } } - // W = operand extended width (1 if 64-bit) // R = register# upper bit // X = scale amnt upper bit @@ -1510,6 +1509,24 @@ void XEmitter::FWAIT() Write8(0x9B); } +// TODO: make this more generic +void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, OpArg arg) +{ + int mf = 0; + switch (bits) { + case 32: mf = 0; break; + case 64: mf = 2; break; + default: _assert_msg_(DYNA_REC, 0, "WriteFloatLoadStore: bits is not 32 or 64"); + } + Write8(0xd9 | (mf << 1)); + // x87 instructions use the reg field of the ModR/M byte as opcode: + arg.WriteRest(this, 0, (X64Reg) op); +} + +void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, src);} +void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, dest);} +void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, dest);} + void XEmitter::RTDSC() { Write8(0x0F); Write8(0x31); } // helper routines for setting pointers diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 87e76ef..11581d0 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -98,6 +98,13 @@ enum NormalOp { nrmXCHG, }; +enum FloatOp { + floatLD, + floatUnused, + floatST, + floatSTP, +}; + class XEmitter; // RIP addressing does not benefit from micro op fusion on Core arch @@ -116,6 +123,7 @@ struct OpArg void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const; void WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, X64Reg regOp2) const; void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const; + void WriteFloatModRM(XEmitter *emit, FloatOp op); void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits); // This one is public - must be written to u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available. @@ -245,6 +253,7 @@ private: void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteFloatLoadStore(int bits, FloatOp op, OpArg arg); void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); protected: @@ -425,6 +434,10 @@ public: void REP(); void REPNE(); + // x87 + void FLD(int bits, OpArg src); + void FST(int bits, OpArg dest); + void FSTP(int bits, OpArg dest); void FWAIT(); // SSE/SSE2: Floating point arithmetic diff --git a/Source/Core/Common/x64FPURoundMode.cpp b/Source/Core/Common/x64FPURoundMode.cpp index 34438d1..f46c600 100644 --- a/Source/Core/Common/x64FPURoundMode.cpp +++ b/Source/Core/Common/x64FPURoundMode.cpp @@ -101,8 +101,7 @@ namespace FPURoundMode FTZ, // flush-to-zero only FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported) }; - // FIXME: proper (?) non-IEEE mode emulation causes issues in lots of games - if (nonIEEEMode && false) + if (nonIEEEMode) { csr |= denormalLUT[cpu_info.bFlushToZero]; } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index bc056e6..ef39a9e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -49,8 +49,9 @@ void Jit64::lfs(UGeckoInstruction inst) MOV(32, M(&temp32), R(EAX)); fpr.Lock(d); fpr.BindToRegister(d, false); - CVTSS2SD(fpr.RX(d), M(&temp32)); - MOVDDUP(fpr.RX(d), fpr.R(d)); + FLD(32, M(&temp32)); + FSTP(64, M(&temp64)); + MOVDDUP(fpr.RX(d), M(&temp64)); MEMCHECK_END @@ -235,13 +236,18 @@ void Jit64::stfs(UGeckoInstruction inst) return; } + fpr.BindToRegister(s, true, false); + MOVSD(M(&temp64), fpr.RX(s)); + FLD(64, M(&temp64)); + FSTP(32, M(&temp32)); + MOVSS(XMM0, M(&temp32)); + if (gpr.R(a).IsImm()) { u32 addr = (u32)(gpr.R(a).offset + offset); if (Memory::IsRAMAddress(addr)) { if (cpu_info.bSSSE3) { - CVTSD2SS(XMM0, fpr.R(s)); PSHUFB(XMM0, M((void *)bswapShuffle1x4)); WriteFloatToConstRamAddress(XMM0, addr); return; @@ -250,7 +256,6 @@ void Jit64::stfs(UGeckoInstruction inst) else if (addr == 0xCC008000) { // Float directly to write gather pipe! Fun! - CVTSD2SS(XMM0, fpr.R(s)); CALL((void*)asm_routines.fifoDirectWriteFloat); // TODO js.fifoBytesThisBlock += 4; @@ -275,7 +280,6 @@ void Jit64::stfs(UGeckoInstruction inst) MEMCHECK_END } - CVTSD2SS(XMM0, fpr.R(s)); SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse()); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -294,8 +298,11 @@ void Jit64::stfsx(UGeckoInstruction inst) MOV(32, R(ABI_PARAM1), gpr.R(inst.RB)); if (inst.RA) ADD(32, R(ABI_PARAM1), gpr.R(inst.RA)); - CVTSD2SS(XMM0, fpr.R(inst.RS)); - MOVD_xmm(R(EAX), XMM0); + fpr.BindToRegister(inst.RS, true, false); + MOVSD(M(&temp64), fpr.RX(inst.RS)); + FLD(64, M(&temp64)); + FSTP(32, M(&temp32)); + MOV(32, R(EAX), M(&temp32)); SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse()); gpr.UnlockAllX(); @@ -336,7 +343,9 @@ void Jit64::lfsx(UGeckoInstruction inst) MEMCHECK_START MOV(32, M(&temp32), R(EAX)); - CVTSS2SD(XMM0, M(&temp32)); + FLD(32, M(&temp32)); + FSTP(64, M(&temp64)); + MOVSD(XMM0, M(&temp64)); fpr.Lock(inst.RS); fpr.BindToRegister(inst.RS, false, true); MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));