(view as text)
diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp
index ecbbfa0..867d190 100644
--- a/Source/Core/Common/x64ABI.cpp
+++ b/Source/Core/Common/x64ABI.cpp
@@ -176,6 +176,14 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
 	ABI_RestoreStack(2 * 4);
 }
 
+void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) {
+	ABI_AlignStack(2 * 4);
+	PUSH(32, Imm32((u32)param2));
+	PUSH(32, Imm32(param1));
+	CALL(func);
+	ABI_RestoreStack(2 * 4);
+}
+
 void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) {
 	ABI_AlignStack(3 * 4);
 	PUSH(32, Imm32(param3));
@@ -204,6 +212,14 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2,u32 param
 	ABI_RestoreStack(4 * 4);
 }
 
+void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) {
+	ABI_AlignStack(2 * 4);
+	PUSH(32, Imm32(param2));
+	PUSH(32, Imm32((u32)param1));
+	CALL(func);
+	ABI_RestoreStack(2 * 4);
+}
+
 void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2,u32 param3) {
 	ABI_AlignStack(3 * 4);
 	PUSH(32, Imm32(param3));
@@ -344,6 +360,22 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
 	ABI_RestoreStack(0);
 }
 
+void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) {
+	ABI_AlignStack(0);
+	MOV(32, R(ABI_PARAM1), Imm32(param1));
+	MOV(64, R(ABI_PARAM2), Imm64((u64)param2));
+	u64 distance = u64(func) - (u64(code) + 5);
+	if (distance >= 0x0000000080000000ULL
+	 && distance <  0xFFFFFFFF80000000ULL) {
+		// Far call
+		MOV(64, R(RAX), Imm64((u64)func));
+		CALLptr(R(RAX));
+	} else {
+		CALL(func);
+	}
+	ABI_RestoreStack(0);
+}
+
 void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) {
 	ABI_AlignStack(0);
 	MOV(32, R(ABI_PARAM1), Imm32(param1));
@@ -396,6 +428,22 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 para
 	ABI_RestoreStack(0);
 }
 
+void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) {
+	ABI_AlignStack(0);
+	MOV(64, R(ABI_PARAM1), Imm64((u64)param1));
+	MOV(32, R(ABI_PARAM2), Imm32(param2));
+	u64 distance = u64(func) - (u64(code) + 5);
+	if (distance >= 0x0000000080000000ULL
+	 && distance <  0xFFFFFFFF80000000ULL) {
+		// Far call
+		MOV(64, R(RAX), Imm64((u64)func));
+		CALLptr(R(RAX));
+	} else {
+		CALL(func);
+	}
+	ABI_RestoreStack(0);
+}
+
 void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 param3) {
 	ABI_AlignStack(0);
 	MOV(64, R(ABI_PARAM1), Imm64((u64)param1));
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 368939e..2cc9267 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -171,7 +171,7 @@ private:
 	u16 indexReg;
 };
 
-inline OpArg M(void *ptr)       {return OpArg((u64)ptr, (int)SCALE_RIP);}
+inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
 inline OpArg R(X64Reg value)    {return OpArg(0, SCALE_NONE, value);}
 inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}
 inline OpArg MDisp(X64Reg value, int offset) {
@@ -194,9 +194,9 @@ inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used
 inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
 inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
 #ifdef _M_X64
-inline OpArg ImmPtr(void* imm) {return Imm64((u64)imm);}
+inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);}
 #else
-inline OpArg ImmPtr(void* imm) {return Imm32((u32)imm);}
+inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);}
 #endif
 inline u32 PtrOffset(void* ptr, void* base) {
 #ifdef _M_X64
@@ -671,9 +671,11 @@ public:
 	// These will destroy the 1 or 2 first "parameter regs".
 	void ABI_CallFunctionC(void *func, u32 param1);
 	void ABI_CallFunctionCC(void *func, u32 param1, u32 param2);
+	void ABI_CallFunctionCP(void *func, u32 param1, void *param2);
 	void ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3);
 	void ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3);
 	void ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2,u32 param3, void *param4);
+	void ABI_CallFunctionPC(void *func, void *param1, u32 param2);
 	void ABI_CallFunctionPPC(void *func, void *param1, void *param2,u32 param3);
 	void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2);
 	void ABI_CallFunctionA(void *func, const Gen::OpArg &arg1);
@@ -737,6 +739,25 @@ public:
 	#define DECLARE_IMPORT(x) extern "C" void *__imp_##x
 
 #endif
+
+	// Utility to generate a call to a std::function object.
+	//
+	// Unfortunately, calling operator() directly is undefined behavior in C++
+	// (this method might be a thunk in the case of multi-inheritance) so we
+	// have to go through a trampoline function.
+	template <typename T, typename... Args>
+	static T CallLambdaTrampoline(const std::function<T(Args...)>* f,
+	                              Args... args)
+	{
+		return (*f)(args...);
+	}
+
+	template <typename T, typename... Args>
+	void ABI_CallLambdaC(const std::function<T(Args...)>* f, u32 p1)
+	{
+		ABI_CallFunctionPC((void*)&XEmitter::CallLambdaTrampoline<T, Args...>,
+		                   const_cast<void*>((const void*)f), p1);
+	}
 };  // class XEmitter
 
 
diff --git a/Source/Core/Core/HW/MMIO.cpp b/Source/Core/Core/HW/MMIO.cpp
index 39ae318..f8cbe9e 100644
--- a/Source/Core/Core/HW/MMIO.cpp
+++ b/Source/Core/Core/HW/MMIO.cpp
@@ -148,12 +148,12 @@ public:
 
 	virtual void AcceptReadVisitor(ReadHandlingMethodVisitor<T>& v) const
 	{
-		v.VisitComplex(read_lambda_);
+		v.VisitComplex(&read_lambda_);
 	}
 
 	virtual void AcceptWriteVisitor(WriteHandlingMethodVisitor<T>& v) const
 	{
-		v.VisitComplex(write_lambda_);
+		v.VisitComplex(&write_lambda_);
 	}
 
 private:
@@ -313,9 +313,9 @@ void ReadHandler<T>::ResetMethod(ReadHandlingMethod<T>* method)
 			ret = [addr, mask](u32) { return *addr & mask; };
 		}
 
-		virtual void VisitComplex(std::function<T(u32)> lambda)
+		virtual void VisitComplex(const std::function<T(u32)>* lambda)
 		{
-			ret = lambda;
+			ret = *lambda;
 		}
 	};
 
@@ -367,9 +367,9 @@ void WriteHandler<T>::ResetMethod(WriteHandlingMethod<T>* method)
 			ret = [ptr, mask](u32, T val) { *ptr = val & mask; };
 		}
 
-		virtual void VisitComplex(std::function<void(u32, T)> lambda)
+		virtual void VisitComplex(const std::function<void(u32, T)>* lambda)
 		{
-			ret = lambda;
+			ret = *lambda;
 		}
 	};
 
diff --git a/Source/Core/Core/HW/MMIO.h b/Source/Core/Core/HW/MMIO.h
index 447ae52..c59a726 100644
--- a/Source/Core/Core/HW/MMIO.h
+++ b/Source/Core/Core/HW/MMIO.h
@@ -31,6 +31,19 @@ enum Block
 const u32 BLOCK_SIZE = 0x10000;
 const u32 NUM_MMIOS = NUM_BLOCKS * BLOCK_SIZE;
 
+// Checks if a given physical memory address refers to the MMIO address range.
+// In practice, most of games use a virtual memory mapping (via BATs set in the
+// IPL) that matches the physical memory mapping for MMIOs.
+//
+// We have a special exception here for FIFO writes: these are handled via a
+// different mechanism and should not go through the normal MMIO access
+// interface.
+inline bool IsMMIOAddress(u32 address)
+{
+	return ((address & 0xC0000000) == 0xC0000000) &&
+	       ((address & 0x0000FFFF) != 0x00008000);
+}
+
 // Compute the internal unique ID for a given MMIO address. This ID is computed
 // from a very simple formula: (block_id << 16) | lower_16_bits(address).
 //
diff --git a/Source/Core/Core/HW/MMIOHandlers.h b/Source/Core/Core/HW/MMIOHandlers.h
index b0f41a4..7afe9f6 100644
--- a/Source/Core/Core/HW/MMIOHandlers.h
+++ b/Source/Core/Core/HW/MMIOHandlers.h
@@ -88,7 +88,7 @@ class ReadHandlingMethodVisitor
 public:
 	virtual void VisitConstant(T value) = 0;
 	virtual void VisitDirect(const T* addr, u32 mask) = 0;
-	virtual void VisitComplex(std::function<T(u32)> lambda) = 0;
+	virtual void VisitComplex(const std::function<T(u32)>* lambda) = 0;
 };
 template <typename T>
 class WriteHandlingMethodVisitor
@@ -96,7 +96,7 @@ class WriteHandlingMethodVisitor
 public:
 	virtual void VisitNop() = 0;
 	virtual void VisitDirect(T* addr, u32 mask) = 0;
-	virtual void VisitComplex(std::function<void(u32, T)> lambda) = 0;
+	virtual void VisitComplex(const std::function<void(u32, T)>* lambda) = 0;
 };
 
 // These classes are INTERNAL. Do not use outside of the MMIO implementation
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
index cc1faca..aa75f53 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
@@ -5,6 +5,7 @@
 #include "Common/Common.h"
 #include "Common/CPUDetect.h"
 
+#include "Core/HW/MMIO.h"
 #include "Core/PowerPC/JitCommon/Jit_Util.h"
 #include "Core/PowerPC/JitCommon/JitBase.h"
 
@@ -117,6 +118,121 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
 	return result;
 }
 
+// Visitor that generates code to read a MMIO value to EAX.
+class MMIOReadCodeGeneratorBase
+{
+public:
+	MMIOReadCodeGeneratorBase(Gen::XCodeBlock* code, u32 registers_in_use,
+	                          Gen::X64Reg dst_reg, u32 address, bool sign_extend)
+		: code_(code), registers_in_use_(registers_in_use), dst_reg_(dst_reg),
+		  address_(address), sign_extend_(sign_extend)
+	{
+	}
+
+protected:
+	// Generates code to load a constant to the destination register. In
+	// practice it would be better to avoid using a register for this, but it
+	// would require refactoring a lot of JIT code.
+	void LoadConstantToReg(int sbits, u32 value)
+	{
+		if (sign_extend_)
+		{
+			u32 sign = !!(value & (1u << (sbits - 1)));
+			if (sbits < 32) value |= sign * ((0xFFFFFFFF >> sbits) << sbits);
+		}
+		code_->MOV(32, R(dst_reg_), Gen::Imm32(value));
+	}
+
+	// Generate the proper MOV instruction depending on whether the read should
+	// be sign extended or zero extended.
+	void MoveOpArgToReg(int sbits, Gen::OpArg arg)
+	{
+		if (sign_extend_)
+			code_->MOVSX(32, sbits, dst_reg_, arg);
+		else
+			code_->MOVZX(32, sbits, dst_reg_, arg);
+	}
+
+	void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
+	{
+		code_->MOV(64, R(RAX), ImmPtr(ptr));
+		// If we do not need to mask, we can do the sign extend while loading
+		// from memory. If masking is required, we have to first zero extend,
+		// then mask, then sign extend if needed (1 instr vs. 2/3).
+		u32 all_ones = (1ULL << sbits) - 1;
+		if ((all_ones & mask) == all_ones)
+			MoveOpArgToReg(sbits, MDisp(RAX, 0));
+		else
+		{
+			code_->MOVZX(32, sbits, dst_reg_, MDisp(RAX, 0));
+			code_->AND(32, R(dst_reg_), Imm32(mask));
+			if (sign_extend_)
+				code_->MOVSX(32, sbits, dst_reg_, R(dst_reg_));
+		}
+	}
+
+	template <typename T>
+	void CallLambda(int sbits, const std::function<T(u32)>* lambda)
+	{
+		// TODO(delroth): make portable by abstracting the C++ method call.
+		code_->ABI_PushRegistersAndAdjustStack(registers_in_use_, false);
+		code_->ABI_CallLambdaC(lambda, address_);
+		code_->ABI_PopRegistersAndAdjustStack(registers_in_use_, false);
+		MoveOpArgToReg(sbits, R(EAX));
+	}
+
+	Gen::XCodeBlock* code_;
+	u32 registers_in_use_;
+	Gen::X64Reg dst_reg_;
+	u32 address_;
+	bool sign_extend_;
+};
+
+#define MMIO_READ_CODE_GENERATOR_U(Bits) \
+	class MMIOReadCodeGenerator##Bits : public MMIOReadCodeGeneratorBase, \
+	                                    public MMIO::ReadHandlingMethodVisitor<u##Bits> \
+	{ \
+	public: \
+		using MMIOReadCodeGeneratorBase::MMIOReadCodeGeneratorBase; \
+	\
+		virtual void VisitConstant(u##Bits value) { LoadConstantToReg(Bits, value); } \
+		virtual void VisitDirect(const u##Bits* addr, u32 mask) { LoadAddrMaskToReg(Bits, addr, mask); } \
+		virtual void VisitComplex(const std::function<u##Bits(u32)>* lambda) { CallLambda(Bits, lambda); } \
+	}
+MMIO_READ_CODE_GENERATOR_U(8);
+MMIO_READ_CODE_GENERATOR_U(16);
+MMIO_READ_CODE_GENERATOR_U(32);
+
+void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
+                                 u32 registers_in_use, u32 address,
+                                 int access_size, bool sign_extend)
+{
+	switch (access_size)
+	{
+	case 8:
+		{
+			MMIOReadCodeGenerator8 gen(this, registers_in_use, reg_value,
+			                           address, sign_extend);
+			mmio->GetHandlerForRead8(address).Visit(gen);
+			break;
+		}
+	case 16:
+		{
+			MMIOReadCodeGenerator16 gen(this, registers_in_use, reg_value,
+			                            address, sign_extend);
+			mmio->GetHandlerForRead16(address).Visit(gen);
+			break;
+		}
+	case 32:
+		{
+			MMIOReadCodeGenerator32 gen(this, registers_in_use, reg_value,
+			                            address, sign_extend);
+			mmio->GetHandlerForRead32(address).Visit(gen);
+			break;
+		}
+	}
+}
+
 void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
 {
 	if (!jit->js.memcheck)
@@ -156,10 +272,25 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
 		if (opAddress.IsImm())
 		{
 			u32 address = (u32)opAddress.offset + offset;
+
+			// If we know the address, try the following loading methods in
+			// order:
+			//
+			// 1. If the address is in RAM, generate an unsafe load (directly
+			//    access the RAM buffer and load from there).
+			// 2. If the address is in the MMIO range, find the appropriate
+			//    MMIO handler and generate the code to load using the handler.
+			// 3. Otherwise, just generate a call to Memory::Read_* with the
+			//    address hardcoded.
 			if ((address & mem_mask) == 0)
 			{
 				UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
 			}
+			else if (!Core::g_CoreStartupParameter.bMMU && MMIO::IsMMIOAddress(address))
+			{
+				MMIOLoadToReg(Memory::mmio_mapping, reg_value, registersInUse,
+				              address, accessSize, signExtend);
+			}
 			else
 			{
 				ABI_PushRegistersAndAdjustStack(registersInUse, false);
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
index 52d0b12..cb67a2e 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
@@ -8,6 +8,8 @@
 
 #include "Common/x64Emitter.h"
 
+namespace MMIO { class Mapping; }
+
 #define MEMCHECK_START \
 	FixupBranch memException; \
 	if (jit->js.memcheck) \
@@ -28,6 +30,11 @@ public:
 	// these return the address of the MOV, for backpatching
 	u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
 	u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend);
+
+	// Generate a load/write from the MMIO handler for a given address. Only
+	// call for known addresses in MMIO range (MMIO::IsMMIOAddress).
+	void MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, u32 registers_in_use, u32 address, int access_size, bool sign_extend);
+
 	enum SafeLoadStoreFlags
 	{
 		SAFE_LOADSTORE_NO_SWAP = 1,