(view as text)
diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp
index f653dfb..f4ba979 100644
--- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp
@@ -44,6 +44,8 @@ static GLuint s_encode_VBO = 0;
static GLuint s_encode_VAO = 0;
static TargetRectangle s_cached_sourceRc;
+static GLuint s_PBO = 0; // for readback with different strides
+
static const char *VProgram =
"ATTRIN vec2 rawpos;\n"
"ATTRIN vec2 tex0;\n"
@@ -185,6 +187,8 @@ void Init()
glBindTexture(GL_TEXTURE_2D, s_dstTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, renderBufferWidth, renderBufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+
+ glGenBuffers(1, &s_PBO);
CreatePrograms();
}
@@ -196,6 +200,7 @@ void Shutdown()
glDeleteFramebuffers(1, &s_texConvFrameBuffer);
glDeleteBuffers(1, &s_encode_VBO );
glDeleteVertexArrays(1, &s_encode_VAO );
+ glDeleteBuffers(1, &s_PBO);
s_rgbToYuyvProgram.Destroy();
s_yuyvToRgbProgram.Destroy();
@@ -206,6 +211,7 @@ void Shutdown()
s_srcTexture = 0;
s_dstTexture = 0;
s_texConvFrameBuffer = 0;
+ s_PBO = 0;
}
void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc,
@@ -267,25 +273,38 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc,
// TODO: make this less slow.
int writeStride = bpmem.copyMipMapStrideChannels * 32;
+ int readHeight = readStride / dstWidth / 4; // 4 bytes per pixel
+ int readLoops = dstHeight / readHeight;
- if (writeStride != readStride && toTexture)
+ if (writeStride != readStride && readLoops > 1 && toTexture)
{
// writing to a texture of a different size
-
- int readHeight = readStride / dstWidth;
- readHeight /= 4; // 4 bytes per pixel
-
- int readStart = 0;
- int readLoops = dstHeight / readHeight;
+ // also copy more than one block line, so the different strides matter
+
+ // copy into one pbo first, map this buffer, and then memcpy into gc memory
+ // in this way, we only have one vram->ram transfer, but maybe a bigger
+ // cpu overhead because of the pbo
+
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, s_PBO);
+ glBufferData(GL_PIXEL_PACK_BUFFER, dstWidth*dstHeight*4, NULL, GL_STREAM_READ);
+ glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0);
+ u8* pbo = (u8*)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
+
+ //int readStart = 0;
for (int i = 0; i < readLoops; i++)
{
- glReadPixels(0, readStart, (GLsizei)dstWidth, (GLsizei)readHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr);
- readStart += readHeight;
+ memcpy(destAddr, pbo, readStride);
+ pbo += readStride;
destAddr += writeStride;
}
+
+ glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
else
+ {
glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr);
+ }
GL_REPORT_ERRORD();