/* FreeRDP: A Remote Desktop Protocol Client * Copy operations. * vi:ts=4 sw=4: * * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing * permissions and limitations under the License. */ #include #include #include #include #include #include "prim_internal.h" #include "prim_copy.h" #include "../codec/color.h" #include static primitives_t* generic = nullptr; /* ------------------------------------------------------------------------- */ /*static inline BOOL memory_regions_overlap_1d(*/ static BOOL memory_regions_overlap_1d(const BYTE* p1, const BYTE* p2, size_t bytes) { const ULONG_PTR p1m = (const ULONG_PTR)p1; const ULONG_PTR p2m = (const ULONG_PTR)p2; if (p1m <= p2m) { if (p1m + bytes > p2m) return TRUE; } else { if (p2m + bytes > p1m) return TRUE; } /* else */ return FALSE; } /* ------------------------------------------------------------------------- */ /*static inline BOOL memory_regions_overlap_2d( */ static BOOL memory_regions_overlap_2d(const BYTE* p1, int p1Step, int p1Size, const BYTE* p2, int p2Step, int p2Size, int width, int height) { ULONG_PTR p1m = (ULONG_PTR)p1; ULONG_PTR p2m = (ULONG_PTR)p2; if (p1m <= p2m) { ULONG_PTR p1mEnd = p1m + 1ull * (WINPR_ASSERTING_INT_CAST(uint32_t, height - 1)) * WINPR_ASSERTING_INT_CAST(uint32_t, p1Step) + 1ull * WINPR_ASSERTING_INT_CAST(uint32_t, width* p1Size); if (p1mEnd > p2m) return TRUE; } else { ULONG_PTR p2mEnd = p2m + 1ull * (WINPR_ASSERTING_INT_CAST(uintptr_t, height - 1)) * WINPR_ASSERTING_INT_CAST(uintptr_t, p2Step) + 1ull * WINPR_ASSERTING_INT_CAST(uintptr_t, width* p2Size); if (p2mEnd > p1m) return TRUE; } /* else */ return FALSE; } /* ------------------------------------------------------------------------- */ static pstatus_t general_copy_8u(const BYTE* WINPR_RESTRICT pSrc, BYTE* WINPR_RESTRICT pDst, INT32 len) { if (memory_regions_overlap_1d(pSrc, pDst, (size_t)len)) { memmove((void*)pDst, (const void*)pSrc, (size_t)len); } else { memcpy((void*)pDst, (const void*)pSrc, (size_t)len); } return PRIMITIVES_SUCCESS; } /* ------------------------------------------------------------------------- */ /* Copy a block of pixels from one buffer to another. * The addresses are assumed to have been already offset to the upper-left * corners of the source and destination region of interest. */ static pstatus_t general_copy_8u_AC4r(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, BYTE* WINPR_RESTRICT pDst, INT32 dstStep, INT32 width, INT32 height) { const BYTE* src = pSrc; BYTE* dst = pDst; const size_t rowbytes = WINPR_ASSERTING_INT_CAST(size_t, width) * sizeof(UINT32); if ((width == 0) || (height == 0)) return PRIMITIVES_SUCCESS; if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32), pDst, dstStep, sizeof(UINT32), width, height)) { do { const pstatus_t rc = generic->copy(src, dst, WINPR_ASSERTING_INT_CAST(int32_t, rowbytes)); if (rc != PRIMITIVES_SUCCESS) return rc; src += srcStep; dst += dstStep; } while (--height); } else { /* TODO: do it in one operation when the rowdata is adjacent. */ do { /* If we find a replacement for memcpy that is consistently * faster, this could be replaced with that. */ memcpy(dst, src, rowbytes); src += srcStep; dst += dstStep; } while (--height); } return PRIMITIVES_SUCCESS; } static inline pstatus_t generic_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, int64_t srcVMultiplier, int64_t srcVOffset, int64_t dstVMultiplier, int64_t dstVOffset) { const int64_t srcByte = 3; const int64_t dstByte = 4; const UINT32 width = nWidth - nWidth % 8; for (int64_t y = 0; y < nHeight; y++) { const BYTE* WINPR_RESTRICT srcLine = &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; BYTE* WINPR_RESTRICT dstLine = &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; int64_t x = 0; WINPR_PRAGMA_UNROLL_LOOP for (; x < width; x++) { dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0]; dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1]; dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2]; } for (; x < nWidth; x++) { dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0]; dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1]; dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2]; } } return PRIMITIVES_SUCCESS; } static inline pstatus_t generic_image_copy_bgrx32_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, int64_t srcVMultiplier, int64_t srcVOffset, int64_t dstVMultiplier, int64_t dstVOffset) { const int64_t srcByte = 4; const int64_t dstByte = 4; const UINT32 width = nWidth - nWidth % 8; for (int64_t y = 0; y < nHeight; y++) { const BYTE* WINPR_RESTRICT srcLine = &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; BYTE* WINPR_RESTRICT dstLine = &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; int64_t x = 0; WINPR_PRAGMA_UNROLL_LOOP for (; x < width; x++) { dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0]; dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1]; dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2]; } for (; x < nWidth; x++) { dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0]; dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1]; dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2]; } } return PRIMITIVES_SUCCESS; } pstatus_t generic_image_copy_no_overlap_convert( BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, int64_t srcVMultiplier, int64_t srcVOffset, int64_t dstVMultiplier, int64_t dstVOffset) { const int64_t srcByte = FreeRDPGetBytesPerPixel(SrcFormat); const int64_t dstByte = FreeRDPGetBytesPerPixel(DstFormat); const UINT32 width = nWidth - nWidth % 8; for (int64_t y = 0; y < nHeight; y++) { const BYTE* WINPR_RESTRICT srcLine = &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; BYTE* WINPR_RESTRICT dstLine = &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; int64_t x = 0; // WINPR_PRAGMA_UNROLL_LOOP for (; x < width; x++) { const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat); const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); if (!FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor)) return -1; } for (; x < nWidth; x++) { const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat); const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette); if (!FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor)) return -1; } } return PRIMITIVES_SUCCESS; } pstatus_t generic_image_copy_no_overlap_memcpy( BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, WINPR_ATTR_UNUSED const gdiPalette* WINPR_RESTRICT palette, int64_t srcVMultiplier, int64_t srcVOffset, int64_t dstVMultiplier, int64_t dstVOffset, WINPR_ATTR_UNUSED UINT32 flags) { const int64_t dstByte = FreeRDPGetBytesPerPixel(DstFormat); const int64_t srcByte = FreeRDPGetBytesPerPixel(SrcFormat); const int64_t copyDstWidth = nWidth * dstByte; const int64_t xSrcOffset = nXSrc * srcByte; const int64_t xDstOffset = nXDst * dstByte; for (int64_t y = 0; y < nHeight; y++) { const BYTE* WINPR_RESTRICT srcLine = &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset]; BYTE* WINPR_RESTRICT dstLine = &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset]; memcpy(&dstLine[xDstOffset], &srcLine[xSrcOffset], WINPR_ASSERTING_INT_CAST(size_t, copyDstWidth)); } return PRIMITIVES_SUCCESS; } static inline pstatus_t generic_image_copy_no_overlap_dst_alpha( BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, int64_t srcVMultiplier, int64_t srcVOffset, int64_t dstVMultiplier, int64_t dstVOffset) { WINPR_ASSERT(pDstData); WINPR_ASSERT(pSrcData); switch (SrcFormat) { case PIXEL_FORMAT_BGR24: switch (DstFormat) { case PIXEL_FORMAT_BGRX32: case PIXEL_FORMAT_BGRA32: return generic_image_copy_bgr24_bgrx32( pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); default: break; } break; case PIXEL_FORMAT_BGRX32: case PIXEL_FORMAT_BGRA32: switch (DstFormat) { case PIXEL_FORMAT_BGRX32: case PIXEL_FORMAT_BGRA32: return generic_image_copy_bgrx32_bgrx32( pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); default: break; } break; case PIXEL_FORMAT_RGBX32: case PIXEL_FORMAT_RGBA32: switch (DstFormat) { case PIXEL_FORMAT_RGBX32: case PIXEL_FORMAT_RGBA32: return generic_image_copy_bgrx32_bgrx32( pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); case PIXEL_FORMAT_RGB24: return generic_image_copy_bgr24_bgrx32( pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); default: break; } break; default: break; } return generic_image_copy_no_overlap_convert( pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); } static inline pstatus_t generic_image_copy_no_overlap_no_alpha( BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, int64_t srcVMultiplier, int64_t srcVOffset, int64_t dstVMultiplier, int64_t dstVOffset, UINT32 flags) { if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat)) return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset, flags); else return generic_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); } static pstatus_t generic_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette, UINT32 flags) { const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) != 0; int64_t srcVOffset = 0; int64_t srcVMultiplier = 1; int64_t dstVOffset = 0; int64_t dstVMultiplier = 1; if ((nWidth == 0) || (nHeight == 0)) return PRIMITIVES_SUCCESS; if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX)) return -1; if (!pDstData || !pSrcData) return -1; if (nDstStep == 0) nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat); if (nSrcStep == 0) nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat); if (vSrcVFlip) { srcVOffset = (nHeight - 1ll) * nSrcStep; srcVMultiplier = -1; } if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat)) return generic_image_copy_no_overlap_dst_alpha( pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); else return generic_image_copy_no_overlap_no_alpha( pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset, flags); return PRIMITIVES_SUCCESS; } /* ------------------------------------------------------------------------- */ void primitives_init_copy(primitives_t* WINPR_RESTRICT prims) { /* Start with the default. */ prims->copy_8u = general_copy_8u; prims->copy_8u_AC4r = general_copy_8u_AC4r; prims->copy = WINPR_FUNC_PTR_CAST(prims->copy_8u, fn_copy_t); prims->copy_no_overlap = generic_image_copy_no_overlap; } void primitives_init_copy_opt(primitives_t* WINPR_RESTRICT prims) { primitives_init_copy(prims); primitives_init_copy_sse41(prims); #if defined(WITH_AVX2) primitives_init_copy_avx2(prims); #endif }