[FFmpeg-devel] [PATCH v06 2/5] fbtile helperRoutines cpu based framebuffer detiling
hanishkvc
hanishkvc at gmail.com
Sat Jul 4 16:17:14 EEST 2020
Add helper routines which can be used to detile tiled framebuffer
layouts into a linear layout, using the CPU.
Currently it supports the legacy Intel Tile-X, legacy Intel Tile-Y and
newer Intel Tile-Yf tiled layouts.
The only pixel formats currently supported are the 32-bit RGB ones.
It also contains detile_generic logic, which can be easily configured
to support different kinds of tiling layouts, at the expense of some
processing speed, compared to developing a targeted detiling logic.
---
libavutil/Makefile | 2 +
libavutil/fbtile.c | 441 +++++++++++++++++++++++++++++++++++++++++++++
libavutil/fbtile.h | 228 +++++++++++++++++++++++
3 files changed, 671 insertions(+)
create mode 100644 libavutil/fbtile.c
create mode 100644 libavutil/fbtile.h
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 9b08372eb2..9b58ac5980 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -84,6 +84,7 @@ HEADERS = adler32.h \
xtea.h \
tea.h \
tx.h \
+ fbtile.h \
HEADERS-$(CONFIG_LZO) += lzo.h
@@ -169,6 +170,7 @@ OBJS = adler32.o \
tx_float.o \
tx_double.o \
tx_int32.o \
+ fbtile.o \
video_enc_params.o \
diff --git a/libavutil/fbtile.c b/libavutil/fbtile.c
new file mode 100644
index 0000000000..ca04f0a7d2
--- /dev/null
+++ b/libavutil/fbtile.c
@@ -0,0 +1,441 @@
+/*
+ * CPU based Framebuffer Tile DeTile logic
+ * Copyright (c) 2020 C Hanish Menon <HanishKVC>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "avutil.h"
+#include "common.h"
+#include "fbtile.h"
+#if CONFIG_LIBDRM
+#include <drm_fourcc.h>
+#endif
+
+
+/**
+ * Map a DRM format modifier to the matching fbtile mode.
+ *
+ * When FFmpeg is built without libdrm (CONFIG_LIBDRM unset) no modifier is
+ * recognised and TILE_NONE_END is always returned.
+ *
+ * @param formatModifier the format_modifier describing the framebuffer layout
+ * @return TILE_NONE for a linear layout, TILE_INTELX, TILE_INTELY or
+ *         TILE_INTELYF for the matching Intel tiled layouts, and
+ *         TILE_NONE_END for every other modifier
+ */
+int fbtilemode_from_formatmodifier(uint64_t formatModifier)
+{
+    int mode = TILE_NONE_END;
+
+#if CONFIG_LIBDRM
+    switch(formatModifier) {
+        case DRM_FORMAT_MOD_LINEAR:
+            mode = TILE_NONE;
+            break;
+        case I915_FORMAT_MOD_X_TILED:
+            mode = TILE_INTELX;
+            break;
+        case I915_FORMAT_MOD_Y_TILED:
+            mode = TILE_INTELY;
+            break;
+        case I915_FORMAT_MOD_Yf_TILED:
+            mode = TILE_INTELYF;
+            break;
+        default:
+            mode = TILE_NONE_END;
+            break;
+    }
+#endif
+#ifdef DEBUG_FBTILE_FORMATMODIFIER_MAPPING
+    // formatModifier is a uint64_t: PRIx64 is the matching conversion,
+    // %lx is wrong wherever long is 32 bits wide
+    av_log(NULL, AV_LOG_DEBUG, "fbtile:formatmodifier[%"PRIx64"] mapped to mode[%d]\n", formatModifier, mode);
+#endif
+    return mode;
+}
+
+
+/**
+ * Pixel formats accepted by the fbtile logic.
+ *
+ * Currently only 32bit RGB based formats are listed.
+ * The list is terminated by AV_PIX_FMT_NONE; fbtile_checkpixformats()
+ * walks it until it reaches that terminator.
+ *
+ * TODO: Technically the logic is, to a great extent, transparent to 16bit
+ * RGB formats as well.
+ */
+const enum AVPixelFormat fbtilePixFormats[] = {AV_PIX_FMT_RGB0, AV_PIX_FMT_0RGB, AV_PIX_FMT_BGR0, AV_PIX_FMT_0BGR,
+ AV_PIX_FMT_RGBA, AV_PIX_FMT_ARGB, AV_PIX_FMT_BGRA, AV_PIX_FMT_ABGR,
+ AV_PIX_FMT_NONE};
+
+/**
+ * Tell whether both pixel formats are listed in fbtilePixFormats.
+ *
+ * @param srcPixFormat pixel format of the source image
+ * @param dstPixFormat pixel format of the destination image
+ * @return 1 if both formats are listed, 0 otherwise
+ */
+int fbtile_checkpixformats(const enum AVPixelFormat srcPixFormat, const enum AVPixelFormat dstPixFormat)
+{
+    int srcListed = 0;
+    int dstListed = 0;
+    const enum AVPixelFormat *fmt;
+
+    // The whole list is scanned; AV_PIX_FMT_NONE only terminates it
+    for (fmt = fbtilePixFormats; *fmt != AV_PIX_FMT_NONE; fmt++) {
+        srcListed |= (*fmt == srcPixFormat);
+        dstListed |= (*fmt == dstPixFormat);
+    }
+    return (srcListed && dstListed);
+}
+
+
+/**
+ * Detile a legacy Intel Tile-X framebuffer (32 bits per pixel) into a
+ * linear one.
+ *
+ * A Tile-X tile is 128 pixels (512 bytes) wide and 8 lines high, i.e. 4096
+ * bytes, stored line after line. Tiles follow each other left to right and
+ * then top to bottom; srcLineSize, rounded up to a whole tile, gives the
+ * width of one row of tiles, so a pitch larger than the image width is
+ * handled.
+ *
+ * Tiles cut by the right or bottom edge of the image are copied only up to
+ * w and h, so nothing is written outside the w x h area of dst. The source
+ * buffer is still expected to hold whole tiles.
+ *
+ * @param w           width of the image, in pixels
+ * @param h           height of the image, in pixel lines
+ * @param dst         the destination (linear) image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src         the source (tiled) image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ */
+void detile_intelx(int w, int h,
+                   uint8_t *dst, int dstLineSize,
+                   const uint8_t *src, int srcLineSize)
+{
+    // Offsets and LineSize are in bytes
+    const int pixBytes = 4;                    // bytes per pixel
+    const int tileW = 128;                     // tileWidth inPixels, 512/4, For a 32Bits/Pixel framebuffer
+    const int tileH = 8;                       // tileHeight inPixelLines
+    const int tileWBytes = tileW*pixBytes;     // tileWidth inBytes
+    const int tileBytes = tileWBytes*tileH;    // size of one tile inBytes
+    int tilesPerRow;                           // tiles in one row of tiles of the source
+
+    if (w <= 0 || h <= 0)
+        return;
+    if (srcLineSize < w*pixBytes) {
+        // A source row cannot even hold one image line: nothing sane can be read
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intelx: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intelx: srcLineSize smaller than image width, cant detile\n");
+        return;
+    }
+    tilesPerRow = (srcLineSize + tileWBytes - 1) / tileWBytes;
+
+    for (int tY = 0, dY = 0; dY < h; tY++, dY += tileH) {
+        // lines of this row of tiles that are inside the image
+        const int rows = (h - dY < tileH) ? h - dY : tileH;
+        int sO = tY*tilesPerRow*tileBytes;     // srcOffset inBytes
+
+        for (int dX = 0; dX < w; dX += tileW, sO += tileBytes) {
+            // bytes of each tile line that are inside the image
+            const int copyBytes = ((w - dX < tileW) ? w - dX : tileW)*pixBytes;
+            const int dO = dY*dstLineSize + dX*pixBytes;
+#ifdef DEBUG_FBTILE
+            av_log(NULL, AV_LOG_DEBUG, "fbdetile:intelx: dX%d dY%d, sO%d, dO%d\n", dX, dY, sO, dO);
+#endif
+            if (rows == tileH && copyBytes == tileWBytes) {
+                // Whole tile: constant bounds let the compiler unroll the copies
+                for (int k = 0; k < tileH; k++)
+                    memcpy(dst+dO+k*dstLineSize, src+sO+k*tileWBytes, tileWBytes);
+            } else {
+                for (int k = 0; k < rows; k++)
+                    memcpy(dst+dO+k*dstLineSize, src+sO+k*tileWBytes, copyBytes);
+            }
+        }
+    }
+}
+
+
+/*
+ * Intel Legacy Tile-Y layout conversion support
+ *
+ * Currently done in a simple way. Optimisations that were considered:
+ *
+ * a) unrolling the inner copy loop
+ *    --- whole subtiles are copied by a loop with constant bounds and a
+ *        constant size, which the compiler is free to unroll.
+ *
+ * b) using simd based 128bit loading and storing along with prefetch
+ *    hinting.
+ *
+ *    TOTHINK|CHECK: Does memcpy already do this, and more, if the
+ *    situation is right?!
+ *
+ *    As code (or even intrinsics) would be specific to each architecture,
+ *    avoiding for now. Later have to check if the vector_size attribute
+ *    and its implementation by gcc can handle different architectures
+ *    properly, such that it won't become worse than the memcpy provided
+ *    for that architecture.
+ *
+ * Or maybe the two intel detiling logics could even be merged into one, as
+ * the semantic and flow is almost the same for both.
+ */
+/**
+ * Detile a legacy Intel Tile-Y framebuffer (32 bits per pixel) into a
+ * linear one.
+ *
+ * A Tile-Y tile is 128 bytes wide and 32 lines high. It is made of 8
+ * columns (subtiles), each 4 pixels (16 bytes) wide and 32 lines high, and
+ * the columns are stored one after the other. Tiles follow each other left
+ * to right and then top to bottom; srcLineSize, rounded up to a whole
+ * tile, gives the width of one row of tiles, so a pitch larger than the
+ * image width is handled.
+ *
+ * Columns cut by the right or bottom edge of the image are copied only up
+ * to w and h, so nothing is written outside the w x h area of dst. The
+ * source buffer is still expected to hold whole tiles.
+ *
+ * @param w           width of the image, in pixels
+ * @param h           height of the image, in pixel lines
+ * @param dst         the destination (linear) image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src         the source (tiled) image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ */
+void detile_intely(int w, int h,
+                   uint8_t *dst, int dstLineSize,
+                   const uint8_t *src, int srcLineSize)
+{
+    // Offsets and LineSize are in bytes
+    const int pixBytes = 4;                    // bytesPerPixel
+    // tileW represents subTileWidth here, as it can be repeated to fill a tile
+    const int tileW = 4;                       // tileWidth inPixels, 16/4, For a 32Bits/Pixel framebuffer
+    const int tileH = 32;                      // tileHeight inPixelLines
+    const int tileWBytes = tileW*pixBytes;     // tileWidth inBytes
+    const int colsPerTile = 8;                 // subtile columns making up one 128 byte wide tile
+    const int fullTileWBytes = colsPerTile*tileWBytes;
+    const int colBytes = tileWBytes*tileH;     // size of one subtile column inBytes
+    int colsPerRow;                            // subtile columns in one row of tiles of the source
+
+    if (w <= 0 || h <= 0)
+        return;
+    if (srcLineSize < w*pixBytes) {
+        // A source row cannot even hold one image line: nothing sane can be read
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intely: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intely: srcLineSize smaller than image width, cant detile\n");
+        return;
+    }
+    colsPerRow = ((srcLineSize + fullTileWBytes - 1) / fullTileWBytes) * colsPerTile;
+
+    for (int tY = 0, dY = 0; dY < h; tY++, dY += tileH) {
+        // lines of this row of tiles that are inside the image
+        const int rows = (h - dY < tileH) ? h - dY : tileH;
+        int sO = tY*colsPerRow*colBytes;       // srcOffset inBytes
+
+        for (int dX = 0; dX < w; dX += tileW, sO += colBytes) {
+            // bytes of each column line that are inside the image
+            const int copyBytes = ((w - dX < tileW) ? w - dX : tileW)*pixBytes;
+            const int dO = dY*dstLineSize + dX*pixBytes;
+#ifdef DEBUG_FBTILE
+            av_log(NULL, AV_LOG_DEBUG, "fbdetile:intely: dX%d dY%d, sO%d, dO%d\n", dX, dY, sO, dO);
+#endif
+            if (rows == tileH && copyBytes == tileWBytes) {
+                // Whole column: constant bounds let the compiler unroll the copies
+                for (int k = 0; k < tileH; k++)
+                    memcpy(dst+dO+k*dstLineSize, src+sO+k*tileWBytes, tileWBytes);
+            } else {
+                for (int k = 0; k < rows; k++)
+                    memcpy(dst+dO+k*dstLineSize, src+sO+k*tileWBytes, copyBytes);
+            }
+        }
+    }
+}
+
+
+/*
+ * Generic detile logic
+ */
+
+/*
+ * Direction Change Entries
+ * The dirChange arrays defined below (struct dirChange is declared in
+ * fbtile.h) specify the tile walking of subtiles within a tile.
+ */
+/**
+ * Settings for Intel Tile-Yf framebuffer layout.
+ * May need to swap the 4 pixel wide subtile, have to check doc bit more
+ *
+ * Each tyfDirChanges entry is {posOffset, xDelta, yDelta}; the generic
+ * detilers apply the last entry whose posOffset divides the number of
+ * subtile lines copied so far.
+ */
+const int tyfBytesPerPixel = 4;
+const int tyfSubTileWidth = 4;
+const int tyfSubTileHeight = 8;
+// Spelled as a literal: in C a const int object is not a constant expression,
+// so tyfSubTileWidth*tyfBytesPerPixel is not a portable file scope initializer.
+const int tyfSubTileWidthBytes = 16; // tyfSubTileWidth*tyfBytesPerPixel
+const int tyfTileWidth = 32;
+const int tyfTileHeight = 32;
+const int tyfNumDirChanges = 6;
+struct dirChange tyfDirChanges[] = { {8, 4, 0}, {16, -4, 8}, {32, 4, -8}, {64, -12, 8 }, {128, 4, -24}, {256, 4, -24} };
+
+/**
+ * Setting for Intel Tile-X framebuffer layout
+ *
+ * The single txDirChanges entry {8, 128, 0} moves the destination position
+ * 128 pixels to the right after every 8 subtile lines, i.e. after each
+ * 128x8 tile.
+ */
+const int txBytesPerPixel = 4;
+const int txSubTileWidth = 128;
+const int txSubTileHeight = 8;
+// Spelled as a literal: in C a const int object is not a constant expression,
+// so txSubTileWidth*txBytesPerPixel is not a portable file scope initializer.
+const int txSubTileWidthBytes = 512; // txSubTileWidth*txBytesPerPixel
+const int txTileWidth = 128;
+const int txTileHeight = 8;
+const int txNumDirChanges = 1;
+struct dirChange txDirChanges[] = { {8, 128, 0} };
+
+/**
+ * Setting for Intel Tile-Y framebuffer layout
+ * Even though a simple generic detiling logic doesn't require the
+ * dummy 256 posOffset entry, the pseudo parallel detiling based
+ * opti logic needs it to know about the tile boundary
+ * (8 subtiles of 32 lines each make up one tile).
+ */
+const int tyBytesPerPixel = 4;
+const int tySubTileWidth = 4;
+const int tySubTileHeight = 32;
+// Spelled as a literal: in C a const int object is not a constant expression,
+// so tySubTileWidth*tyBytesPerPixel is not a portable file scope initializer.
+const int tySubTileWidthBytes = 16; // tySubTileWidth*tyBytesPerPixel
+const int tyTileWidth = 32;
+const int tyTileHeight = 32;
+const int tyNumDirChanges = 2;
+struct dirChange tyDirChanges[] = { {32, 4, 0}, {256, 4, 0} };
+
+
+/**
+ * Generic detile logic, simple version: copies one subtile at a time and
+ * lets the dirChanges table decide where the next subtile lands in dst.
+ *
+ * A srcLineSize different from w*bytesPerPixel is reported through av_log
+ * but the detiling is attempted anyway.
+ *
+ * @param w                 width of the image, in pixels
+ * @param h                 height of the image, in pixel lines
+ * @param dst               the destination image buffer
+ * @param dstLineSize       the size of each row in dst image, in bytes
+ * @param src               the source image buffer
+ * @param srcLineSize       the size of each row in src image, in bytes
+ * @param bytesPerPixel     the bytes per pixel for the image
+ * @param subTileWidth      the width of subtile within the tile, in pixels
+ * @param subTileHeight     the height of subtile within the tile, in pixel lines
+ * @param subTileWidthBytes the width of subtile within the tile, in bytes
+ * @param tileWidth         the width of the tile, in pixels (not used here)
+ * @param tileHeight        the height of the tile, in pixel lines
+ * @param numDirChanges     the number of entries in dirChanges
+ * @param dirChanges        the subtile walking table
+ */
+void detile_generic_simple(int w, int h,
+                           uint8_t *dst, int dstLineSize,
+                           const uint8_t *src, int srcLineSize,
+                           int bytesPerPixel,
+                           int subTileWidth, int subTileHeight, int subTileWidthBytes,
+                           int tileWidth, int tileHeight,
+                           int numDirChanges, struct dirChange *dirChanges)
+{
+    int srcOff = 0;          // read position in src, in bytes
+    int x = 0;               // destination position, in pixels
+    int y = 0;               // destination position, in pixel lines
+    int totalSubTileLines;   // subtile lines making up the whole image
+    int doneSubTileLines;    // subtile lines copied so far
+
+    if (srcLineSize != w*bytesPerPixel) {
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: dont support LineSize | Pitch going beyond width\n");
+    }
+
+    totalSubTileLines = (w*h)/subTileWidth;
+    for (doneSubTileLines = 0; doneSubTileLines < totalSubTileLines; ) {
+        const int dstOff = y*dstLineSize + x*bytesPerPixel;
+#ifdef DEBUG_FBTILE
+        av_log(NULL, AV_LOG_DEBUG, "fbdetile:generic: dX%d dY%d, sO%d, dO%d\n", x, y, srcOff, dstOff);
+#endif
+
+        // Copy one subtile, line by line
+        for (int line = 0; line < subTileHeight; line++) {
+            memcpy(dst + dstOff + line*dstLineSize,
+                   src + srcOff + line*subTileWidthBytes,
+                   subTileWidthBytes);
+        }
+        srcOff += subTileHeight*subTileWidthBytes;
+        doneSubTileLines += subTileHeight;
+
+        // Later entries take priority: apply only the last one that matches
+        for (int i = numDirChanges; i-- > 0; ) {
+            const struct dirChange *entry = &dirChanges[i];
+            if ((doneSubTileLines % entry->posOffset) != 0)
+                continue;
+            x += entry->xDelta;
+            y += entry->yDelta;
+            break;
+        }
+
+        // Past the right edge: start the next row of tiles
+        if (x >= w) {
+            x = 0;
+            y += tileHeight;
+        }
+    }
+}
+
+
+/**
+ * Generic detile logic, optimised version.
+ *
+ * Walks the subtiles of a tile like detile_generic_simple(), but at every
+ * step copies the same subtile from up to 8 consecutive tiles of the
+ * current row of tiles, and copies subtile lines 4 at a time.
+ *
+ * The last dirChanges entry is treated as the tile boundary marker: when it
+ * matches, its xDelta is ignored and the walk jumps to the next group of
+ * tiles instead.
+ *
+ * A srcLineSize different from w*bytesPerPixel, or a width that is not a
+ * multiple of tileWidth, is reported through av_log but the detiling is
+ * attempted anyway.
+ */
+void detile_generic_opti(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize,
+ int bytesPerPixel,
+ int subTileWidth, int subTileHeight, int subTileWidthBytes,
+ int tileWidth, int tileHeight,
+ int numDirChanges, struct dirChange *dirChanges)
+{
+ int parallel = 1;
+
+ if (w*bytesPerPixel != srcLineSize) {
+ av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+ av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: dont support LineSize | Pitch going beyond width\n");
+ }
+ if (w%tileWidth != 0) {
+ av_log(NULL, AV_LOG_ERROR, "fbdetile:generic:NotSupported:NonMultWidth: width%d, tileWidth%d\n", w, tileWidth);
+ }
+ int sO = 0; // source offset of the current subtile of the first tile of the group, in bytes
+ int sOPrev = 0; // source offset at which the current group of tiles starts, in bytes
+ int dX = 0; // destination x, in pixels
+ int dY = 0; // destination y, in pixel lines
+ int nSTLines = (w*h)/subTileWidth;
+ //int nSTLinesInATile = (tileWidth*tileHeight)/subTileWidth;
+ int nTilesInARow = w/tileWidth;
+ // Pick the largest group size in 8..1 that evenly divides the tiles in a row.
+ // NOTE(review): when w < tileWidth, nTilesInARow is 0 and this selects 8 - confirm callers never pass that.
+ for (parallel=8; parallel>0; parallel--) {
+ if (nTilesInARow%parallel == 0)
+ break;
+ }
+ int cSTL = 0; // subtile lines walked so far, counted for one tile of the group only
+ int curTileInRow = 0; // index of the first tile of the current group within the row of tiles
+ while (cSTL < nSTLines) {
+ int dO = dY*dstLineSize + dX*bytesPerPixel;
+#ifdef DEBUG_FBTILE
+ av_log(NULL, AV_LOG_DEBUG, "fbdetile:generic: dX%d dY%d, sO%d, dO%d\n", dX, dY, sO, dO);
+#endif
+
+ // As most tiling layouts have a minimum subtile of 4x4, if I remember correctly,
+ // this loop has been unrolled in multiples of 4, to speed things up a bit.
+ // However tiling involving 3x3 or 2x2 subtiles can't be handled. Use detile_generic_simple
+ // for such tile layouts.
+ // Detile in parallel to a limited extent. To avoid any cache set-associativity and/or
+ // limited cache based thrashing, keep it spatially, and in turn temporally, small at one level.
+ // NOTE(review): a subTileHeight that is not a multiple of 4 makes the unrolled copies run past the subtile.
+ for (int k = 0; k < subTileHeight; k+=4) {
+ for (int p = 0; p < parallel; p++) {
+ // Tile p of the group: one whole tile further in src, one tile width further right in dst
+ int pSrcOffset = p*tileWidth*tileHeight*bytesPerPixel;
+ int pDstOffset = p*tileWidth*bytesPerPixel;
+ memcpy(dst+dO+k*dstLineSize+pDstOffset, src+sO+k*subTileWidthBytes+pSrcOffset, subTileWidthBytes);
+ memcpy(dst+dO+(k+1)*dstLineSize+pDstOffset, src+sO+(k+1)*subTileWidthBytes+pSrcOffset, subTileWidthBytes);
+ memcpy(dst+dO+(k+2)*dstLineSize+pDstOffset, src+sO+(k+2)*subTileWidthBytes+pSrcOffset, subTileWidthBytes);
+ memcpy(dst+dO+(k+3)*dstLineSize+pDstOffset, src+sO+(k+3)*subTileWidthBytes+pSrcOffset, subTileWidthBytes);
+ }
+ }
+ sO = sO + subTileHeight*subTileWidthBytes;
+
+ cSTL += subTileHeight;
+ // Later entries take priority: apply only the last one that matches
+ for (int i=numDirChanges-1; i>=0; i--) {
+ if ((cSTL%dirChanges[i].posOffset) == 0) {
+ if (i == numDirChanges-1) {
+ // Tile boundary: skip over the whole group of tiles just detiled
+ curTileInRow += parallel;
+ dX = curTileInRow*tileWidth;
+ sO = sOPrev + tileWidth*tileHeight*bytesPerPixel*(parallel);
+ sOPrev = sO;
+ } else {
+ dX += dirChanges[i].xDelta;
+ }
+ dY += dirChanges[i].yDelta;
+ break;
+ }
+ }
+ // Past the right edge: start the next row of tiles, or stop once below the image
+ if (dX >= w) {
+ dX = 0;
+ curTileInRow = 0;
+ dY += tileHeight;
+ if (dY >= h) {
+ break;
+ }
+ }
+ }
+}
+
+
+/**
+ * Detile src into dst using the detiler matching the given mode.
+ *
+ * With TILE_AUTO the mode is first derived from arg1 through
+ * fbtilemode_from_formatmodifier(). Nothing is copied for TILE_NONE, for
+ * TILE_NONE_END or for an unknown mode.
+ *
+ * @param mode          the fbtile mode based detiling to call
+ * @param arg1          the format_modifier, in case mode is TILE_AUTO
+ * @param w             width of the image
+ * @param h             height of the image
+ * @param dst           the destination image buffer
+ * @param dstLineSize   the size of each row in dst image, in bytes
+ * @param src           the source image buffer
+ * @param srcLineSize   the size of each row in src image, in bytes
+ * @param bytesPerPixel the bytes per pixel for the image (not used here)
+ *
+ * @return 0 if detiled, 1 if not
+ */
+int detile_this(int mode, uint64_t arg1,
+                int w, int h,
+                uint8_t *dst, int dstLineSize,
+                uint8_t *src, int srcLineSize,
+                int bytesPerPixel)
+{
+    static int logState=0;
+
+    if (mode == TILE_AUTO)
+        mode = fbtilemode_from_formatmodifier(arg1);
+
+    switch (mode) {
+    case TILE_NONE:
+        // Already linear: nothing to detile
+        return 1;
+    case TILE_INTELX:
+        detile_intelx(w, h, dst, dstLineSize, src, srcLineSize);
+        break;
+    case TILE_INTELY:
+        detile_intely(w, h, dst, dstLineSize, src, srcLineSize);
+        break;
+    case TILE_INTELYF:
+        detile_generic(w, h, dst, dstLineSize, src, srcLineSize,
+                       tyfBytesPerPixel, tyfSubTileWidth, tyfSubTileHeight, tyfSubTileWidthBytes,
+                       tyfTileWidth, tyfTileHeight,
+                       tyfNumDirChanges, tyfDirChanges);
+        break;
+    case TILE_INTELGX:
+        detile_generic(w, h, dst, dstLineSize, src, srcLineSize,
+                       txBytesPerPixel, txSubTileWidth, txSubTileHeight, txSubTileWidthBytes,
+                       txTileWidth, txTileHeight,
+                       txNumDirChanges, txDirChanges);
+        break;
+    case TILE_INTELGY:
+        detile_generic(w, h, dst, dstLineSize, src, srcLineSize,
+                       tyBytesPerPixel, tySubTileWidth, tySubTileHeight, tySubTileWidthBytes,
+                       tyTileWidth, tyTileHeight,
+                       tyNumDirChanges, tyDirChanges);
+        break;
+    case TILE_NONE_END:
+        av_log_once(NULL, AV_LOG_WARNING, AV_LOG_VERBOSE, &logState, "fbtile:detile_this:TILE_AUTOOr???: invalid or unsupported format_modifier:%"PRIx64"\n",arg1);
+        return 1;
+    default:
+        av_log(NULL, AV_LOG_ERROR, "fbtile:detile_this:????: unknown mode specified, check caller\n");
+        return 1;
+    }
+    return 0;
+}
+
+
+// vim: set expandtab sts=4: //
diff --git a/libavutil/fbtile.h b/libavutil/fbtile.h
new file mode 100644
index 0000000000..51556db93a
--- /dev/null
+++ b/libavutil/fbtile.h
@@ -0,0 +1,228 @@
+/*
+ * CPU based Framebuffer Tile DeTile logic
+ * Copyright (c) 2020 C Hanish Menon <HanishKVC>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_FBTILE_H
+#define AVUTIL_FBTILE_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+/**
+ * @file
+ * @brief CPU based Framebuffer tiler detiler
+ * @author C Hanish Menon <HanishKVC>
+ * @{
+ */
+
+
+/**
+ * Tiling layouts / detiling modes understood by detile_this().
+ */
+enum FBTileMode {
+ TILE_NONE, ///< linear layout; detile_this() returns 1 without copying anything
+ TILE_AUTO, ///< derive the mode from the format modifier passed to detile_this()
+ TILE_INTELX, ///< legacy Intel Tile-X, handled by detile_intelx()
+ TILE_INTELY, ///< legacy Intel Tile-Y, handled by detile_intely()
+ TILE_INTELYF, ///< Intel Tile-Yf, handled by detile_generic with the tyf settings
+ TILE_INTELGX, ///< Intel Tile-X, handled by detile_generic with the tx settings
+ TILE_INTELGY, ///< Intel Tile-Y, handled by detile_generic with the ty settings
+ TILE_NONE_END, ///< no usable mode; returned by fbtilemode_from_formatmodifier() for unmapped modifiers
+};
+
+
+/**
+ * Map from formatmodifier to fbtile's internal mode.
+ *
+ * @param formatModifier the format_modifier to map
+ * @return the fbtile's equivalent internal mode
+ */
+#undef DEBUG_FBTILE_FORMATMODIFIER_MAPPING
+int fbtilemode_from_formatmodifier(uint64_t formatModifier);
+
+
+/**
+ * Supported pixel formats by the fbtile logics
+ */
+extern const enum AVPixelFormat fbtilePixFormats[];
+/**
+ * Check if the given pixel formats are supported by fbtile logic.
+ *
+ * @param srcPixFormat pixel format of source image
+ * @param dstPixFormat pixel format of destination image
+ */
+int fbtile_checkpixformats(const enum AVPixelFormat srcPixFormat, const enum AVPixelFormat dstPixFormat);
+
+
+/**
+ * Detile legacy intel tile-x layout into linear layout.
+ *
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ */
+void detile_intelx(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize);
+
+
+/**
+ * Detile legacy intel tile-y layout into linear layout.
+ *
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ */
+void detile_intely(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize);
+
+
+/**
+ * Generic Logic.
+ */
+
+/*
+ * Direction Change Entry
+ * Used to specify the tile walking of subtiles within a tile.
+ *
+ * After each subtile is copied, the generic detilers scan the entries from
+ * the last one to the first and apply only the first entry they find whose
+ * posOffset evenly divides the number of subtile lines copied so far.
+ * detile_generic_opti() additionally treats the last entry as the tile
+ * boundary marker and ignores its xDelta.
+ */
+struct dirChange {
+ int posOffset; // entry applies when (subtile lines copied so far) % posOffset == 0; used as a divisor, so never 0
+ int xDelta; // added to the destination x position, in pixels
+ int yDelta; // added to the destination y position, in pixel lines
+};
+/**
+ * Settings for Intel Tile-Yf framebuffer layout.
+ * May need to swap the 4 pixel wide subtile, have to check doc bit more
+ */
+extern const int tyfBytesPerPixel;
+extern const int tyfSubTileWidth;
+extern const int tyfSubTileHeight;
+extern const int tyfSubTileWidthBytes;
+extern const int tyfTileWidth;
+extern const int tyfTileHeight;
+extern const int tyfNumDirChanges;
+extern struct dirChange tyfDirChanges[];
+/**
+ * Setting for Intel Tile-X framebuffer layout
+ */
+extern const int txBytesPerPixel;
+extern const int txSubTileWidth;
+extern const int txSubTileHeight;
+extern const int txSubTileWidthBytes;
+extern const int txTileWidth;
+extern const int txTileHeight;
+extern const int txNumDirChanges;
+extern struct dirChange txDirChanges[];
+/**
+ * Setting for Intel Tile-Y framebuffer layout
+ * Even though a simple generic detiling logic doesn't require the
+ * dummy 256 posOffset entry, the pseudo parallel detiling based
+ * opti logic needs it to know about the tile boundary.
+ */
+extern const int tyBytesPerPixel;
+extern const int tySubTileWidth;
+extern const int tySubTileHeight;
+extern const int tySubTileWidthBytes;
+extern const int tyTileWidth;
+extern const int tyTileHeight;
+extern const int tyNumDirChanges;
+extern struct dirChange tyDirChanges[];
+
+/**
+ * Generic Logic to Detile into linear layout.
+ * The parameters below are shared by detile_generic_simple() and
+ * detile_generic_opti().
+ *
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ * @param bytesPerPixel the bytes per pixel for the image
+ * @param subTileWidth the width of subtile within the tile, in pixels
+ * @param subTileHeight the height of subtile within the tile, in pixels
+ * @param subTileWidthBytes the width of subtile within the tile, in bytes
+ * @param tileWidth the width of the tile, in pixels
+ * @param tileHeight the height of the tile, in pixels
+ * @param numDirChanges the number of entries in dirChanges
+ * @param dirChanges the direction change entries describing the subtile walk
+ */
+
+
+/**
+ * Generic detile simple version, which is fine-grained.
+ */
+void detile_generic_simple(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize,
+ int bytesPerPixel,
+ int subTileWidth, int subTileHeight, int subTileWidthBytes,
+ int tileWidth, int tileHeight,
+ int numDirChanges, struct dirChange *dirChanges);
+
+
+/**
+ * Generic detile optimised version, minimum subtile supported 4x4.
+ */
+void detile_generic_opti(int w, int h,
+ uint8_t *dst, int dstLineSize,
+ const uint8_t *src, int srcLineSize,
+ int bytesPerPixel,
+ int subTileWidth, int subTileHeight, int subTileWidthBytes,
+ int tileWidth, int tileHeight,
+ int numDirChanges, struct dirChange *dirChanges);
+
+
+#ifdef DETILE_GENERIC_OPTI
+#define detile_generic detile_generic_opti
+#else
+#define detile_generic detile_generic_simple
+#endif
+
+
+/**
+ * Detile dispatcher: calls the detiling routine matching the given mode.
+ *
+ * Nothing is copied into dst when 1 is returned; this includes TILE_NONE
+ * (already linear), an unsupported format_modifier and an unknown mode.
+ *
+ * @param mode the fbtile mode based detiling to call
+ * @param arg1 the format_modifier, in case mode is TILE_AUTO
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ * @param bytesPerPixel the bytes per pixel for the image; currently not
+ *                      used by the implementation, which detiles with
+ *                      4 bytes per pixel settings
+ *
+ * @return 0 if detiled, 1 if not
+ */
+int detile_this(int mode, uint64_t arg1,
+ int w, int h,
+ uint8_t *dst, int dstLineSize,
+ uint8_t *src, int srcLineSize,
+ int bytesPerPixel);
+
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_FBTILE_H */
+// vim: set expandtab sts=4: //
--
2.25.1
More information about the ffmpeg-devel
mailing list