gldownloadelement: accelerate with glExportTexImageDMA

Accelerate GL texture download with glExportTexImageDMA if driver
supports it.

On columbia, dowloading 720p textures:
* RGBA performace up from 28 fps to 108 fps (3.8x improvement)
* RGB performance up from 2 fps to 48 fps (24x improvement)

Enterprise sees performance improvments too but they're less drastic
as it's a much powerful platform overall.
Measurements based on otherwise idle system, add more workloads such
as on screen rendering and performance increase will not be as
apparent.

BUG: 146907014
Change-Id: Idced5ea3aeae835187461bef0cf0d9113f27bfe3
diff --git a/ext/gl/gstgldownloadelement.c b/ext/gl/gstgldownloadelement.c
index 0812010..845dd6e 100644
--- a/ext/gl/gstgldownloadelement.c
+++ b/ext/gl/gstgldownloadelement.c
@@ -36,6 +36,8 @@
 
 #if GST_GL_HAVE_IONDMA
 #include <gst/gl/gstglmemorydma.h>
+#include <gst/allocators/gstionmemory.h>
+#include <libdrm/drm_fourcc.h>
 #endif
 
 GST_DEBUG_CATEGORY_STATIC (gst_gl_download_element_debug);
@@ -64,6 +66,13 @@
     trans, GstQuery * query);
 static void gst_gl_download_element_finalize (GObject * object);
 
+#if GST_GL_HAVE_IONDMA
+static gboolean gst_gl_download_element_gl_start (GstGLBaseFilter * base);
+static void gst_gl_download_element_gl_stop (GstGLBaseFilter * base);
+static GstBuffer *gst_gl_download_element_export_teximage_dma (
+    GstGLDownloadElement * dl, GstBuffer * inbuf);
+#endif
+
 #if GST_GL_HAVE_PLATFORM_EGL && GST_GL_HAVE_DMABUF
 #define EXTRA_CAPS_TEMPLATE "video/x-raw(" GST_CAPS_FEATURE_MEMORY_DMABUF "); "
 #else
@@ -90,6 +99,9 @@
   GstBaseTransformClass *bt_class = GST_BASE_TRANSFORM_CLASS (klass);
   GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
   GObjectClass *object_class = G_OBJECT_CLASS (klass);
+#if GST_GL_HAVE_IONDMA
+  GstGLBaseFilterClass *gl_class = GST_GL_BASE_FILTER_CLASS (klass);
+#endif
 
   bt_class->transform_caps = gst_gl_download_element_transform_caps;
   bt_class->set_caps = gst_gl_download_element_set_caps;
@@ -102,6 +114,11 @@
 
   bt_class->passthrough_on_same_caps = TRUE;
 
+#if GST_GL_HAVE_IONDMA
+  gl_class->gl_start = gst_gl_download_element_gl_start;
+  gl_class->gl_stop = gst_gl_download_element_gl_stop;
+#endif
+
   gst_element_class_add_static_pad_template (element_class,
       &gst_gl_download_element_src_pad_template);
   gst_element_class_add_static_pad_template (element_class,
@@ -414,6 +431,11 @@
   gst_caps_replace (&src_caps, NULL);
 
 #if GST_GL_HAVE_IONDMA
+  *outbuf = gst_gl_download_element_export_teximage_dma (dl, inbuf);
+  if (*outbuf) {
+    return GST_FLOW_OK;
+  }
+
   if (gst_is_gl_memory_dma (mem)) {
     GstGLContext *context = GST_GL_BASE_FILTER (bt)->context;
 
@@ -509,6 +531,13 @@
   gsize size;
   GstVideoFormat fmt;
 
+#if GST_GL_HAVE_IONDMA
+  if (download->glExportTexImageDMA) {
+    return GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (bt,
+        decide_query, query);
+  }
+#endif
+
   gst_query_parse_allocation (query, &caps, NULL);
   if (!gst_video_info_from_caps (&info, caps)) {
     GST_WARNING_OBJECT (bt, "invalid caps specified");
@@ -609,3 +638,363 @@
 
   G_OBJECT_CLASS (parent_class)->finalize (object);
 }
+
+#if GST_GL_HAVE_IONDMA
+static const gchar*
+_video_format_to_string (GstVideoFormat format)
+{
+  if (format == GST_VIDEO_FORMAT_UNKNOWN)
+    return "UNSET";
+  return gst_video_format_to_string (format);
+}
+
+static gboolean
+has_blitter (GstGLDownloadElement * dl, gboolean * blitter)
+{
+  gsize length;
+  gchar *contents = NULL;
+  gboolean ret = FALSE;
+
+  if (!g_file_get_contents ("/sys/firmware/devicetree/base/model",
+      &contents, &length, NULL)) {
+    GST_WARNING_OBJECT (dl, "Failed to read devicetree model");
+    goto beach;
+  }
+
+  if (g_strrstr(contents, "i.MX8MQ")) {
+    ret = TRUE;
+    *blitter = TRUE;
+  } else if (g_strrstr(contents, "i.MX8MM")) {
+    ret = TRUE;
+    *blitter = FALSE;
+  } else {
+    GST_WARNING_OBJECT (dl, "Unsupported model '%s'", contents);
+  }
+
+beach:
+  g_free (contents);
+  return ret;
+}
+
+static void
+setup_export_teximage_dma (GstGLDownloadElement * dl)
+{
+  gboolean blitter;
+  GstStructure *config;
+  GstAllocationParams params;
+  GstVideoAlignment alignment;
+  GstGLContext *export_context = NULL;
+  GstCaps *export_caps = NULL;
+  GstBufferPool *pool = NULL;
+  GstVideoConverter *converter = NULL;
+  GstVideoFormat export_format = GST_VIDEO_FORMAT_UNKNOWN, src_format;
+  GstGLContext *context = GST_GL_BASE_FILTER (dl)->context;
+  GstBaseTransform *transform = GST_BASE_TRANSFORM (dl);
+  GstCaps *src_caps = gst_pad_get_current_caps (transform->srcpad);
+    PFNGLEXPORTTEXIMAGEDMAPROC glExportTexImageDMA =
+      gst_gl_context_get_proc_address (context, "glExportTexImageDMA");
+
+  gst_video_info_init (&dl->export_info);
+  gst_video_info_init (&dl->src_info);
+  gst_allocation_params_init (&params);
+  gst_video_alignment_reset (&alignment);
+  dl->glExportTexImageDMA = NULL;
+
+  if (!glExportTexImageDMA) {
+    GST_INFO_OBJECT (dl, "glExportTexImageDMA not supported");
+    goto beach;
+  }
+
+  if (!gst_video_info_from_caps (&dl->src_info, src_caps)) {
+    GST_WARNING_OBJECT (dl, "invalid src_caps %" GST_PTR_FORMAT, src_caps);
+    goto beach;
+  }
+  src_format = GST_VIDEO_INFO_FORMAT (&dl->src_info);
+
+  if (!has_blitter (dl, &blitter)) {
+    goto beach;
+  }
+
+  switch (src_format) {
+    case GST_VIDEO_FORMAT_RGBA:
+      export_format = src_format;
+      break;
+    case GST_VIDEO_FORMAT_RGB:
+    case GST_VIDEO_FORMAT_BGR:
+      dl->src_info.stride[0] = dl->src_info.width * 3;
+      dl->src_info.size = dl->src_info.stride[0] * dl->src_info.height;
+      /* Tightly pack 24bit formats and fall through. */
+    case GST_VIDEO_FORMAT_BGRA:
+      export_format = blitter ? src_format : GST_VIDEO_FORMAT_RGBA;
+      break;
+    default:
+      export_format = GST_VIDEO_FORMAT_UNKNOWN;
+      break;
+  }
+
+  if (export_format == GST_VIDEO_FORMAT_UNKNOWN) {
+    GST_INFO_OBJECT (dl, "Format '%s' not supported",
+        _video_format_to_string (src_format));
+    goto beach;
+  }
+
+  gst_video_info_set_format(&dl->export_info, export_format,
+      blitter ? dl->src_info.width : GST_ROUND_UP_16 (dl->src_info.width),
+      blitter ? dl->src_info.height : GST_ROUND_UP_16 (dl->src_info.height));
+  dl->export_info.colorimetry = dl->src_info.colorimetry;
+  dl->export_info.par_n = dl->src_info.par_n;
+  dl->export_info.par_d = dl->src_info.par_d;
+  dl->export_info.fps_n = dl->src_info.fps_n;
+  dl->export_info.fps_d = dl->src_info.fps_d;
+  params.align = blitter ? 0 : 127;
+  alignment.stride_align[0] = params.align;
+  gst_video_info_align (&dl->export_info, &alignment);
+  export_caps = gst_video_info_to_caps (&dl->export_info);
+  GST_DEBUG_OBJECT (dl, "export_caps %" GST_PTR_FORMAT, export_caps);
+  GST_DEBUG_OBJECT (dl, "src_caps %" GST_PTR_FORMAT, src_caps);
+
+  pool = gst_video_buffer_pool_new ();
+  config = gst_buffer_pool_get_config (pool);
+
+  gst_buffer_pool_config_set_params (config, export_caps,
+      dl->export_info.size, 0, 3);
+  gst_buffer_pool_config_add_option (config,
+      GST_BUFFER_POOL_OPTION_VIDEO_META);
+  gst_buffer_pool_config_add_option (config,
+    GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
+  gst_buffer_pool_config_set_video_alignment (config, &alignment);
+  gst_buffer_pool_config_set_allocator (config,
+      gst_ion_allocator_obtain (), &params);
+
+  if (!gst_buffer_pool_set_config (pool, config)) {
+    GST_ERROR_OBJECT (dl, "gst_buffer_pool_set_config failed");
+    goto beach;
+  }
+  gst_buffer_pool_set_active (pool, TRUE);
+
+  dl->export_info.width = dl->src_info.width;
+  dl->export_info.height = dl->src_info.height;
+
+  if (export_format != src_format ||
+      dl->export_info.stride[0] != dl->src_info.stride[0]) {
+    converter = gst_video_converter_new (&dl->export_info, &dl->src_info,
+        gst_structure_new ("GstVideoConvertConfig",
+            GST_VIDEO_CONVERTER_OPT_THREADS, G_TYPE_UINT,
+            g_get_num_processors(), NULL));
+    if (!converter) {
+      GST_ERROR_OBJECT (dl, "gst_video_converter_new failed");
+      goto beach;
+    }
+  }
+
+  export_context = gst_gl_context_new (gst_gl_context_get_display (context));
+  if (!export_context) {
+    GST_ERROR_OBJECT (dl, "gst_gl_context_new failed");
+    goto beach;
+  }
+
+  if (!gst_gl_context_create (export_context, context, NULL)) {
+    GST_ERROR_OBJECT (dl, "gst_gl_context_create failed");
+    goto beach;
+  }
+
+  dl->glExportTexImageDMA = glExportTexImageDMA;
+  dl->export_context = export_context;
+  dl->export_pool = pool;
+  dl->converter = converter;
+  export_context = NULL;
+  pool = NULL;
+  converter = NULL;
+
+beach:
+  if (src_caps) {
+    gst_caps_unref (src_caps);
+  }
+  if (export_caps) {
+    gst_caps_unref (export_caps);
+  }
+  if (pool) {
+    gst_object_unref (pool);
+  }
+  if (converter) {
+    gst_video_converter_free (converter);
+  }
+  if (export_context) {
+    gst_object_unref (export_context);
+  }
+
+  GST_INFO_OBJECT (dl,
+      "glExportTexImageDMA %d converter %d: %s (%dx%d %d) -> %s (%dx%d %d)",
+      !!dl->glExportTexImageDMA, !!dl->converter,
+      _video_format_to_string (GST_VIDEO_INFO_FORMAT (&dl->export_info)),
+      dl->export_info.width, dl->export_info.height, dl->export_info.stride[0],
+      _video_format_to_string (GST_VIDEO_INFO_FORMAT (&dl->src_info)),
+      dl->src_info.width, dl->src_info.height, dl->src_info.stride[0]);
+}
+
+struct ExportParams
+{
+  GstGLDownloadElement *dl;
+  GLuint tex;
+  GLint fd;
+  GLint fourcc;
+  GLsizei width;
+  GLsizei height;
+  GLsizei stride;
+  GLboolean res;
+};
+
+static void
+_export_gl (GstGLContext * context, struct ExportParams * params)
+{
+  GstClockTime ts;
+
+  ts = gst_util_get_timestamp ();
+  params->res = params->dl->glExportTexImageDMA(params->tex, &params->fd,
+      &params->fourcc, &params->width, &params->height, &params->stride);
+  ts = gst_util_get_timestamp () - ts;
+  GST_DEBUG_OBJECT (params->dl, "glExportTexImageDMA %.2g ms",
+      (double) ts / GST_MSECOND);
+}
+
+static GstBuffer *
+gst_gl_download_element_export_teximage_dma (
+    GstGLDownloadElement * dl, GstBuffer * inbuf)
+{
+  GstVideoFormat export_format;
+  GstMemory *memory = gst_buffer_peek_memory (inbuf, 0);
+  GstGLContext *context = GST_GL_BASE_MEMORY_CAST (memory)->context;
+  GstBuffer *export_buf = NULL, *src_buf = NULL, *outbuf = NULL;
+  struct ExportParams params = { 0 };
+
+  if (!dl->glExportTexImageDMA) {
+    goto beach;
+  }
+
+  if (!gst_gl_context_can_share (context, dl->export_context)) {
+    GST_WARNING_OBJECT (dl, "buffer and internal GL contexts can't share"
+        " resources, performance will be degraded");
+  } else {
+    context = dl->export_context;
+  }
+
+  if (gst_buffer_pool_acquire_buffer (dl->export_pool, &export_buf, NULL) !=
+      GST_FLOW_OK) {
+    GST_ERROR_OBJECT (dl, "gst_buffer_pool_acquire_buffer failed");
+    goto beach;
+  }
+
+  export_format = GST_VIDEO_INFO_FORMAT (&dl->export_info);
+  switch (export_format) {
+    case GST_VIDEO_FORMAT_RGBA:
+      params.fourcc = DRM_FORMAT_ABGR8888;
+      break;
+    case GST_VIDEO_FORMAT_BGRA:
+      params.fourcc = DRM_FORMAT_ARGB8888;
+      break;
+    case GST_VIDEO_FORMAT_RGB:
+      params.fourcc = DRM_FORMAT_BGR888;
+      break;
+    case GST_VIDEO_FORMAT_BGR:
+      params.fourcc = DRM_FORMAT_RGB888;
+      break;
+    default:
+      goto beach;
+  }
+
+  params.dl = dl;
+  params.tex = gst_gl_memory_get_texture_id (GST_GL_MEMORY_CAST (memory));
+  params.fd = gst_dmabuf_memory_get_fd (gst_buffer_peek_memory (export_buf, 0));
+  params.width = dl->export_info.width;
+  params.height = dl->export_info.height;
+  params.stride = dl->export_info.stride[0];
+  gst_gl_context_thread_add (context, (GstGLContextThreadFunc) _export_gl,
+      &params);
+
+  if (!params.res) {
+    GST_ERROR_OBJECT (dl, "glExportTexImageDMA failed");
+    goto beach;
+  }
+
+  if (dl->converter) {
+    GstVideoFrame in_frame, out_frame;
+    GstClockTime ts;
+
+    /* TODO: buffer pool */
+    src_buf = gst_buffer_new_allocate (NULL, dl->src_info.size, NULL);
+
+    if (!src_buf) {
+      GST_ERROR_OBJECT (dl, "failed to allocate buffer of size %zu",
+          dl->src_info.size);
+      goto beach;
+    }
+
+    ts = gst_util_get_timestamp ();
+    if (!gst_video_frame_map (&in_frame, &dl->export_info, export_buf,
+        GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF)) {
+      GST_ERROR_OBJECT (dl, "gst_video_frame_map failed");
+      goto beach;
+    }
+    if (!gst_video_frame_map (&out_frame, &dl->src_info, src_buf,
+        GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF)) {
+      GST_ERROR_OBJECT (dl, "gst_video_frame_map failed");
+      goto beach;
+    }
+    gst_video_converter_frame (dl->converter, &in_frame, &out_frame);
+    gst_video_frame_unmap(&out_frame);
+    gst_video_frame_unmap(&in_frame);
+    ts = gst_util_get_timestamp () - ts;
+    GST_DEBUG_OBJECT (dl, "convert %.2g ms",
+        (double) ts / GST_MSECOND);
+
+    outbuf = src_buf;
+    src_buf = NULL;
+  } else {
+    outbuf = export_buf;
+    export_buf = NULL;
+  }
+
+  gst_buffer_copy_into (outbuf, inbuf,
+        GST_BUFFER_COPY_FLAGS | GST_BUFFER_COPY_TIMESTAMPS, 0, -1);
+
+beach:
+  gst_buffer_replace (&export_buf, NULL);
+  gst_buffer_replace (&src_buf, NULL);
+  return outbuf;
+}
+
+static gboolean
+gst_gl_download_element_gl_start (GstGLBaseFilter * base)
+{
+  GstGLDownloadElement *download = GST_GL_DOWNLOAD_ELEMENT_CAST (base);
+
+  setup_export_teximage_dma (download);
+
+  return GST_GL_BASE_FILTER_CLASS (parent_class)->gl_start (base);
+}
+
+static void
+gst_gl_download_element_gl_stop (GstGLBaseFilter * base)
+{
+  GstGLDownloadElement *download = GST_GL_DOWNLOAD_ELEMENT_CAST (base);
+
+  GST_GL_BASE_FILTER_CLASS (parent_class)->gl_stop (base);
+
+  if (download->export_pool) {
+    gst_buffer_pool_set_active (download->export_pool, FALSE);
+    gst_object_unref (GST_OBJECT (download->export_pool));
+    download->export_pool = NULL;
+  }
+
+  if (download->converter) {
+    gst_video_converter_free (download->converter);
+    download->converter = NULL;
+  }
+
+  if (download->export_context) {
+    gst_object_unref (download->export_context);
+    download->export_context = NULL;
+  }
+}
+#endif
+
diff --git a/ext/gl/gstgldownloadelement.h b/ext/gl/gstgldownloadelement.h
index a9fb4e1..7019f62 100644
--- a/ext/gl/gstgldownloadelement.h
+++ b/ext/gl/gstgldownloadelement.h
@@ -25,6 +25,7 @@
 #include <gst/gstmemory.h>
 
 #include <gst/gl/gl.h>
+#include <gst/gl/gstglfuncs.h>
 
 G_BEGIN_DECLS
 
@@ -39,6 +40,9 @@
 typedef struct _GstGLDownloadElement GstGLDownloadElement;
 typedef struct _GstGLDownloadElementClass GstGLDownloadElementClass;
 
+typedef GLboolean (GL_APIENTRYP PFNGLEXPORTTEXIMAGEDMAPROC)
+    (GLuint Texture, GLint *Fd, GLint *Fourcc, GLsizei *Width, GLsizei *Height, GLsizei *Stride);
+
 struct _GstGLDownloadElement
 {
   /* <private> */
@@ -47,6 +51,13 @@
   gboolean do_pbo_transfers;
   GstAllocator * dmabuf_allocator;
   gboolean add_videometa;
+
+  PFNGLEXPORTTEXIMAGEDMAPROC glExportTexImageDMA;
+  GstGLContext *export_context;
+  GstBufferPool * export_pool;
+  GstVideoInfo src_info;
+  GstVideoInfo export_info;
+  GstVideoConverter *converter;
 };
 
 struct _GstGLDownloadElementClass