gldownload: Use EGL fences instead of glFinish to sync dmabufs
NXP implemented fast texture downloads using ION backed dmabufs
and used glFinish to ensure rendering is complete before passing
the CPU accessible buffer downstream. glFinish is disastrous for
performance though as it stalls the GPU until any and all pending
operations are completed, as well as the single GL thread shared
between al GL element in the pipeline.
This is particularly bad when there are two or more branches in
the pipeline (i.e. using tee). If one, say, is rendering to the
screen and one is doing color space conversion for inference in
separate paths this glFinish will force both to always wait for
drawing operations in the other even if they're not operating
on the same surfaces.
Since the dmabuf is imported as an EGLImage we can instead use
the EGL_KHR_fence_sync extension to sync CPU access to the
buffer. Not stalling the GPU is the main benefit, but we also
don't have to wait for the rendering to complete in the GL
thread so we're not blocking it either.
End result is about 20% higher throughput in the above
described use case, as well as much smaller screen refresh rate
jitter.
Change-Id: I6de5bcb2198571ac047f985d8179099595bbe8e8
diff --git a/gst-libs/gst/gl/gstglmemorydma.c b/gst-libs/gst/gl/gstglmemorydma.c
index 6609d46..362fbb7 100644
--- a/gst-libs/gst/gl/gstglmemorydma.c
+++ b/gst-libs/gst/gl/gstglmemorydma.c
@@ -239,8 +239,16 @@
{
GstBuffer *buf;
GstGLMemoryDMA *glmem;
+ GstGLSyncMeta *sync_meta;
- gst_gl_context_thread_add (ctx, (GstGLContextThreadFunc) _finish_texture, NULL);
+ sync_meta = gst_buffer_get_gl_sync_meta (glbuf);
+ if (!sync_meta) {
+ GstClockTime ts = gst_util_get_timestamp ();
+ gst_gl_context_thread_add (ctx, (GstGLContextThreadFunc) _finish_texture, NULL);
+ ts = gst_util_get_timestamp () - ts;
+ GST_CAT_DEBUG (GST_CAT_GL_DMA_MEMORY,
+ "glFinish %.2g ms", (double) ts / GST_MSECOND);
+ }
glmem = gst_buffer_peek_memory (glbuf, 0);
diff --git a/gst-libs/gst/gl/gstglsyncmeta.c b/gst-libs/gst/gl/gstglsyncmeta.c
index d74b3cd..0fc6ccb 100644
--- a/gst-libs/gst/gl/gstglsyncmeta.c
+++ b/gst-libs/gst/gl/gstglsyncmeta.c
@@ -37,6 +37,13 @@
#include "gstglcontext.h"
#include "gstglfuncs.h"
+#if GST_GL_HAVE_IONDMA
+#include "gst/gl/gstglmemorydma.h"
+#include "gst/gl/egl/gstegl.h"
+#include "gst/gl/egl/gstglcontext_egl.h"
+#include "gst/gl/egl/gstgldisplay_egl.h"
+#endif
+
GST_DEBUG_CATEGORY_STATIC (gst_gl_sync_meta_debug);
#define GST_CAT_DEFAULT gst_gl_sync_meta_debug
@@ -53,6 +60,100 @@
#define GL_TIMEOUT_IGNORED G_GUINT64_CONSTANT(0xFFFFFFFFFFFFFFFF)
#endif
+#if GST_GL_HAVE_IONDMA
+static gboolean
+_is_dma_memory (GstBuffer * buf)
+{
+ if (gst_buffer_n_memory(buf)) {
+ GstGLMemory *glmem;
+ glmem = gst_buffer_peek_memory (buf, 0);
+ return gst_is_gl_memory_dma (glmem);
+ }
+
+ return FALSE;
+}
+
+static EGLDisplay
+_get_egl_display (GstGLContext * context)
+{
+ EGLDisplay egl_display;
+ GstGLDisplayEGL *display_egl;
+ if (!context->display) {
+ return EGL_NO_DISPLAY;
+ }
+ display_egl = gst_gl_display_egl_from_gl_display (context->display);
+ egl_display =
+ (EGLDisplay) gst_gl_display_get_handle (GST_GL_DISPLAY (display_egl));
+ gst_object_unref (display_egl);
+ return egl_display;
+}
+
+static EGLSyncKHR
+_eglCreateSyncKHR (GstGLContext * context)
+{
+ EGLSyncKHR sync;
+ EGLDisplay dpy = _get_egl_display (context);
+ if (dpy == EGL_NO_DISPLAY) {
+ return EGL_NO_SYNC_KHR;
+ }
+
+ EGLSyncKHR (*gst_eglCreateSyncKHR) (EGLDisplay dpy, EGLenum type,
+ const EGLAttrib * attrib_list);
+ gst_eglCreateSyncKHR = gst_gl_context_get_proc_address (context,
+ "eglCreateSyncKHR");
+ sync = gst_eglCreateSyncKHR (dpy, EGL_SYNC_FENCE_KHR, NULL);
+ GST_LOG ("setting egl sync object %p", sync);
+ return sync;
+}
+
+static EGLint
+_eglClientWaitSyncKHR (GstGLContext * context, EGLSyncKHR sync)
+{
+ EGLDisplay dpy = _get_egl_display (context);
+ if (dpy == EGL_NO_DISPLAY) {
+ return;
+ }
+
+ EGLint (*gst_eglClientWaitSyncKHR) (EGLDisplay dpy, EGLSyncKHR sync,
+ EGLint flags, EGLTimeKHR timeout);
+ GST_LOG ("waiting on egl sync object %p", sync);
+ gst_eglClientWaitSyncKHR = gst_gl_context_get_proc_address (context,
+ "eglClientWaitSyncKHR");
+ return gst_eglClientWaitSyncKHR (dpy, sync,
+ EGL_SYNC_FLUSH_COMMANDS_BIT_KHR, 1000000000 /* 1s */ );
+}
+
+static EGLBoolean
+_eglDestroySyncKHR (GstGLContext * context, EGLSyncKHR sync)
+{
+ EGLDisplay dpy = _get_egl_display (context);
+ if (dpy == EGL_NO_DISPLAY) {
+ return EGL_FALSE;
+ }
+
+ EGLBoolean (*gst_eglDestroySyncKHR) (EGLDisplay dpy, EGLSyncKHR sync);
+ gst_eglDestroySyncKHR = gst_gl_context_get_proc_address (context,
+ "eglDestroySyncKHR");
+ GST_LOG ("deleting egl sync object %p", sync);
+ return gst_eglDestroySyncKHR (dpy, sync);
+}
+
+static void
+_wait_cpu_egl (GstGLSyncMeta * sync_meta, GstGLContext * context)
+{
+ EGLint egl_res;
+
+ if(sync_meta->egl_data == EGL_NO_SYNC_KHR) {
+ return;
+ }
+
+ do {
+ egl_res = _eglClientWaitSyncKHR (context,
+ (EGLSyncKHR) sync_meta->egl_data);
+ } while (egl_res == EGL_TIMEOUT_EXPIRED_KHR);
+}
+#endif
+
static void
_default_set_sync_gl (GstGLSyncMeta * sync_meta, GstGLContext * context)
{
@@ -70,6 +171,15 @@
if (gst_gl_context_is_shared (context))
gl->Flush ();
+
+#if GST_GL_HAVE_IONDMA
+ if (sync_meta->is_egl) {
+ if (sync_meta->egl_data) {
+ _eglDestroySyncKHR (context, (EGLSyncKHR) sync_meta->egl_data);
+ }
+ sync_meta->egl_data = _eglCreateSyncKHR (context);
+ }
+#endif
}
static void
@@ -89,6 +199,18 @@
const GstGLFuncs *gl = context->gl_vtable;
GLenum res;
+#if GST_GL_HAVE_IONDMA
+ if (sync_meta->egl_data) {
+ EGLint egl_res;
+
+ do {
+ egl_res = _eglClientWaitSyncKHR (context,
+ (EGLSyncKHR) sync_meta->egl_data);
+ } while (egl_res == EGL_TIMEOUT_EXPIRED_KHR);
+ return;
+ }
+#endif
+
if (sync_meta->data && gl->ClientWaitSync) {
do {
GST_LOG ("waiting on sync object %p", sync_meta->data);
@@ -107,6 +229,10 @@
{
GST_LOG ("copy sync object %p from meta %p to %p", src->data, src, dest);
+#if GST_GL_HAVE_IONDMA
+ dest->is_egl = src->is_egl;
+#endif
+
/* Setting a sync point here relies on GstBuffer copying
* metas after data */
gst_gl_sync_meta_set_sync_point (src, src->context);
@@ -122,6 +248,13 @@
gl->DeleteSync ((GLsync) sync_meta->data);
sync_meta->data = NULL;
}
+
+#if GST_GL_HAVE_IONDMA
+ if (sync_meta->egl_data) {
+ _eglDestroySyncKHR (context, (EGLSyncKHR) sync_meta->egl_data);
+ sync_meta->egl_data = NULL;
+ }
+#endif
}
/**
@@ -151,6 +284,9 @@
meta->context = gst_object_ref (context);
meta->data = data;
+#if GST_GL_HAVE_IONDMA
+ meta->is_egl = _is_dma_memory(buffer);
+#endif
return meta;
}
@@ -264,6 +400,11 @@
if (sync_meta->wait_cpu)
sync_meta->wait_cpu (sync_meta, context);
else
+#if GST_GL_HAVE_IONDMA
+ if (sync_meta->is_egl && sync_meta->egl_data) {
+ _wait_cpu_egl (sync_meta, context);
+ } else
+#endif
gst_gl_context_thread_add (context,
(GstGLContextThreadFunc) _wait_cpu, sync_meta);
}
@@ -353,6 +494,11 @@
sync_meta->free = NULL;
sync_meta->free_gl = NULL;
+#if GST_GL_HAVE_IONDMA
+ sync_meta->is_egl = FALSE;
+ sync_meta->egl_data = NULL;
+#endif
+
return TRUE;
}
diff --git a/gst-libs/gst/gl/gstglsyncmeta.h b/gst-libs/gst/gl/gstglsyncmeta.h
index 6daee86..8d01566 100644
--- a/gst-libs/gst/gl/gstglsyncmeta.h
+++ b/gst-libs/gst/gl/gstglsyncmeta.h
@@ -21,6 +21,10 @@
#ifndef __GST_GL_SYNC_META_H__
#define __GST_GL_SYNC_META_H__
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <gst/gl/gstgl_fwd.h>
G_BEGIN_DECLS
@@ -69,6 +73,11 @@
void (*copy) (GstGLSyncMeta * src, GstBuffer * sbuffer, GstGLSyncMeta * dest, GstBuffer * dbuffer);
void (*free) (GstGLSyncMeta * sync, GstGLContext * context);
void (*free_gl) (GstGLSyncMeta * sync, GstGLContext * context);
+
+#if GST_GL_HAVE_IONDMA
+ gboolean is_egl;
+ gpointer egl_data;
+#endif
};
GST_GL_API