msdk: manage MSDK surfaces separately

Currently a gst buffer gets one mfxFrameSurface when it is allocated, and
that surface can't be changed afterwards.
This is based on the assumption that the gst buffer and its
mfxFrameSurface have the same lifetime.
But that is not true. Sometimes, even after downstream has finished with
the gst buffer of a frame, the mfxFrameSurface coupled with that gst
buffer is still locked, which means it is still being used in the driver.

So this patch does the following.
Every time a gst buffer is acquired from the pool, the pool checks whether
the surface coupled with the buffer is unlocked.
If it is not, the surface is replaced with a new, unlocked one.
In this way, the user (decoder or encoder) doesn't need to manage gst
buffers that hold a locked surface.

To do that, this patch includes the following:
1. GstMsdkContext
- Manages the available, used and locked MSDK surfaces in separate lists,
as follows:
  1\ surfaces_avail : surfaces which are free and unused anywhere
  2\ surfaces_used : surfaces coupled with a gst buffer and being used
now.
  3\ surfaces_locked : surfaces still locked even after the gst buffer
is released.

- Provides an API to get an available MSDK surface.
- Provides an API to release an MSDK surface.

2. GstMsdkVideoMemory
- Gets an available surface when it's allocated.
- Provides an API to replace its surface with a new, unlocked one.
- Provides an API to release the surface held by the msdk video memory.

3. GstMsdkBufferPool
- In acquire_buffer, every time a gst buffer is acquired, it gets a new
available surface from the list.
- In release_buffer, it checks whether the buffer's surface is unlocked or
not.
  - If unlocked, the surface is put on the available list.
  - If still locked, the surface is put on the locked list.

This also fixes bug #793525.

https://bugzilla.gnome.org/show_bug.cgi?id=793413
https://bugzilla.gnome.org/show_bug.cgi?id=793525
diff --git a/sys/msdk/gstmsdkbufferpool.c b/sys/msdk/gstmsdkbufferpool.c
index 51d36c0..b42d021 100644
--- a/sys/msdk/gstmsdkbufferpool.c
+++ b/sys/msdk/gstmsdkbufferpool.c
@@ -227,6 +227,64 @@
   }
 }
 
+/* Acquire a buffer; with video memory, ensure it holds an unlocked surface. */
+static GstFlowReturn
+gst_msdk_buffer_pool_acquire_buffer (GstBufferPool * pool,
+    GstBuffer ** out_buffer_ptr, GstBufferPoolAcquireParams * params)
+{
+  GstMsdkBufferPoolPrivate *priv = GST_MSDK_BUFFER_POOL_CAST (pool)->priv;
+  GstBuffer *buf = NULL;
+  GstFlowReturn ret;
+  mfxFrameSurface1 *surface;
+
+  ret =
+      GST_BUFFER_POOL_CLASS (parent_class)->acquire_buffer (pool, &buf, params);
+
+  /* With video memory a surface may still be locked after SyncOperation and
+   * there is no unlock notification, so check on every acquire and replace
+   * a still-locked surface with a new unlocked/unused one. */
+  if (ret != GST_FLOW_OK || !priv->use_video_memory) {
+    if (buf)
+      *out_buffer_ptr = buf;
+    return ret;
+  }
+
+  surface = gst_msdk_get_surface_from_buffer (buf);
+  if (!surface || surface->Data.Locked > 0) {
+    if (!gst_msdk_video_memory_get_surface_available (GST_MSDK_VIDEO_MEMORY_CAST
+            (gst_buffer_peek_memory (buf, 0)))) {
+      GST_WARNING_OBJECT (pool, "failed to get new surface available");
+      /* Hand the buffer back to the parent pool instead of leaking it. */
+      GST_BUFFER_POOL_CLASS (parent_class)->release_buffer (pool, buf);
+      return GST_FLOW_ERROR;
+    }
+  }
+
+  *out_buffer_ptr = buf;
+  return GST_FLOW_OK;
+}
+
+/*
+ * Release the surface referenced by the buffer's first memory, if the pool
+ * uses video memory and a surface is attached, then chain up to the parent
+ * class' release_buffer.
+ */
+static void
+gst_msdk_buffer_pool_release_buffer (GstBufferPool * pool, GstBuffer * buf)
+{
+  GstMsdkBufferPoolPrivate *priv = GST_MSDK_BUFFER_POOL_CAST (pool)->priv;
+
+  if (priv->use_video_memory && gst_msdk_get_surface_from_buffer (buf)) {
+    GstMemory *first_mem = gst_buffer_peek_memory (buf, 0);
+
+    gst_msdk_video_memory_release_surface (GST_MSDK_VIDEO_MEMORY_CAST
+        (first_mem));
+  }
+
+  /* Always chain up, whether or not a surface was released. */
+  GST_BUFFER_POOL_CLASS (parent_class)->release_buffer (pool, buf);
+}
+
 static void
 gst_msdk_buffer_pool_finalize (GObject * object)
 {
@@ -258,6 +316,8 @@
   pool_class->get_options = gst_msdk_buffer_pool_get_options;
   pool_class->set_config = gst_msdk_buffer_pool_set_config;
   pool_class->alloc_buffer = gst_msdk_buffer_pool_alloc_buffer;
+  pool_class->acquire_buffer = gst_msdk_buffer_pool_acquire_buffer;
+  pool_class->release_buffer = gst_msdk_buffer_pool_release_buffer;
 }
 
 GstBufferPool *
diff --git a/sys/msdk/gstmsdkcontext.c b/sys/msdk/gstmsdkcontext.c
index 696998e..352148f 100644
--- a/sys/msdk/gstmsdkcontext.c
+++ b/sys/msdk/gstmsdkcontext.c
@@ -58,6 +58,7 @@
   gboolean is_joined;
   GstMsdkContextJobType job_type;
   gint shared_async_depth;
+  GMutex mutex;
 #ifndef _WIN32
   gint fd;
   VADisplay dpy;
@@ -208,6 +209,8 @@
   GstMsdkContextPrivate *priv = GST_MSDK_CONTEXT_GET_PRIVATE (context);
 
   context->priv = priv;
+
+  g_mutex_init (&priv->mutex);
 }
 
 static void
@@ -222,6 +225,7 @@
   }
 
   msdk_close_session (priv->session);
+  g_mutex_clear (&priv->mutex);
 
 #ifndef _WIN32
   if (priv->dpy)
@@ -365,12 +369,47 @@
     return NULL;
 }
 
+/* Create one zero-initialised mfxFrameSurface1 per frame of @resp, bind it
+ * to the matching memory id and prepend it to resp->surfaces_avail.
+ * g_slice_new0() never returns NULL, so no allocation check is needed.
+ * @context is unused. */
+static void
+create_surfaces (GstMsdkContext * context, GstMsdkAllocResponse * resp)
+{
+  gint i;
+
+  for (i = 0; i < resp->response->NumFrameActual; i++) {
+    mfxFrameSurface1 *surface = g_slice_new0 (mfxFrameSurface1);
+
+    surface->Data.MemId = resp->mem_ids[i];
+    resp->surfaces_avail = g_list_prepend (resp->surfaces_avail, surface);
+  }
+}
+
+/* GDestroyNotify-style helper freeing one surface from create_surfaces(). */
+static void
+free_surface (gpointer surface)
+{
+  g_slice_free1 (sizeof (mfxFrameSurface1), surface);
+}
+
+/* Free the surfaces on all three lists (used, available, locked) of @resp. */
+static void
+remove_surfaces (GstMsdkContext * context, GstMsdkAllocResponse * resp)
+{
+  g_list_free_full (resp->surfaces_used, free_surface);
+  g_list_free_full (resp->surfaces_avail, free_surface);
+  g_list_free_full (resp->surfaces_locked, free_surface);
+}
+
 void
 gst_msdk_context_add_alloc_response (GstMsdkContext * context,
     GstMsdkAllocResponse * resp)
 {
   context->priv->cached_alloc_responses =
-      g_list_append (context->priv->cached_alloc_responses, resp);
+      g_list_prepend (context->priv->cached_alloc_responses, resp);
+
+  create_surfaces (context, resp);
 }
 
 gboolean
@@ -387,6 +426,8 @@
 
   msdk_resp = l->data;
 
+  remove_surfaces (context, msdk_resp);
+
   g_slice_free1 (sizeof (GstMsdkAllocResponse), msdk_resp);
   priv->cached_alloc_responses =
       g_list_delete_link (priv->cached_alloc_responses, l);
@@ -394,6 +435,121 @@
   return TRUE;
 }
 
+/* Move every surface the driver has unlocked from the locked list of @resp
+ * back to its available list.  Returns TRUE if any surface was moved. */
+static gboolean
+check_surfaces_available (GstMsdkContext * context, GstMsdkAllocResponse * resp)
+{
+  GList *l, *next;
+  gboolean ret = FALSE;
+
+  g_mutex_lock (&context->priv->mutex);
+  for (l = resp->surfaces_locked; l; l = next) {
+    mfxFrameSurface1 *surface = l->data;
+    next = l->next;             /* read before the node may be freed below */
+    if (!surface->Data.Locked) {
+      resp->surfaces_locked = g_list_delete_link (resp->surfaces_locked, l);
+      resp->surfaces_avail = g_list_prepend (resp->surfaces_avail, surface);
+      ret = TRUE;
+    }
+  }
+  g_mutex_unlock (&context->priv->mutex);
+  return ret;
+}
+
+/*
+ * Each cached GstMsdkAllocResponse keeps 3 lists as the following:
+ * 1. surfaces_avail : surfaces which are free and unused anywhere
+ * 2. surfaces_used : surfaces coupled with a gst buffer and being used now.
+ * 3. surfaces_locked : surfaces still locked even after the gst buffer is released.
+ * Note that they need to be protected by mutex to be thread-safe.
+ * Returns an unlocked surface, moved from the available to the used list, or
+ * NULL if @resp is not cached or no surface got free after 20 polls of 1 ms.
+ */
+mfxFrameSurface1 *
+gst_msdk_context_get_surface_available (GstMsdkContext * context,
+    mfxFrameAllocResponse * resp)
+{
+  GList *l;
+  mfxFrameSurface1 *surface = NULL;
+  GstMsdkAllocResponse *msdk_resp =
+      gst_msdk_context_get_cached_alloc_responses (context, resp);
+  gint retry = 0;
+  GstMsdkContextPrivate *priv = context->priv;
+
+  if (!msdk_resp)
+    return NULL;
+
+retry:
+  g_mutex_lock (&priv->mutex);
+  for (l = msdk_resp->surfaces_avail; l; l = l->next) {
+    mfxFrameSurface1 *candidate = l->data;
+
+    /* Only an unlocked candidate may become the return value. */
+    if (!candidate->Data.Locked) {
+      msdk_resp->surfaces_avail =
+          g_list_delete_link (msdk_resp->surfaces_avail, l);
+      msdk_resp->surfaces_used =
+          g_list_prepend (msdk_resp->surfaces_used, candidate);
+      surface = candidate;
+      break;
+    }
+  }
+  g_mutex_unlock (&priv->mutex);
+
+  /*
+   * If a msdk context is shared by multiple msdk elements, an upstream msdk
+   * element sometimes needs to wait for a gst buffer to be released
+   * downstream, so poll for a maximum of 20 milliseconds.
+   * FIXME: Is there any better way to handle this case?
+   */
+  if (!surface && retry < 20) {
+    /* Take surfaces unlocked meanwhile back from the locked list, retry. */
+    check_surfaces_available (context, msdk_resp);
+    retry++;
+    g_usleep (1000);
+    goto retry;
+  }
+
+  return surface;
+}
+
+/* Move @surface from the used list of @resp to its locked list. */
+void
+gst_msdk_context_put_surface_locked (GstMsdkContext * context,
+    mfxFrameAllocResponse * resp, mfxFrameSurface1 * surface)
+{
+  GstMsdkAllocResponse *entry =
+      gst_msdk_context_get_cached_alloc_responses (context, resp);
+
+  if (!entry)                   /* @resp is not cached: nothing to track */
+    return;
+  g_mutex_lock (&context->priv->mutex);
+  if (!g_list_find (entry->surfaces_locked, surface)) {
+    entry->surfaces_used = g_list_remove (entry->surfaces_used, surface);
+    entry->surfaces_locked = g_list_prepend (entry->surfaces_locked, surface);
+  }
+  g_mutex_unlock (&context->priv->mutex);
+}
+
+/* Move @surface from the used list of @resp to its available list. */
+void
+gst_msdk_context_put_surface_available (GstMsdkContext * context,
+    mfxFrameAllocResponse * resp, mfxFrameSurface1 * surface)
+{
+  GstMsdkAllocResponse *entry =
+      gst_msdk_context_get_cached_alloc_responses (context, resp);
+
+  if (!entry)                   /* @resp is not cached: nothing to track */
+    return;
+  g_mutex_lock (&context->priv->mutex);
+  if (!g_list_find (entry->surfaces_avail, surface)) {
+    entry->surfaces_used = g_list_remove (entry->surfaces_used, surface);
+    entry->surfaces_avail = g_list_prepend (entry->surfaces_avail, surface);
+  }
+  g_mutex_unlock (&context->priv->mutex);
+}
+
 GstMsdkContextJobType
 gst_msdk_context_get_job_type (GstMsdkContext * context)
 {
diff --git a/sys/msdk/gstmsdkcontext.h b/sys/msdk/gstmsdkcontext.h
index debed6f..cbaf689 100644
--- a/sys/msdk/gstmsdkcontext.h
+++ b/sys/msdk/gstmsdkcontext.h
@@ -100,7 +100,9 @@
   mfxFrameAllocResponse *response;
   mfxFrameAllocRequest request;
   mfxMemId *mem_ids;
-  gint num_used_memory;
+  GList *surfaces_avail;
+  GList *surfaces_used;
+  GList *surfaces_locked;
 };
 
 GstMsdkAllocResponse *
@@ -119,6 +121,15 @@
 gst_msdk_context_remove_alloc_response (GstMsdkContext * context,
     mfxFrameAllocResponse * resp);
 
+mfxFrameSurface1 *
+gst_msdk_context_get_surface_available (GstMsdkContext * context, mfxFrameAllocResponse * resp);
+
+void
+gst_msdk_context_put_surface_locked (GstMsdkContext * context, mfxFrameAllocResponse * resp, mfxFrameSurface1 * surface);
+
+void
+gst_msdk_context_put_surface_available (GstMsdkContext * context, mfxFrameAllocResponse * resp, mfxFrameSurface1 * surface);
+
 GstMsdkContextJobType
 gst_msdk_context_get_job_type (GstMsdkContext * context);
 
diff --git a/sys/msdk/gstmsdkvideomemory.c b/sys/msdk/gstmsdkvideomemory.c
index 831a540..2951b80 100644
--- a/sys/msdk/gstmsdkvideomemory.c
+++ b/sys/msdk/gstmsdkvideomemory.c
@@ -36,37 +36,19 @@
 #include "gstmsdkvideomemory.h"
 #include "gstmsdkallocator.h"
 
-static gboolean
-ensure_data (GstMsdkVideoMemory * mem, GstMsdkVideoAllocator * allocator)
-{
-  GstMsdkMemoryID *mem_id;
-  GstMsdkAllocResponse *resp =
-      gst_msdk_context_get_cached_alloc_responses (allocator->context,
-      allocator->alloc_response);
-
-  if (!resp) {
-    GST_WARNING ("failed to get allocation response");
-    return FALSE;
-  }
-
-  mem_id = (GstMsdkMemoryID *) resp->mem_ids[resp->num_used_memory++];
-  mem->surface->Data.MemId = mem_id;
-
-  return TRUE;
-}
-
 static mfxFrameSurface1 *
-gst_msdk_video_allocator_create_surface (GstAllocator * allocator)
+gst_msdk_video_allocator_get_surface (GstAllocator * allocator)
 {
   mfxFrameInfo frame_info = { {0,}, 0, };
   mfxFrameSurface1 *surface;
   GstMsdkVideoAllocator *msdk_video_allocator =
       GST_MSDK_VIDEO_ALLOCATOR_CAST (allocator);
 
-  surface = (mfxFrameSurface1 *) g_slice_new0 (mfxFrameSurface1);
-
+  surface =
+      gst_msdk_context_get_surface_available (msdk_video_allocator->context,
+      msdk_video_allocator->alloc_response);
   if (!surface) {
-    GST_ERROR ("failed to allocate surface");
+    GST_ERROR ("failed to get surface available");
     return NULL;
   }
 
@@ -78,6 +60,42 @@
   return surface;
 }
 
+/* Store a newly fetched surface in @mem; returns TRUE if one was obtained. */
+gboolean
+gst_msdk_video_memory_get_surface_available (GstMsdkVideoMemory * mem)
+{
+  GstAllocator *owner = GST_MEMORY_CAST (mem)->allocator;
+
+  mem->surface = gst_msdk_video_allocator_get_surface (owner);
+  return mem->surface != NULL;
+}
+
+/*
+ * Hand the surface held by @mem back to the context when its gst buffer is
+ * released.  A surface that is still locked goes to the locked list so that
+ * it is managed separately; an unlocked one goes to the available list.
+ * In both cases @mem no longer references a surface afterwards.
+ */
+void
+gst_msdk_video_memory_release_surface (GstMsdkVideoMemory * mem)
+{
+  GstMsdkVideoAllocator *owner =
+      GST_MSDK_VIDEO_ALLOCATOR_CAST (GST_MEMORY_CAST (mem)->allocator);
+  mfxFrameSurface1 *surface = mem->surface;
+
+  /* Nothing is attached, e.g. the surface was already released. */
+  if (!surface)
+    return;
+
+  if (surface->Data.Locked > 0)
+    gst_msdk_context_put_surface_locked (owner->context,
+        owner->alloc_response, surface);
+  else
+    gst_msdk_context_put_surface_available (owner->context,
+        owner->alloc_response, surface);
+  mem->surface = NULL;
+}
+
 GstMemory *
 gst_msdk_video_memory_new (GstAllocator * base_allocator)
 {
@@ -94,16 +112,15 @@
   if (!mem)
     return NULL;
 
-  mem->surface = gst_msdk_video_allocator_create_surface (base_allocator);
+  mem->surface = gst_msdk_video_allocator_get_surface (base_allocator);
+  if (!mem->surface)
+    return FALSE;
 
   vip = &allocator->image_info;
   gst_memory_init (&mem->parent_instance, GST_MEMORY_FLAG_NO_SHARE,
       base_allocator, NULL, GST_VIDEO_INFO_SIZE (vip), 0, 0,
       GST_VIDEO_INFO_SIZE (vip));
 
-  if (!ensure_data (mem, allocator))
-    return FALSE;
-
   return GST_MEMORY_CAST (mem);
 }
 
@@ -243,19 +260,6 @@
 }
 
 static void
-gst_msdk_video_allocator_free (GstAllocator * allocator, GstMemory * memory)
-{
-  GstMsdkVideoAllocator *msdk_video_allocator =
-      GST_MSDK_VIDEO_ALLOCATOR_CAST (allocator);
-  GstMsdkAllocResponse *resp =
-      gst_msdk_context_get_cached_alloc_responses
-      (msdk_video_allocator->context, msdk_video_allocator->alloc_response);
-
-  if (resp)
-    resp->num_used_memory--;
-}
-
-static void
 gst_msdk_video_allocator_finalize (GObject * object)
 {
   GstMsdkVideoAllocator *allocator = GST_MSDK_VIDEO_ALLOCATOR_CAST (object);
@@ -273,7 +277,6 @@
   object_class->finalize = gst_msdk_video_allocator_finalize;
 
   allocator_class->alloc = gst_msdk_video_allocator_alloc;
-  allocator_class->free = gst_msdk_video_allocator_free;
 }
 
 static void
diff --git a/sys/msdk/gstmsdkvideomemory.h b/sys/msdk/gstmsdkvideomemory.h
index 481ec9c..554b4a4 100644
--- a/sys/msdk/gstmsdkvideomemory.h
+++ b/sys/msdk/gstmsdkvideomemory.h
@@ -73,6 +73,12 @@
 gst_msdk_video_memory_new (GstAllocator * allocator);
 
 gboolean
+gst_msdk_video_memory_get_surface_available (GstMsdkVideoMemory * mem);
+
+void
+gst_msdk_video_memory_release_surface (GstMsdkVideoMemory * mem);
+
+gboolean
 gst_video_meta_map_msdk_memory (GstVideoMeta * meta, guint plane,
     GstMapInfo * info, gpointer * data, gint * stride, GstMapFlags flags);