Enable OOBE edgetpuvision APIs with 4.19 kernel

Use USB webcam, raw video, or picture file as the input.
Will remove this workaround (WAR) after MIPI camera and vcodec are enabled.

Stream mode is not supported since we don't have the vcodec.

Add mdpd and gstreamer1.0-gl as dependencies to use v4l2convert and
glcolorscale.

Test:
edgetpu_detect --source=grace_hopper.bmp
--model=/usr/share/edgetpudemo/mobilenet_ssd_v1_coco_quant_postprocess_edgetpu.tflite
--label=/usr/share/edgetpudemo/coco_labels.txt --top_k=2

Test:
edgetpu_classify --source=bird.bmp
--model=mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite
--label=inat_bird_labels.txt

Test:
edgetpu_detect
--model=mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite

Test:
edgetpu_demo --device

Change-Id: Icfca5776e406e3c43b0b5f7a934c6c8f43626354
diff --git a/debian/changelog b/debian/changelog
index a8bf97f..973b7fa 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+edgetpuvision (6-2) mendel-day; urgency=medium
+
+  * Update scaler to use glcolorscale and v4l2convert.
+
+ -- Coral Team <coral-support@google.com>  Mon, 08 Jun 2020 16:25:45 -0700
+
 edgetpuvision (6-1) mendel-day; urgency=medium
 
   * Cut for day release.
diff --git a/debian/control b/debian/control
index 3150757..d40115c 100644
--- a/debian/control
+++ b/debian/control
@@ -4,7 +4,7 @@
 Priority: optional
 Build-Depends: dh-python, python3-setuptools, python3-all, debhelper (>= 9), libgstreamer1.0-dev, pkg-config
 Standards-Version: 3.9.8
-Homepage: https://coral.withgoogle.com/
+Homepage: https://coral.ai/
 
 Package: python3-edgetpuvision
 Architecture: any
@@ -14,9 +14,11 @@
          gir1.2-gst-plugins-base-1.0,
          gir1.2-gstreamer-1.0,
          gir1.2-gtk-3.0,
+         gstreamer1.0-gl,
          gstreamer1.0-plugins-bad,
          gstreamer1.0-plugins-good,
          gstreamer1.0-plugins-ugly,
+         mdpd,
          python3-cairo,
          python3-edgetpu,
          python3-gi,
diff --git a/edgetpuvision/apps.py b/edgetpuvision/apps.py
index 48bcb25..6d11131 100644
--- a/edgetpuvision/apps.py
+++ b/edgetpuvision/apps.py
@@ -56,7 +56,7 @@
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--source',
                         help='/dev/videoN:FMT:WxH:N/D or .mp4 file or image file',
-                        default='/dev/video0:YUY2:1024x768:30/1')
+                        default='/dev/video0:YUY2:640x480:30/1')
     parser.add_argument('--loop',  default=False, action='store_true',
                         help='Loop input video file')
     parser.add_argument('--displaymode', type=Display, choices=Display, default=Display.FULLSCREEN,
diff --git a/edgetpuvision/classify.py b/edgetpuvision/classify.py
index 408ecda..e24d7fb 100644
--- a/edgetpuvision/classify.py
+++ b/edgetpuvision/classify.py
@@ -23,9 +23,11 @@
 import argparse
 import collections
 import itertools
+import numpy as np
 import time
 
 from edgetpu.classification.engine import ClassificationEngine
+from PIL import Image
 
 from . import svg
 from . import utils
@@ -121,10 +123,14 @@
 
     output = None
     while True:
-        tensor, layout, command = (yield output)
+        image, layout, command = (yield output)
 
         inference_rate = next(fps_counter)
         if draw_overlay:
+            _, _, _, c = engine.get_input_tensor_shape()
+            if c == 1:
+                image = image.convert('L')
+            tensor = np.asarray(image).flatten()
             start = time.monotonic()
             results = engine.classify_with_input_tensor(tensor, threshold=args.threshold, top_k=args.top_k)
             inference_time = time.monotonic() - start
diff --git a/edgetpuvision/gstreamer.py b/edgetpuvision/gstreamer.py
index 56670a3..57733ff 100644
--- a/edgetpuvision/gstreamer.py
+++ b/edgetpuvision/gstreamer.py
@@ -32,9 +32,10 @@
 gi.require_version('GObject', '2.0')
 gi.require_version('Gst', '1.0')
 gi.require_version('GstBase', '1.0')
+gi.require_version('GstGL', '1.0')
 gi.require_version('GstPbutils', '1.0')
 gi.require_version('GstVideo', '1.0')
-from gi.repository import GLib, GObject, Gst, GstBase, GstVideo, Gtk
+from gi.repository import GLib, GObject, Gst, GstBase, GstGL, GstVideo, Gtk
 
 GObject.threads_init()
 Gst.init([])
@@ -140,8 +141,8 @@
     # render_size capped to 1280x720
     render_size = Size(*render_size)
     width, height = render_size
-    if width > 1280:
-        render_size = render_size * 1280 / width
+    if height > 720:
+        render_size = render_size * 720 / height
     size = min_outer_size(inference_size, render_size)
     window = center_inside(render_size, size)
     return Layout(size=size, window=window,
@@ -226,10 +227,9 @@
             Gtk.main_quit()
         else:
             custom_command = command
-        # Resize the image before it is consumed by the model.
-        inference_img = Image.frombytes('RGBX', (480, 360), data, 'raw').convert('RGB')
-        image_width, image_height = inference_img.size
-        inference_img = inference_img.resize(layout.inference_size, Image.NEAREST)
+        # Read the data as an image before it is consumed by the model.
+        image_width, image_height = layout.inference_size
+        inference_img = Image.frombytes('RGB', (image_width, image_height), data, 'raw')
         svg = render_overlay(inference_img,
                              command=custom_command)
 
@@ -264,15 +264,27 @@
 def get_pipeline(source, inference_size, display):
     fmt = parse_format(source)
     if fmt:
-        layout = make_layout(inference_size, fmt.size)
+        # Cap the render size at 720p
+        _, camera_height = fmt.size
+        render_size = fmt.size * 720 / camera_height
+        layout = make_layout(inference_size, render_size)
         return layout, camera_pipeline(fmt, layout, display)
 
     filename = os.path.expanduser(source)
     if os.path.isfile(filename):
-        info = get_video_info(filename)
-        render_size = Size(info.get_width(), info.get_height())
+        # TODO: Revert this back to info.is_image() check
+        is_image = filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff',
+            '.tif', '.bmp', '.gif'))
+        if is_image:
+            info = get_video_info(filename)
+            render_size = Size(info.get_width(), info.get_height()) * 720 / info.get_height()
+        else:
+            # TODO: Remove YUV check
+            if not filename.lower().endswith(('.yuv')):
+                raise ValueError('Only support YUV raw videos')
+            render_size = Size(1280, 720)
         layout = make_layout(inference_size, render_size)
-        return layout, file_pipline(info.is_image(), filename, layout, display)
+        return layout, file_pipline(is_image, filename, layout, display)
 
     return None
 
@@ -343,14 +355,14 @@
         def on_bus_message_sync(bus, message, glsink):
             if message.type == Gst.MessageType.NEED_CONTEXT:
                 _, context_type = message.parse_context_type()
-                if context_type == 'gst.gl.GLDisplay':
-                    sinkelement = glsink.get_by_interface(GstVideo.VideoOverlay)
-                    gl_context = sinkelement.get_property('context')
-                    if gl_context:
-                        display_context = Gst.Context.new('gst.gl.GLDisplay', True)
-                        display_structure = display_context.writable_structure()
-                        display_structure.set_value('gst.gl.GLDisplay', gl_context.get_display())
-                        message.src.set_context(display_context)
+        #        if context_type == 'gst.gl.GLDisplay':
+        #            sinkelement = glsink.get_by_interface(GstVideo.VideoOverlay)
+        #            gl_context = sinkelement.get_property('context')
+        #            if gl_context:
+        #                display_context = Gst.Context.new('gst.gl.GLDisplay', True)
+        #                display_structure = display_context.writable_structure()
+        #                display_structure.set_value('gst.gl.GLDisplay', gl_context.get_display())
+        #                message.src.set_context(display_context)
             return Gst.BusSyncReply.PASS
 
         bus.set_sync_handler(on_bus_message_sync, glsink)
diff --git a/edgetpuvision/pipelines.py b/edgetpuvision/pipelines.py
index 586a176..edf2678 100644
--- a/edgetpuvision/pipelines.py
+++ b/edgetpuvision/pipelines.py
@@ -17,22 +17,41 @@
 def decoded_file_src(filename):
     return [
         Source('file', location=filename),
+        #Filter('decodebin'),
+        Filter('videoparse width=960 height=540 format=4'),
+        Caps('video/x-raw,framerate=25/1'),
+        Filter('videoconvert'),
+        Caps('video/x-raw,format=I420'),
+        Filter('glfilterbin filter=glcolorscale'),
+        Caps('video/x-raw,width=1280,height=720,format=BGRA'),
+    ]
+
+#TODO: Remove this function when video codec is available
+def decoded_img_file_src(filename):
+    return [
+        Source('file', location=filename),
         Filter('decodebin'),
-        Filter('mtkmdp width=1280 height=720 format=BGRx'),
+        Filter('videoconvert'),
+        Caps('video/x-raw,format=I420'),
+        Filter('glfilterbin filter=glcolorscale'),
+        Caps('video/x-raw,height=720,format=BGRA'),
     ]
 
 def v4l2_src(fmt):
     return [
         Source('v4l2', device=fmt.device),
-        Caps('video/x-raw', format=fmt.pixel, width=fmt.size.width, height=fmt.size.height,
+        # TODO: use YUV input when MIPI camera is ready
+        Caps('image/jpeg', width=fmt.size.width, height=fmt.size.height,
              framerate='%d/%d' % fmt.framerate),
-        Filter('mtkmdp width=1024 height=768 format=BGRx'),
+        Filter('decodebin'),
+        Filter('glfilterbin filter=glcolorscale'),
+        Caps('video/x-raw', height=720, format='BGRA'),
     ]
 
 def display_sink():
     return [
         Filter('rsvgoverlay', name='svg_overlay'),
-        Sink('wayland', name='glsink', sync='false')
+        Sink('wayland', name='glsink', sync=False)
     ]
 
 def h264_sink():
@@ -41,14 +60,16 @@
 def inference_pipeline(layout, stillimage=False):
     size = max_inner_size(layout.render_size, layout.inference_size)
     return [
-        Filter('videoconvert'),
+        Filter('v4l2convert'),
+        Caps('video/x-raw,format=RGB,width=%d,height=%d,pixel-aspect-ratio=1/1' % (
+            layout.inference_size.width, layout.inference_size.height)),
         Sink('app', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False),
     ]
 
 # Display
 def image_display_pipeline(filename, layout):
     return (
-        [decoded_file_src(filename),
+        [decoded_img_file_src(filename),
          Filter('videoconvert'),
          Filter('imagefreeze'),
          Caps('video/x-raw', framerate='30/1'),
@@ -57,8 +78,6 @@
          Queue(),
          display_sink()],
         [Pad('t'),
-         Filter('videoconvert'),
-         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(max_size_buffers=1, leaky='downstream'),
          inference_pipeline(layout)],
     )
@@ -69,11 +88,9 @@
         [decoded_file_src(filename),
          Tee(name='t')],
         [Pad('t'),
-         Filter('videoconvert'),
          Queue(),
          display_sink()],
         [Pad('t'),
-         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(max_size_buffers=1, leaky='downstream'),
          inference_pipeline(layout)],
     )
@@ -83,11 +100,9 @@
         [v4l2_src(fmt),
          Tee(name='t')],
         [Pad('t'),
-         Filter('videoconvert'),
-         Queue(),
+         Queue(max_size_buffers=1, leaky='downstream'),
          display_sink()],
         [Pad(name='t'),
-         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(max_size_buffers=1, leaky='downstream'),
          inference_pipeline(layout)],
     )
@@ -95,25 +110,21 @@
 # Headless
 def image_headless_pipeline(filename, layout):
     return (
-      [decoded_file_src(filename),
+      [decoded_img_file_src(filename),
        Filter('videoconvert'),
        Filter('imagefreeze'),
-       Filter('videoconvert'),
-       Filter('mtkmdp width=480 height=360 format=RGBx'),
        inference_pipeline(layout)],
     )
 
 def video_headless_pipeline(filename, layout):
     return (
         [decoded_file_src(filename),
-         Filter('mtkmdp width=480 height=360 format=RGBx'),
          inference_pipeline(layout)],
     )
 
 def camera_headless_pipeline(fmt, layout):
     return (
         [v4l2_src(fmt),
-         Filter('mtkmdp width=480 height=360 format=RGBx'),
          inference_pipeline(layout)],
     )
 
@@ -131,7 +142,6 @@
         [Pad('t'),
          Queue(max_size_buffers=1),
          Filter('decodebin'),
-         Filter('mtkmdp width=480 height=360 format=RGBx'),
          inference_pipeline(layout)],
     )
 
@@ -153,7 +163,6 @@
           Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
           h264_sink()],
         [Pad('t'),
-         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(),
          inference_pipeline(layout)],
     )