Use MDP for scaling and color space conversion

Use two MDP instances for scaling and color space conversion, one for display
and one for ML

Refactor the input tensor generation to support grayscale models

Tested: edgetpu_detect with RGB and Gray FSSD models

Change-Id: I30d5051bd5227a756762590aca444c31b999127c
diff --git a/edgetpuvision/apps.py b/edgetpuvision/apps.py
index 2dbcbe6..48bcb25 100644
--- a/edgetpuvision/apps.py
+++ b/edgetpuvision/apps.py
@@ -30,7 +30,7 @@
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--source',
                         help='/dev/videoN:FMT:WxH:N/D or .mp4 file or image file',
-                        default='/dev/video0:RGB:640x480:30/1')
+                        default='/dev/video0:YUY2:640x480:30/1')
     parser.add_argument('--bitrate', type=int, default=1000000,
                         help='Video streaming bitrate (bit/s)')
     parser.add_argument('--loop', default=False, action='store_true',
@@ -44,8 +44,8 @@
     assert camera is not None
 
     with StreamingServer(camera, args.bitrate) as server:
-        def render_overlay(tensor, layout, command):
-            overlay = gen.send((tensor, layout, command))
+        def render_overlay(image, layout, command):
+            overlay = gen.send((image, layout, command))
             server.send_overlay(overlay if overlay else EMPTY_SVG)
 
         camera.render_overlay = render_overlay
@@ -56,7 +56,7 @@
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--source',
                         help='/dev/videoN:FMT:WxH:N/D or .mp4 file or image file',
-                        default='/dev/video0:RGB:1024x768:30/1')
+                        default='/dev/video0:YUY2:1024x768:30/1')
     parser.add_argument('--loop',  default=False, action='store_true',
                         help='Loop input video file')
     parser.add_argument('--displaymode', type=Display, choices=Display, default=Display.FULLSCREEN,
diff --git a/edgetpuvision/detect.py b/edgetpuvision/detect.py
index 44bda57..be30314 100644
--- a/edgetpuvision/detect.py
+++ b/edgetpuvision/detect.py
@@ -30,9 +30,11 @@
 import collections
 import colorsys
 import itertools
+import numpy as np
 import time
 
 from edgetpu.detection.engine import DetectionEngine
+from PIL import Image
 
 from . import svg
 from . import utils
@@ -157,10 +159,14 @@
 
     output = None
     while True:
-        tensor, layout, command = (yield output)
+        image, layout, command = (yield output)
 
         inference_rate = next(fps_counter)
         if draw_overlay:
+            _, _, _, c = engine.get_input_tensor_shape()
+            if c == 1:
+                image = image.convert('L')
+            tensor = np.asarray(image).flatten()
             start = time.monotonic()
             objs = engine .detect_with_input_tensor(tensor, threshold=args.threshold, top_k=args.top_k)
             inference_time = time.monotonic() - start
diff --git a/edgetpuvision/gstreamer.py b/edgetpuvision/gstreamer.py
index 3f80304..56670a3 100644
--- a/edgetpuvision/gstreamer.py
+++ b/edgetpuvision/gstreamer.py
@@ -26,8 +26,6 @@
 import threading
 import time
 
-import numpy as np
-
 import gi
 gi.require_version('Gtk', '3.0')
 gi.require_version('GLib', '2.0')
@@ -229,10 +227,10 @@
         else:
             custom_command = command
         # Resize the image before it is consumed by the model.
-        inference_img = Image.frombytes('RGB', caps_size(sample.get_caps()), data, 'raw')
+        inference_img = Image.frombytes('RGBX', (480, 360), data, 'raw').convert('RGB')
         image_width, image_height = inference_img.size
         inference_img = inference_img.resize(layout.inference_size, Image.NEAREST)
-        svg = render_overlay(np.asarray(inference_img).flatten(),
+        svg = render_overlay(inference_img,
                              command=custom_command)
 
         svg_overlay = pipeline.get_by_name('svg_overlay')
@@ -248,8 +246,8 @@
     inference_size = render_overlay_gen.send(None)  # Initialize.
     next(render_overlay_gen)
     return run(inference_size,
-        lambda tensor, layout, command:
-            render_overlay_gen.send((tensor, layout, command)),
+        lambda image, layout, command:
+            render_overlay_gen.send((image, layout, command)),
         source=source,
         loop=loop,
         display=display)
diff --git a/edgetpuvision/pipelines.py b/edgetpuvision/pipelines.py
index b3d84c7..586a176 100644
--- a/edgetpuvision/pipelines.py
+++ b/edgetpuvision/pipelines.py
@@ -18,10 +18,7 @@
     return [
         Source('file', location=filename),
         Filter('decodebin'),
-        Filter('mtkmdp width=1280 height=720 format=YUY2'),
-        Queue(max_size_buffers=1, leaky='downstream'),
-        Filter('videoconvert'),
-        Caps('video/x-raw', format='BGRA'),
+        Filter('mtkmdp width=1280 height=720 format=BGRx'),
     ]
 
 def v4l2_src(fmt):
@@ -29,9 +26,7 @@
         Source('v4l2', device=fmt.device),
         Caps('video/x-raw', format=fmt.pixel, width=fmt.size.width, height=fmt.size.height,
              framerate='%d/%d' % fmt.framerate),
-        Queue(),
-        Filter('videoconvert'),
-        Caps('video/x-raw', format='BGRA'),
+        Filter('mtkmdp width=1024 height=768 format=BGRx'),
     ]
 
 def display_sink():
@@ -47,7 +42,6 @@
     size = max_inner_size(layout.render_size, layout.inference_size)
     return [
         Filter('videoconvert'),
-        Caps('video/x-raw', format='RGB'),
         Sink('app', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False),
     ]
 
@@ -55,6 +49,7 @@
 def image_display_pipeline(filename, layout):
     return (
         [decoded_file_src(filename),
+         Filter('videoconvert'),
          Filter('imagefreeze'),
          Caps('video/x-raw', framerate='30/1'),
          Tee(name='t')],
@@ -62,6 +57,8 @@
          Queue(),
          display_sink()],
         [Pad('t'),
+         Filter('videoconvert'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(max_size_buffers=1, leaky='downstream'),
          inference_pipeline(layout)],
     )
@@ -72,9 +69,11 @@
         [decoded_file_src(filename),
          Tee(name='t')],
         [Pad('t'),
+         Filter('videoconvert'),
          Queue(),
          display_sink()],
         [Pad('t'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(max_size_buffers=1, leaky='downstream'),
          inference_pipeline(layout)],
     )
@@ -84,9 +83,11 @@
         [v4l2_src(fmt),
          Tee(name='t')],
         [Pad('t'),
+         Filter('videoconvert'),
          Queue(),
          display_sink()],
         [Pad(name='t'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(max_size_buffers=1, leaky='downstream'),
          inference_pipeline(layout)],
     )
@@ -95,19 +96,24 @@
 def image_headless_pipeline(filename, layout):
     return (
       [decoded_file_src(filename),
+       Filter('videoconvert'),
        Filter('imagefreeze'),
+       Filter('videoconvert'),
+       Filter('mtkmdp width=480 height=360 format=RGBx'),
        inference_pipeline(layout)],
     )
 
 def video_headless_pipeline(filename, layout):
     return (
         [decoded_file_src(filename),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          inference_pipeline(layout)],
     )
 
 def camera_headless_pipeline(fmt, layout):
     return (
         [v4l2_src(fmt),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          inference_pipeline(layout)],
     )
 
@@ -125,6 +131,7 @@
         [Pad('t'),
          Queue(max_size_buffers=1),
          Filter('decodebin'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          inference_pipeline(layout)],
     )
 
@@ -146,6 +153,7 @@
           Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
           h264_sink()],
         [Pad('t'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(),
          inference_pipeline(layout)],
     )