Use MDP for scaling and color space conversion

Use two MDP instances for scaling and color space conversion, one for display
and one for ML

Refactor the input tensor generation to support grayscale models

Tested: edgetpu_detect with RGB and Gray FSSD models

Change-Id: I30d5051bd5227a756762590aca444c31b999127c
diff --git a/edgetpuvision/apps.py b/edgetpuvision/apps.py
index 2dbcbe6..48bcb25 100644
--- a/edgetpuvision/apps.py
+++ b/edgetpuvision/apps.py
@@ -30,7 +30,7 @@
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--source',
                         help='/dev/videoN:FMT:WxH:N/D or .mp4 file or image file',
-                        default='/dev/video0:RGB:640x480:30/1')
+                        default='/dev/video0:YUY2:640x480:30/1')
     parser.add_argument('--bitrate', type=int, default=1000000,
                         help='Video streaming bitrate (bit/s)')
     parser.add_argument('--loop', default=False, action='store_true',
@@ -44,8 +44,8 @@
     assert camera is not None
 
     with StreamingServer(camera, args.bitrate) as server:
-        def render_overlay(tensor, layout, command):
-            overlay = gen.send((tensor, layout, command))
+        def render_overlay(image, layout, command):
+            overlay = gen.send((image, layout, command))
             server.send_overlay(overlay if overlay else EMPTY_SVG)
 
         camera.render_overlay = render_overlay
@@ -56,7 +56,7 @@
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--source',
                         help='/dev/videoN:FMT:WxH:N/D or .mp4 file or image file',
-                        default='/dev/video0:RGB:1024x768:30/1')
+                        default='/dev/video0:YUY2:1024x768:30/1')
     parser.add_argument('--loop',  default=False, action='store_true',
                         help='Loop input video file')
     parser.add_argument('--displaymode', type=Display, choices=Display, default=Display.FULLSCREEN,
diff --git a/edgetpuvision/detect.py b/edgetpuvision/detect.py
index 44bda57..be30314 100644
--- a/edgetpuvision/detect.py
+++ b/edgetpuvision/detect.py
@@ -30,9 +30,11 @@
 import collections
 import colorsys
 import itertools
+import numpy as np
 import time
 
 from edgetpu.detection.engine import DetectionEngine
+from PIL import Image
 
 from . import svg
 from . import utils
@@ -157,10 +159,14 @@
 
     output = None
     while True:
-        tensor, layout, command = (yield output)
+        image, layout, command = (yield output)
 
         inference_rate = next(fps_counter)
         if draw_overlay:
+            _, _, _, c = engine.get_input_tensor_shape()
+            if c == 1:
+                image = image.convert('L')
+            tensor = np.asarray(image).flatten()
             start = time.monotonic()
             objs = engine .detect_with_input_tensor(tensor, threshold=args.threshold, top_k=args.top_k)
             inference_time = time.monotonic() - start
diff --git a/edgetpuvision/gstreamer.py b/edgetpuvision/gstreamer.py
index 3f80304..56670a3 100644
--- a/edgetpuvision/gstreamer.py
+++ b/edgetpuvision/gstreamer.py
@@ -26,8 +26,6 @@
 import threading
 import time
 
-import numpy as np
-
 import gi
 gi.require_version('Gtk', '3.0')
 gi.require_version('GLib', '2.0')
@@ -229,10 +227,10 @@
         else:
             custom_command = command
         # Resize the image before it is consumed by the model.
-        inference_img = Image.frombytes('RGB', caps_size(sample.get_caps()), data, 'raw')
+        inference_img = Image.frombytes('RGBX', (480, 360), data, 'raw').convert('RGB')
         image_width, image_height = inference_img.size
         inference_img = inference_img.resize(layout.inference_size, Image.NEAREST)
-        svg = render_overlay(np.asarray(inference_img).flatten(),
+        svg = render_overlay(inference_img,
                              command=custom_command)
 
         svg_overlay = pipeline.get_by_name('svg_overlay')
@@ -248,8 +246,8 @@
     inference_size = render_overlay_gen.send(None)  # Initialize.
     next(render_overlay_gen)
     return run(inference_size,
-        lambda tensor, layout, command:
-            render_overlay_gen.send((tensor, layout, command)),
+        lambda image, layout, command:
+            render_overlay_gen.send((image, layout, command)),
         source=source,
         loop=loop,
         display=display)
diff --git a/edgetpuvision/pipelines.py b/edgetpuvision/pipelines.py
index b3d84c7..586a176 100644
--- a/edgetpuvision/pipelines.py
+++ b/edgetpuvision/pipelines.py
@@ -18,10 +18,7 @@
     return [
         Source('file', location=filename),
         Filter('decodebin'),
-        Filter('mtkmdp width=1280 height=720 format=YUY2'),
-        Queue(max_size_buffers=1, leaky='downstream'),
-        Filter('videoconvert'),
-        Caps('video/x-raw', format='BGRA'),
+        Filter('mtkmdp width=1280 height=720 format=BGRx'),
     ]
 
 def v4l2_src(fmt):
@@ -29,9 +26,7 @@
         Source('v4l2', device=fmt.device),
         Caps('video/x-raw', format=fmt.pixel, width=fmt.size.width, height=fmt.size.height,
              framerate='%d/%d' % fmt.framerate),
-        Queue(),
-        Filter('videoconvert'),
-        Caps('video/x-raw', format='BGRA'),
+        Filter('mtkmdp width=1024 height=768 format=BGRx'),
     ]
 
 def display_sink():
@@ -47,7 +42,6 @@
     size = max_inner_size(layout.render_size, layout.inference_size)
     return [
         Filter('videoconvert'),
-        Caps('video/x-raw', format='RGB'),
         Sink('app', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False),
     ]
 
@@ -55,6 +49,7 @@
 def image_display_pipeline(filename, layout):
     return (
         [decoded_file_src(filename),
+         Filter('videoconvert'),
          Filter('imagefreeze'),
          Caps('video/x-raw', framerate='30/1'),
          Tee(name='t')],
@@ -62,6 +57,8 @@
          Queue(),
          display_sink()],
         [Pad('t'),
+         Filter('videoconvert'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(max_size_buffers=1, leaky='downstream'),
          inference_pipeline(layout)],
     )
@@ -72,9 +69,11 @@
         [decoded_file_src(filename),
          Tee(name='t')],
         [Pad('t'),
+         Filter('videoconvert'),
          Queue(),
          display_sink()],
         [Pad('t'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(max_size_buffers=1, leaky='downstream'),
          inference_pipeline(layout)],
     )
@@ -84,9 +83,11 @@
         [v4l2_src(fmt),
          Tee(name='t')],
         [Pad('t'),
+         Filter('videoconvert'),
          Queue(),
          display_sink()],
         [Pad(name='t'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(max_size_buffers=1, leaky='downstream'),
          inference_pipeline(layout)],
     )
@@ -95,19 +96,24 @@
 def image_headless_pipeline(filename, layout):
     return (
       [decoded_file_src(filename),
+       Filter('videoconvert'),
        Filter('imagefreeze'),
+       Filter('videoconvert'),
+       Filter('mtkmdp width=480 height=360 format=RGBx'),
        inference_pipeline(layout)],
     )
 
 def video_headless_pipeline(filename, layout):
     return (
         [decoded_file_src(filename),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          inference_pipeline(layout)],
     )
 
 def camera_headless_pipeline(fmt, layout):
     return (
         [v4l2_src(fmt),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          inference_pipeline(layout)],
     )
 
@@ -125,6 +131,7 @@
         [Pad('t'),
          Queue(max_size_buffers=1),
          Filter('decodebin'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          inference_pipeline(layout)],
     )
 
@@ -146,6 +153,7 @@
           Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
           h264_sink()],
         [Pad('t'),
+         Filter('mtkmdp width=480 height=360 format=RGBx'),
          Queue(),
          inference_pipeline(layout)],
     )