Use MDP for scaling and color space conversion
Use two MDP instances for scaling and color space conversion, one for display
and one for ML
Refactor the input tensor generation to support a grayscale model
Tested: edgetpu_detect with RGB and Gray FSSD models
Change-Id: I30d5051bd5227a756762590aca444c31b999127c
diff --git a/edgetpuvision/apps.py b/edgetpuvision/apps.py
index 2dbcbe6..48bcb25 100644
--- a/edgetpuvision/apps.py
+++ b/edgetpuvision/apps.py
@@ -30,7 +30,7 @@
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--source',
help='/dev/videoN:FMT:WxH:N/D or .mp4 file or image file',
- default='/dev/video0:RGB:640x480:30/1')
+ default='/dev/video0:YUY2:640x480:30/1')
parser.add_argument('--bitrate', type=int, default=1000000,
help='Video streaming bitrate (bit/s)')
parser.add_argument('--loop', default=False, action='store_true',
@@ -44,8 +44,8 @@
assert camera is not None
with StreamingServer(camera, args.bitrate) as server:
- def render_overlay(tensor, layout, command):
- overlay = gen.send((tensor, layout, command))
+ def render_overlay(image, layout, command):
+ overlay = gen.send((image, layout, command))
server.send_overlay(overlay if overlay else EMPTY_SVG)
camera.render_overlay = render_overlay
@@ -56,7 +56,7 @@
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--source',
help='/dev/videoN:FMT:WxH:N/D or .mp4 file or image file',
- default='/dev/video0:RGB:1024x768:30/1')
+ default='/dev/video0:YUY2:1024x768:30/1')
parser.add_argument('--loop', default=False, action='store_true',
help='Loop input video file')
parser.add_argument('--displaymode', type=Display, choices=Display, default=Display.FULLSCREEN,
diff --git a/edgetpuvision/detect.py b/edgetpuvision/detect.py
index 44bda57..be30314 100644
--- a/edgetpuvision/detect.py
+++ b/edgetpuvision/detect.py
@@ -30,9 +30,11 @@
import collections
import colorsys
import itertools
+import numpy as np
import time
from edgetpu.detection.engine import DetectionEngine
+from PIL import Image
from . import svg
from . import utils
@@ -157,10 +159,14 @@
output = None
while True:
- tensor, layout, command = (yield output)
+ image, layout, command = (yield output)
inference_rate = next(fps_counter)
if draw_overlay:
+ _, _, _, c = engine.get_input_tensor_shape()
+ if c == 1:
+ image = image.convert('L')
+ tensor = np.asarray(image).flatten()
start = time.monotonic()
objs = engine .detect_with_input_tensor(tensor, threshold=args.threshold, top_k=args.top_k)
inference_time = time.monotonic() - start
diff --git a/edgetpuvision/gstreamer.py b/edgetpuvision/gstreamer.py
index 3f80304..56670a3 100644
--- a/edgetpuvision/gstreamer.py
+++ b/edgetpuvision/gstreamer.py
@@ -26,8 +26,6 @@
import threading
import time
-import numpy as np
-
import gi
gi.require_version('Gtk', '3.0')
gi.require_version('GLib', '2.0')
@@ -229,10 +227,10 @@
else:
custom_command = command
# Resize the image before it is consumed by the model.
- inference_img = Image.frombytes('RGB', caps_size(sample.get_caps()), data, 'raw')
+ inference_img = Image.frombytes('RGBX', (480, 360), data, 'raw').convert('RGB')
image_width, image_height = inference_img.size
inference_img = inference_img.resize(layout.inference_size, Image.NEAREST)
- svg = render_overlay(np.asarray(inference_img).flatten(),
+ svg = render_overlay(inference_img,
command=custom_command)
svg_overlay = pipeline.get_by_name('svg_overlay')
@@ -248,8 +246,8 @@
inference_size = render_overlay_gen.send(None) # Initialize.
next(render_overlay_gen)
return run(inference_size,
- lambda tensor, layout, command:
- render_overlay_gen.send((tensor, layout, command)),
+ lambda image, layout, command:
+ render_overlay_gen.send((image, layout, command)),
source=source,
loop=loop,
display=display)
diff --git a/edgetpuvision/pipelines.py b/edgetpuvision/pipelines.py
index b3d84c7..586a176 100644
--- a/edgetpuvision/pipelines.py
+++ b/edgetpuvision/pipelines.py
@@ -18,10 +18,7 @@
return [
Source('file', location=filename),
Filter('decodebin'),
- Filter('mtkmdp width=1280 height=720 format=YUY2'),
- Queue(max_size_buffers=1, leaky='downstream'),
- Filter('videoconvert'),
- Caps('video/x-raw', format='BGRA'),
+ Filter('mtkmdp width=1280 height=720 format=BGRx'),
]
def v4l2_src(fmt):
@@ -29,9 +26,7 @@
Source('v4l2', device=fmt.device),
Caps('video/x-raw', format=fmt.pixel, width=fmt.size.width, height=fmt.size.height,
framerate='%d/%d' % fmt.framerate),
- Queue(),
- Filter('videoconvert'),
- Caps('video/x-raw', format='BGRA'),
+ Filter('mtkmdp width=1024 height=768 format=BGRx'),
]
def display_sink():
@@ -47,7 +42,6 @@
size = max_inner_size(layout.render_size, layout.inference_size)
return [
Filter('videoconvert'),
- Caps('video/x-raw', format='RGB'),
Sink('app', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False),
]
@@ -55,6 +49,7 @@
def image_display_pipeline(filename, layout):
return (
[decoded_file_src(filename),
+ Filter('videoconvert'),
Filter('imagefreeze'),
Caps('video/x-raw', framerate='30/1'),
Tee(name='t')],
@@ -62,6 +57,8 @@
Queue(),
display_sink()],
[Pad('t'),
+ Filter('videoconvert'),
+ Filter('mtkmdp width=480 height=360 format=RGBx'),
Queue(max_size_buffers=1, leaky='downstream'),
inference_pipeline(layout)],
)
@@ -72,9 +69,11 @@
[decoded_file_src(filename),
Tee(name='t')],
[Pad('t'),
+ Filter('videoconvert'),
Queue(),
display_sink()],
[Pad('t'),
+ Filter('mtkmdp width=480 height=360 format=RGBx'),
Queue(max_size_buffers=1, leaky='downstream'),
inference_pipeline(layout)],
)
@@ -84,9 +83,11 @@
[v4l2_src(fmt),
Tee(name='t')],
[Pad('t'),
+ Filter('videoconvert'),
Queue(),
display_sink()],
[Pad(name='t'),
+ Filter('mtkmdp width=480 height=360 format=RGBx'),
Queue(max_size_buffers=1, leaky='downstream'),
inference_pipeline(layout)],
)
@@ -95,19 +96,24 @@
def image_headless_pipeline(filename, layout):
return (
[decoded_file_src(filename),
+ Filter('videoconvert'),
Filter('imagefreeze'),
+ Filter('videoconvert'),
+ Filter('mtkmdp width=480 height=360 format=RGBx'),
inference_pipeline(layout)],
)
def video_headless_pipeline(filename, layout):
return (
[decoded_file_src(filename),
+ Filter('mtkmdp width=480 height=360 format=RGBx'),
inference_pipeline(layout)],
)
def camera_headless_pipeline(fmt, layout):
return (
[v4l2_src(fmt),
+ Filter('mtkmdp width=480 height=360 format=RGBx'),
inference_pipeline(layout)],
)
@@ -125,6 +131,7 @@
[Pad('t'),
Queue(max_size_buffers=1),
Filter('decodebin'),
+ Filter('mtkmdp width=480 height=360 format=RGBx'),
inference_pipeline(layout)],
)
@@ -146,6 +153,7 @@
Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
h264_sink()],
[Pad('t'),
+ Filter('mtkmdp width=480 height=360 format=RGBx'),
Queue(),
inference_pipeline(layout)],
)