Add headless gstreamer pipelines.

Replace the boolean --fullscreen flag with a --display flag backed by a
new Display enum (fullscreen, window, none); --display none selects new
headless pipelines that run inference without a display sink. Factor
out image_file()/video_file() source bins, pass the camera format into
the display and streaming pipeline builders, and add print_results()
helpers so classify.py and detect.py can log results via --print.
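
Example invocation (illustrative only: the module entry point, model
path, and v4l2 source string are placeholders; the source string must
match what parse_format() accepts):

  python3 -m edgetpuvision.detect --model mobilenet_ssd.tflite \
      --source /dev/video0:YUY2:1280x720:30/1 --display none --print
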
Change-Id: I5ed5fa8cfa175e6207bcf3972142783df2e6d759
diff --git a/edgetpuvision/camera.py b/edgetpuvision/camera.py
index 61f83ee..4bc6249 100644
--- a/edgetpuvision/camera.py
+++ b/edgetpuvision/camera.py
@@ -70,10 +70,8 @@
self._fmt = fmt
def make_pipeline(self, fmt, profile, inline_headers, bitrate, intra_period):
- return (
- pipelines.v4l2_camera(self._fmt),
- pipelines.camera_streaming_pipeline(profile, bitrate, self._render_size, self._inference_size)
- )
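+        # The camera source is now built inside the streaming pipeline,
+        # which therefore needs the capture format.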
+ return pipelines.camera_streaming_pipeline(self._fmt, profile, bitrate,
+ self._render_size, self._inference_size)
def make_camera(source, inference_size):
fmt = parse_format(source)
diff --git a/edgetpuvision/classify.py b/edgetpuvision/classify.py
index 4e86077..247e27a 100644
--- a/edgetpuvision/classify.py
+++ b/edgetpuvision/classify.py
@@ -13,9 +13,9 @@
from edgetpu.classification.engine import ClassificationEngine
-from . import gstreamer
from . import overlays
from .utils import load_labels, input_image_size, same_input_image_sizes
+from .gstreamer import Display, run_gen
def top_results(window, top_k):
@@ -25,13 +25,17 @@
total_scores[label] += score
return sorted(total_scores.items(), key=lambda kv: kv[1], reverse=True)[:top_k]
-
def accumulator(size, top_k):
window = collections.deque(maxlen=size)
window.append((yield []))
while True:
window.append((yield top_results(window, top_k)))
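+# Print classification results (label, score pairs) to the console.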
+def print_results(inference_rate, results):
+    print('\nInference (rate=%.2f fps):' % inference_rate)
+    for label, score in results:
+        print(' %s, score=%.2f' % (label, score))
def render_gen(args):
acc = accumulator(size=args.window, top_k=args.top_k)
@@ -59,7 +63,7 @@
results = [(labels[i], score) for i, score in results]
results = acc.send(results)
if args.print:
- print(results)
+ print_results(inference_rate, results)
output = overlays.classification(results, inference_time, inference_rate, size, window)
else:
@@ -70,7 +74,6 @@
elif command == 'n':
engine = next(engines)
-
def main():
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--source',
@@ -88,16 +91,16 @@
help='number of classes with highest score to display')
parser.add_argument('--threshold', type=float, default=0.1,
help='class score threshold')
- parser.add_argument('--print', action='store_true', default=False,
- help='Print detected classes to console')
- parser.add_argument('--fullscreen', default=False, action='store_true',
- help='Fullscreen rendering')
+ parser.add_argument('--display', type=Display, choices=Display, default=Display.FULLSCREEN,
+ help='Display mode')
+ parser.add_argument('--print', default=False, action='store_true',
+ help='Print inference results')
args = parser.parse_args()
- if not gstreamer.run_gen(render_gen(args),
- source=args.source,
- downscale=args.downscale,
- fullscreen=args.fullscreen):
+ if not run_gen(render_gen(args),
+ source=args.source,
+ downscale=args.downscale,
+ display=args.display):
print('Invalid source argument:', args.source)
if __name__ == '__main__':
diff --git a/edgetpuvision/detect.py b/edgetpuvision/detect.py
index 9373cf1..b87697e 100644
--- a/edgetpuvision/detect.py
+++ b/edgetpuvision/detect.py
@@ -17,14 +17,22 @@
from edgetpu.detection.engine import DetectionEngine
-from . import gstreamer
+
from . import overlays
from .utils import load_labels, input_image_size, same_input_image_sizes
+from .gstreamer import Display, run_gen
def area(obj):
x0, y0, x1, y1 = rect = obj.bounding_box.flatten().tolist()
return (x1 - x0) * (y1 - y0)
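+# Print one line per detected object: index, label, bounding box, area.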
+def print_results(inference_rate, objs, labels):
+ print('\nInference (rate=%.2f fps):' % inference_rate)
+ for i, obj in enumerate(objs):
+ label = labels[obj.label_id] if labels else str(obj.label_id)
+ x = (i, label) + tuple(obj.bounding_box.flatten()) + (area(obj),)
+ print(' %d: label=%s, bbox=(%.2f %.2f %.2f %.2f), bbox_area=%.2f' % x)
+
def render_gen(args):
engines = [DetectionEngine(m) for m in args.model.split(',')]
assert same_input_image_sizes(engines)
@@ -50,6 +58,9 @@
objs = [obj for obj in objs if args.min_area <= area(obj) <= args.max_area]
+ if args.print:
+ print_results(inference_rate, objs, labels)
+
output = overlays.detection(objs, labels, inference_time, inference_rate, size, window)
else:
output = None
@@ -59,7 +70,6 @@
elif command == 'n':
engine = next(engines)
-
def main():
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--source',
@@ -79,15 +89,18 @@
help='Min bounding box area')
parser.add_argument('--max_area', type=float, default=1.0,
help='Max bounding box area')
- parser.add_argument('--filter', default=None)
- parser.add_argument('--fullscreen', default=False, action='store_true',
- help='Fullscreen rendering')
+ parser.add_argument('--filter', default=None,
+ help='Comma-separated list of allowed labels')
+ parser.add_argument('--display', type=Display, choices=Display, default=Display.FULLSCREEN,
+ help='Display mode')
+ parser.add_argument('--print', default=False, action='store_true',
+ help='Print inference results')
args = parser.parse_args()
- if not gstreamer.run_gen(render_gen(args),
- source=args.source,
- downscale=args.downscale,
- fullscreen=args.fullscreen):
+ if not run_gen(render_gen(args),
+ source=args.source,
+ downscale=args.downscale,
+ display=args.display):
print('Invalid source argument:', args.source)
diff --git a/edgetpuvision/gstreamer.py b/edgetpuvision/gstreamer.py
index 7266845..2d67540 100644
--- a/edgetpuvision/gstreamer.py
+++ b/edgetpuvision/gstreamer.py
@@ -1,5 +1,6 @@
import collections
import contextlib
+import enum
import fcntl
import functools
import os
@@ -33,6 +34,14 @@
COMMAND_SAVE_FRAME = ' '
COMMAND_PRINT_INFO = 'p'
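+# Presentation mode for rendered output; NONE selects the headless
+# pipelines, which have no display sink.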
+class Display(enum.Enum):
+ FULLSCREEN = 'fullscreen'
+ WINDOW = 'window'
+ NONE = 'none'
+
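+    # argparse formats choices and defaults via str(), so show the raw value.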
+ def __str__(self):
+ return self.value
+
def set_nonblocking(fd):
flags = fcntl.fcntl(fd, fcntl.F_GETFL)
return fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
@@ -198,23 +207,24 @@
inference_rate=inference_rate,
command=custom_command)
overlay = pipeline.get_by_name('overlay')
- overlay.set_property('data', svg)
+ if overlay:
+ overlay.set_property('data', svg)
if save_frame:
images.put((data, caps_size(sample.get_caps()), svg))
return Gst.FlowReturn.OK
-def run_gen(render_overlay_gen, *, source, downscale, fullscreen):
+def run_gen(render_overlay_gen, *, source, downscale, display):
inference_size = render_overlay_gen.send(None) # Initialize.
return run(inference_size,
lambda tensor, size, window, inference_rate, command:
render_overlay_gen.send((tensor, size, window, inference_rate, command)),
source=source,
downscale=downscale,
- fullscreen=fullscreen)
+ display=display)
-def run(inference_size, render_overlay, *, source, downscale, fullscreen):
+def run(inference_size, render_overlay, *, source, downscale, display):
reg = Gst.Registry.get()
for feature in reg.get_feature_list_by_plugin('vpu.imx'):
# Otherwise decodebin uses vpudec to decode JPEG images and fails.
@@ -222,7 +232,7 @@
fmt = parse_format(source)
if fmt:
- run_camera(inference_size, render_overlay, fmt, fullscreen)
+ run_camera(inference_size, render_overlay, fmt, display)
return True
filename = os.path.expanduser(source)
@@ -230,28 +240,41 @@
run_file(inference_size, render_overlay,
filename=filename,
downscale=downscale,
- fullscreen=fullscreen)
+ display=display)
return True
return False
-def run_camera(inference_size, render_overlay, fmt, fullscreen):
- inference_size = Size(*inference_size)
- camera = v4l2_camera(fmt)
- caps = next(x for x in camera if isinstance(x, Caps))
- render_size = Size(caps.width, caps.height)
- pipeline = camera + camera_display_pipeline(render_size, inference_size, fullscreen)
+def run_camera(inference_size, render_overlay, fmt, display):
+ inference_size = Size(*inference_size)
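+    # The parsed camera format already carries the capture resolution,
+    # so the source caps no longer need to be probed.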
+ render_size = fmt.size
+
+ if display is Display.NONE:
+ pipeline = camera_headless_pipeline(fmt, render_size, inference_size)
+ else:
+ pipeline = camera_display_pipeline(fmt, render_size, inference_size,
+ display is Display.FULLSCREEN)
+
return run_loop(pipeline, inference_size, render_size, render_overlay)
-def run_file(inference_size, render_overlay, *, filename, downscale, fullscreen):
+
+def run_file(inference_size, render_overlay, *, filename, downscale, display):
inference_size = Size(*inference_size)
info = get_video_info(filename)
render_size = Size(info.get_width(), info.get_height()) / downscale
- if info.is_image():
- pipeline = image_display_pipeline(filename, render_size, inference_size, fullscreen)
+
+ if display is Display.NONE:
+ if info.is_image():
+ pipeline = image_headless_pipeline(filename, render_size, inference_size)
+ else:
+ pipeline = video_headless_pipeline(filename, render_size, inference_size)
else:
- pipeline = video_display_pipeline(filename, render_size, inference_size, fullscreen)
+ fullscreen = display is Display.FULLSCREEN
+ if info.is_image():
+ pipeline = image_display_pipeline(filename, render_size, inference_size, fullscreen)
+ else:
+ pipeline = video_display_pipeline(filename, render_size, inference_size, fullscreen)
return run_loop(pipeline, inference_size, render_size, render_overlay)
diff --git a/edgetpuvision/pipelines.py b/edgetpuvision/pipelines.py
index 4c20d63..fb06a4a 100644
--- a/edgetpuvision/pipelines.py
+++ b/edgetpuvision/pipelines.py
@@ -1,5 +1,19 @@
from .gst import *
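+# File source bins shared by the display and headless pipelines below.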
+def image_file(filename):
+ return (
+ Filter('filesrc', location=filename),
+ Filter('decodebin'),
+ )
+
+def video_file(filename):
+ return (
+ Filter('filesrc', location=filename),
+ Filter('qtdemux'),
+ Filter('h264parse'),
+ Filter('vpudec'),
+ )
+
def v4l2_camera(fmt):
return (
Filter('v4l2src', device=fmt.device),
@@ -33,26 +47,25 @@
Filter('appsink', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False)
)
+# Display
def image_display_pipeline(filename, render_size, inference_size, fullscreen):
size = max_inner_size(render_size, inference_size)
return (
- Filter('filesrc', location=filename),
- Filter('decodebin'),
- Filter('videoconvert'),
- Caps('video/x-raw', format='RGB'),
- Filter('imagefreeze'),
+ image_file(filename),
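+        # imagefreeze is applied per tee branch below, so the display and
+        # inference paths each repeat the decoded still independently.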
Tee(pads=((
- Queue(max_size_buffers=1),
+ Queue(),
+ Filter('imagefreeze'),
Filter('videoconvert'),
Filter('videoscale'),
- Filter('rsvgoverlay', name='overlay'),
Caps('video/x-raw', width=render_size.width, height=render_size.height),
+ Filter('rsvgoverlay', name='overlay'),
display_sink(fullscreen),
),(
- Queue(max_size_buffers=1),
+ Queue(),
+ Filter('imagefreeze'),
Filter('videoconvert'),
Filter('videoscale'),
- Caps('video/x-raw', width=size.width, height=size.height),
+ Caps('video/x-raw', format='RGB', width=size.width, height=size.height),
Filter('videobox', autocrop=True),
Caps('video/x-raw', width=inference_size.width, height=inference_size.height),
Filter('appsink', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False)
@@ -61,10 +74,7 @@
def video_display_pipeline(filename, render_size, inference_size, fullscreen):
return (
- Filter('filesrc', location=filename),
- Filter('qtdemux'),
- Filter('h264parse'),
- Filter('vpudec'),
+ video_file(filename),
Filter('glupload'),
Tee(pads=((
Queue(max_size_buffers=1),
@@ -78,9 +88,9 @@
)))
)
-def camera_display_pipeline(render_size, inference_size, fullscreen):
+def camera_display_pipeline(fmt, render_size, inference_size, fullscreen):
return (
- # TODO(dkovalev): Queue(max_size_buffers=1, leaky='downstream'),
+ v4l2_camera(fmt),
Filter('glupload'),
Tee(pads=((
Queue(max_size_buffers=1, leaky='downstream'),
@@ -93,6 +103,30 @@
)))
)
+# Headless
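+# No display sink: frames terminate at the appsink created inside
+# inference_pipeline().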
+def image_headless_pipeline(filename, render_size, inference_size):
+ return (
+ image_file(filename),
+ Filter('imagefreeze'),
+ Filter('glupload'),
+ inference_pipeline(render_size, inference_size),
+ )
+
+def video_headless_pipeline(filename, render_size, inference_size):
+ return (
+ video_file(filename),
+ Filter('glupload'),
+ inference_pipeline(render_size, inference_size),
+ )
+
+def camera_headless_pipeline(fmt, render_size, inference_size):
+ return (
+ v4l2_camera(fmt),
+ Filter('glupload'),
+ inference_pipeline(render_size, inference_size),
+ )
+
+# Streaming
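+# One tee branch delivers H.264 to h264_sink() for streaming; the other
+# feeds inference_pipeline().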
def video_streaming_pipeline(filename, render_size, inference_size):
return (
Filter('filesrc', location=filename),
@@ -100,23 +134,21 @@
Tee(pads=((
Queue(max_size_buffers=1),
Filter('h264parse'),
- Filter('vpudec'),
- inference_pipeline(render_size, inference_size),
+ Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
+ h264_sink()
), (
Queue(max_size_buffers=1),
Filter('h264parse'),
- Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
- h264_sink()
+ Filter('vpudec'),
+ inference_pipeline(render_size, inference_size),
)))
)
-def camera_streaming_pipeline(profile, bitrate, render_size, inference_size):
+def camera_streaming_pipeline(fmt, profile, bitrate, render_size, inference_size):
size = max_inner_size(render_size, inference_size)
return (
+ v4l2_camera(fmt),
Tee(pads=((
- Queue(),
- inference_pipeline(render_size, inference_size)
- ), (
Queue(max_size_buffers=1, leaky='downstream'),
Filter('videoconvert'),
Filter('x264enc',
@@ -130,5 +162,8 @@
Filter('h264parse'),
Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
h264_sink()
+ ), (
+ Queue(),
+ inference_pipeline(render_size, inference_size)
)))
)
\ No newline at end of file