Add headless GStreamer pipelines.

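Replace the --fullscreen flag on the classify and detect demos with
--display {fullscreen,window,none}, where 'none' selects new headless
pipelines that drop the rendering branch and feed frames straight to
the inference appsink. detect gains the --print flag classify already
had, and both now print per-inference results. Common file and camera
sources are factored out (image_file, video_file, v4l2_camera) and
reused by the display, headless, and streaming pipelines; the overlay
lookup in gstreamer.py is guarded since headless pipelines have no
rsvgoverlay element.

A minimal sketch of a headless run through the new API (the generator
contract mirrors render_gen in classify.py; the inference size and the
camera format string below are illustrative placeholders, not values
from this change):

  from edgetpuvision.gstreamer import Display, run_gen

  def render_overlay_gen():
      # The first yield reports the model input size to run_gen.
      tensor, size, window, inference_rate, command = yield (224, 224)
      while True:
          # Run inference on tensor here; yield None because there is
          # nothing to draw in headless mode.
          tensor, size, window, inference_rate, command = yield None

  run_gen(render_overlay_gen(),
          source='/dev/video0:YUY2:1280x720:30/1',  # placeholder format
          downscale=1,
          display=Display.NONE)
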
Change-Id: I5ed5fa8cfa175e6207bcf3972142783df2e6d759
diff --git a/edgetpuvision/camera.py b/edgetpuvision/camera.py
index 61f83ee..4bc6249 100644
--- a/edgetpuvision/camera.py
+++ b/edgetpuvision/camera.py
@@ -70,10 +70,8 @@
         self._fmt = fmt
 
     def make_pipeline(self, fmt, profile, inline_headers, bitrate, intra_period):
-        return (
-            pipelines.v4l2_camera(self._fmt),
-            pipelines.camera_streaming_pipeline(profile, bitrate, self._render_size, self._inference_size)
-        )
+        return pipelines.camera_streaming_pipeline(self._fmt, profile, bitrate,
+                                                   self._render_size, self._inference_size)
 
 def make_camera(source, inference_size):
     fmt = parse_format(source)
diff --git a/edgetpuvision/classify.py b/edgetpuvision/classify.py
index 4e86077..247e27a 100644
--- a/edgetpuvision/classify.py
+++ b/edgetpuvision/classify.py
@@ -13,9 +13,9 @@
 
 from edgetpu.classification.engine import ClassificationEngine
 
-from . import gstreamer
 from . import overlays
 from .utils import load_labels, input_image_size, same_input_image_sizes
+from .gstreamer import Display, run_gen
 
 
 def top_results(window, top_k):
@@ -25,13 +25,17 @@
             total_scores[label] += score
     return sorted(total_scores.items(), key=lambda kv: kv[1], reverse=True)[:top_k]
 
-
 def accumulator(size, top_k):
     window = collections.deque(maxlen=size)
     window.append((yield []))
     while True:
         window.append((yield top_results(window, top_k)))
 
+def print_results(inference_rate, results):
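+    # results: (label, score) pairs accumulated over the sliding window.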
+    print('\nInference (rate=%.2f fps):' % inference_rate)
+    for label, score in results:
+        print('  %s, score=%.2f' % (label, score))
 
 def render_gen(args):
     acc = accumulator(size=args.window, top_k=args.top_k)
@@ -59,7 +63,7 @@
             results = [(labels[i], score) for i, score in results]
             results = acc.send(results)
             if args.print:
-                print(results)
+                print_results(inference_rate, results)
 
             output = overlays.classification(results, inference_time, inference_rate, size, window)
         else:
@@ -70,7 +74,6 @@
         elif command == 'n':
             engine = next(engines)
 
-
 def main():
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--source',
@@ -88,16 +91,16 @@
                         help='number of classes with highest score to display')
     parser.add_argument('--threshold', type=float, default=0.1,
                         help='class score threshold')
-    parser.add_argument('--print', action='store_true', default=False,
-                        help='Print detected classes to console')
-    parser.add_argument('--fullscreen', default=False, action='store_true',
-                        help='Fullscreen rendering')
+    parser.add_argument('--display', type=Display, choices=Display, default=Display.FULLSCREEN,
+                        help='Display mode')
+    parser.add_argument('--print', default=False, action='store_true',
+                        help='Print inference results')
     args = parser.parse_args()
 
-    if not gstreamer.run_gen(render_gen(args),
-                         source=args.source,
-                         downscale=args.downscale,
-                         fullscreen=args.fullscreen):
+    if not run_gen(render_gen(args),
+                   source=args.source,
+                   downscale=args.downscale,
+                   display=args.display):
         print('Invalid source argument:', args.source)
 
 if __name__ == '__main__':
diff --git a/edgetpuvision/detect.py b/edgetpuvision/detect.py
index 9373cf1..b87697e 100644
--- a/edgetpuvision/detect.py
+++ b/edgetpuvision/detect.py
@@ -17,14 +17,22 @@
 
 from edgetpu.detection.engine import DetectionEngine
 
-from . import gstreamer
 from . import overlays
 from .utils import load_labels, input_image_size, same_input_image_sizes
+from .gstreamer import Display, run_gen
 
 def area(obj):
     x0, y0, x1, y1 = rect = obj.bounding_box.flatten().tolist()
     return (x1 - x0) * (y1 - y0)
 
+def print_results(inference_rate, objs, labels):
+    print('\nInference (rate=%.2f fps):' % inference_rate)
+    for i, obj in enumerate(objs):
+        label = labels[obj.label_id] if labels else str(obj.label_id)
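+        # Pack (index, label, x0, y0, x1, y1, area) to match the format string.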
+        x = (i, label) + tuple(obj.bounding_box.flatten()) + (area(obj),)
+        print('    %d: label=%s, bbox=(%.2f %.2f %.2f %.2f), bbox_area=%.2f' % x)
+
 def render_gen(args):
     engines = [DetectionEngine(m) for m in args.model.split(',')]
     assert same_input_image_sizes(engines)
@@ -50,6 +58,9 @@
 
             objs = [obj for obj in objs if args.min_area <= area(obj) <= args.max_area]
 
+            if args.print:
+                print_results(inference_rate, objs, labels)
+
             output = overlays.detection(objs, labels, inference_time, inference_rate, size, window)
         else:
             output = None
@@ -59,7 +70,6 @@
         elif command == 'n':
             engine = next(engines)
 
-
 def main():
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('--source',
@@ -79,15 +89,18 @@
                         help='Min bounding box area')
     parser.add_argument('--max_area', type=float, default=1.0,
                         help='Max bounding box area')
-    parser.add_argument('--filter', default=None)
-    parser.add_argument('--fullscreen', default=False, action='store_true',
-                        help='Fullscreen rendering')
+    parser.add_argument('--filter', default=None,
+                        help='Comma-separated list of allowed labels')
+    parser.add_argument('--display', type=Display, choices=Display, default=Display.FULLSCREEN,
+                        help='Display mode')
+    parser.add_argument('--print', default=False, action='store_true',
+                        help='Print inference results')
     args = parser.parse_args()
 
-    if not gstreamer.run_gen(render_gen(args),
-                          source=args.source,
-                          downscale=args.downscale,
-                          fullscreen=args.fullscreen):
+    if not run_gen(render_gen(args),
+                   source=args.source,
+                   downscale=args.downscale,
+                   display=args.display):
         print('Invalid source argument:', args.source)
 
 
diff --git a/edgetpuvision/gstreamer.py b/edgetpuvision/gstreamer.py
index 7266845..2d67540 100644
--- a/edgetpuvision/gstreamer.py
+++ b/edgetpuvision/gstreamer.py
@@ -1,5 +1,6 @@
 import collections
 import contextlib
+import enum
 import fcntl
 import functools
 import os
@@ -33,6 +34,15 @@
 COMMAND_SAVE_FRAME = ' '
 COMMAND_PRINT_INFO = 'p'
 
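+# How rendered output is presented; Display.NONE selects the headless pipelines.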
+class Display(enum.Enum):
+    FULLSCREEN = 'fullscreen'
+    WINDOW = 'window'
+    NONE = 'none'
+
+    def __str__(self):
+        return self.value
+
 def set_nonblocking(fd):
     flags = fcntl.fcntl(fd, fcntl.F_GETFL)
     return fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
@@ -198,23 +208,24 @@
                              inference_rate=inference_rate,
                              command=custom_command)
         overlay = pipeline.get_by_name('overlay')
-        overlay.set_property('data', svg)
+        if overlay:
+            overlay.set_property('data', svg)
 
         if save_frame:
             images.put((data, caps_size(sample.get_caps()), svg))
 
     return Gst.FlowReturn.OK
 
-def run_gen(render_overlay_gen, *, source, downscale, fullscreen):
+def run_gen(render_overlay_gen, *, source, downscale, display):
     inference_size = render_overlay_gen.send(None)  # Initialize.
     return run(inference_size,
         lambda tensor, size, window, inference_rate, command:
             render_overlay_gen.send((tensor, size, window, inference_rate, command)),
         source=source,
         downscale=downscale,
-        fullscreen=fullscreen)
+        display=display)
 
-def run(inference_size, render_overlay, *, source, downscale, fullscreen):
+def run(inference_size, render_overlay, *, source, downscale, display):
     reg = Gst.Registry.get()
     for feature in reg.get_feature_list_by_plugin('vpu.imx'):
         # Otherwise decodebin uses vpudec to decode JPEG images and fails.
@@ -222,7 +233,7 @@
 
     fmt = parse_format(source)
     if fmt:
-        run_camera(inference_size, render_overlay, fmt, fullscreen)
+        run_camera(inference_size, render_overlay, fmt, display)
         return True
 
     filename = os.path.expanduser(source)
@@ -230,28 +241,41 @@
         run_file(inference_size, render_overlay,
                  filename=filename,
                  downscale=downscale,
-                 fullscreen=fullscreen)
+                 display=display)
         return True
 
     return False
 
-def run_camera(inference_size, render_overlay, fmt, fullscreen):
-    inference_size = Size(*inference_size)
 
-    camera = v4l2_camera(fmt)
-    caps = next(x for x in camera if isinstance(x, Caps))
-    render_size = Size(caps.width, caps.height)
-    pipeline = camera + camera_display_pipeline(render_size, inference_size, fullscreen)
+def run_camera(inference_size, render_overlay, fmt, display):
+    inference_size = Size(*inference_size)
+    render_size = fmt.size
+
+    if display is Display.NONE:
+        pipeline = camera_headless_pipeline(fmt, render_size, inference_size)
+    else:
+        pipeline = camera_display_pipeline(fmt, render_size, inference_size,
+                                           display is Display.FULLSCREEN)
+
     return run_loop(pipeline, inference_size, render_size, render_overlay)
 
-def run_file(inference_size, render_overlay, *, filename, downscale, fullscreen):
+
+def run_file(inference_size, render_overlay, *, filename, downscale, display):
     inference_size = Size(*inference_size)
     info = get_video_info(filename)
     render_size = Size(info.get_width(), info.get_height()) / downscale
-    if info.is_image():
-        pipeline = image_display_pipeline(filename, render_size, inference_size, fullscreen)
+
+    if display is Display.NONE:
+        if info.is_image():
+            pipeline = image_headless_pipeline(filename, render_size, inference_size)
+        else:
+            pipeline = video_headless_pipeline(filename, render_size, inference_size)
     else:
-        pipeline = video_display_pipeline(filename, render_size, inference_size, fullscreen)
+        fullscreen = display is Display.FULLSCREEN
+        if info.is_image():
+            pipeline = image_display_pipeline(filename, render_size, inference_size, fullscreen)
+        else:
+            pipeline = video_display_pipeline(filename, render_size, inference_size, fullscreen)
 
     return run_loop(pipeline, inference_size, render_size, render_overlay)
 
diff --git a/edgetpuvision/pipelines.py b/edgetpuvision/pipelines.py
index 4c20d63..fb06a4a 100644
--- a/edgetpuvision/pipelines.py
+++ b/edgetpuvision/pipelines.py
@@ -1,5 +1,19 @@
 from .gst import *
 
+def image_file(filename):
+    return (
+        Filter('filesrc', location=filename),
+        Filter('decodebin'),
+    )
+
+def video_file(filename):
+    return (
+        Filter('filesrc', location=filename),
+        Filter('qtdemux'),
+        Filter('h264parse'),
+        Filter('vpudec'),
+    )
+
 def v4l2_camera(fmt):
     return (
         Filter('v4l2src', device=fmt.device),
@@ -33,26 +47,25 @@
         Filter('appsink', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False)
     )
 
+# Display: render on one tee branch, run inference on the other.
 def image_display_pipeline(filename, render_size, inference_size, fullscreen):
     size = max_inner_size(render_size, inference_size)
     return (
-        Filter('filesrc', location=filename),
-        Filter('decodebin'),
-        Filter('videoconvert'),
-        Caps('video/x-raw', format='RGB'),
-        Filter('imagefreeze'),
+        image_file(filename),
         Tee(pads=((
-            Queue(max_size_buffers=1),
+            Queue(),
+            Filter('imagefreeze'),
             Filter('videoconvert'),
             Filter('videoscale'),
-            Filter('rsvgoverlay', name='overlay'),
             Caps('video/x-raw', width=render_size.width, height=render_size.height),
+            Filter('rsvgoverlay', name='overlay'),
             display_sink(fullscreen),
         ),(
-            Queue(max_size_buffers=1),
+            Queue(),
+            Filter('imagefreeze'),
             Filter('videoconvert'),
             Filter('videoscale'),
-            Caps('video/x-raw', width=size.width, height=size.height),
+            Caps('video/x-raw', format='RGB', width=size.width, height=size.height),
             Filter('videobox', autocrop=True),
             Caps('video/x-raw', width=inference_size.width, height=inference_size.height),
             Filter('appsink', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False)
@@ -61,10 +74,7 @@
 
 def video_display_pipeline(filename, render_size, inference_size, fullscreen):
     return (
-        Filter('filesrc', location=filename),
-        Filter('qtdemux'),
-        Filter('h264parse'),
-        Filter('vpudec'),
+        video_file(filename),
         Filter('glupload'),
         Tee(pads=((
             Queue(max_size_buffers=1),
@@ -78,9 +88,9 @@
         )))
     )
 
-def camera_display_pipeline(render_size, inference_size, fullscreen):
+def camera_display_pipeline(fmt, render_size, inference_size, fullscreen):
     return (
-        # TODO(dkovalev): Queue(max_size_buffers=1, leaky='downstream'),
+        v4l2_camera(fmt),
         Filter('glupload'),
         Tee(pads=((
             Queue(max_size_buffers=1, leaky='downstream'),
@@ -93,6 +103,30 @@
         )))
     )
 
+# Headless: inference-only pipelines with no display branch.
+def image_headless_pipeline(filename, render_size, inference_size):
+    return (
+        image_file(filename),
+        Filter('imagefreeze'),
+        Filter('glupload'),
+        inference_pipeline(render_size, inference_size),
+    )
+
+def video_headless_pipeline(filename, render_size, inference_size):
+    return (
+        video_file(filename),
+        Filter('glupload'),
+        inference_pipeline(render_size, inference_size),
+    )
+
+def camera_headless_pipeline(fmt, render_size, inference_size):
+    return (
+        v4l2_camera(fmt),
+        Filter('glupload'),
+        inference_pipeline(render_size, inference_size),
+    )
+
+# Streaming: H.264 over the network on one tee branch, inference on the other.
 def video_streaming_pipeline(filename, render_size, inference_size):
     return (
         Filter('filesrc', location=filename),
@@ -100,23 +134,21 @@
         Tee(pads=((
           Queue(max_size_buffers=1),
           Filter('h264parse'),
-          Filter('vpudec'),
-          inference_pipeline(render_size, inference_size),
+          Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
+          h264_sink()
         ), (
           Queue(max_size_buffers=1),
           Filter('h264parse'),
-          Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
-          h264_sink()
+          Filter('vpudec'),
+          inference_pipeline(render_size, inference_size),
         )))
     )
 
-def camera_streaming_pipeline(profile, bitrate, render_size, inference_size):
+def camera_streaming_pipeline(fmt, profile, bitrate, render_size, inference_size):
     size = max_inner_size(render_size, inference_size)
     return (
+        v4l2_camera(fmt),
         Tee(pads=((
-          Queue(),
-          inference_pipeline(render_size, inference_size)
-        ), (
           Queue(max_size_buffers=1, leaky='downstream'),
           Filter('videoconvert'),
           Filter('x264enc',
@@ -130,5 +162,9 @@
           Filter('h264parse'),
           Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
           h264_sink()
+        ), (
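+          # Second tee branch: raw camera frames feed inference directly.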
+          Queue(),
+          inference_pipeline(render_size, inference_size)
         )))
     )
\ No newline at end of file