Add new edgetpuvision features.
* Better GStreamer pipelines
* Inference on image files
* Streaming from .mp4 files
* Streaming from any v4l2 source
* Switching between .tflite models at runtime
* edgetpu_demo script
* Simplified API based on Python generators (sketched below)
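A minimal sketch of the generator protocol behind render_gen()/run_gen()
(illustrative only; the dummy SVG string stands in for real per-frame
inference and overlay rendering):

    import itertools

    def render_gen(inference_size):
        # First yield: report the input size the model needs to run_gen().
        yield inference_size
        output = None
        frames = itertools.count()
        while True:
            # Per frame: receive (tensor, size, window, inference_rate,
            # command); yield back an SVG overlay string, or None.
            tensor, size, window, inference_rate, command = (yield output)
            output = '<svg><!-- frame %d --></svg>' % next(frames)

    gen = render_gen((300, 300))
    print(gen.send(None))  # (300, 300) -- initialization step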
Change-Id: I95b8658977fe586ff5f595e75c84ee2e2c46756f
diff --git a/.gitignore b/.gitignore
index 5ce6616..52b8614 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
-dist
build
+dist
+__pycache__
*.egg-info
+.DS_Store
diff --git a/bin/edgetpu_demo b/bin/edgetpu_demo
new file mode 100755
index 0000000..fbab807
--- /dev/null
+++ b/bin/edgetpu_demo
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+#readonly VIDEO_FILE=""
+readonly EDGETPU_DIR=$(python3 -c 'import edgetpu; import os; print(os.path.dirname(edgetpu.__file__))')
+readonly TEST_DATA_DIR="${EDGETPU_DIR}/test_data"
+readonly TPU_MODEL_FILE="${TEST_DATA_DIR}/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite"
+readonly LABELS_FILE="${TEST_DATA_DIR}/coco_labels.txt"
+
+if [ "$1" = "--device" ]; then
+ python3 -m edgetpuvision.detect \
+ --source "${VIDEO_FILE}" \
+ --model="${TPU_MODEL_FILE}" \
+ --labels="${LABELS_FILE}" \
+ --fullscreen
+elif [ "$1" = "--stream" ]; then
+ python3 -m edgetpuvision.detect_server \
+ --source "${VIDEO_FILE}" \
+ --model="${TPU_MODEL_FILE}" \
+ --labels="${LABELS_FILE}"
+else
+ echo "Run on-device inference:"
+ echo " $0 --device"
+ echo "Run streaming server:"
+ echo " $0 --stream"
+fi
diff --git a/edgetpuvision/camera.py b/edgetpuvision/camera.py
index d02c952..bcaa9b3 100644
--- a/edgetpuvision/camera.py
+++ b/edgetpuvision/camera.py
@@ -1,3 +1,4 @@
+import os
import threading
import numpy as np
@@ -5,77 +6,14 @@
from . import gstreamer
from .gst import *
-
-def inference_pipeline(render_size, inference_size):
- size = max_inner_size(render_size, inference_size)
- return (
- Filter('glfilterbin', filter='glcolorscale'),
- Caps('video/x-raw', format='RGBA', width=size.width, height=size.height),
- Filter('videoconvert'),
- Caps('video/x-raw', format='RGB', width=size.width, height=size.height),
- Filter('videobox', autocrop=True),
- Caps('video/x-raw', width=inference_size.width, height=inference_size.height),
- Filter('appsink', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False)
- )
-
-
-def file_streaming_pipeline(filename, render_size, inference_size):
- return (
- Filter('filesrc', location=filename),
- Filter('qtdemux'),
- Filter('h264parse', config_interval=-1),
- Caps('video/x-h264', stream_format='byte-stream', profile='baseline', alignment='nal'),
- Tee(pins=((
- Queue(),
- Filter('vpudec'),
- inference_pipeline(render_size, inference_size),
- ), (
- Queue(),
- Filter('appsink', name='h264sink', emit_signals=True, max_buffers=1, drop=False, sync=False),
- )))
- )
-
-
-def camera_streaming_pipeline(render_size, inference_size, profile, bitrate):
- size = max_inner_size(render_size, inference_size)
- return (
- Filter('v4l2src', device='/dev/video1'),
- Caps('video/x-raw', format='YUY2', width=640, height=360, framerate='15/1'),
- Tee(pins=((
- Queue(),
- inference_pipeline(render_size, inference_size)
- ), (
- Queue(),
- Filter('videoconvert'),
- Filter('x264enc',
- speed_preset='ultrafast',
- tune='zerolatency',
- threads=4,
- key_int_max=5,
- bitrate=int(bitrate / 1000), # kbit per second.
- aud=False),
- Caps('video/x-h264', profile=profile),
- Filter('h264parse'),
- Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
- Filter('appsink', name='h264sink', emit_signals=True, max_buffers=1, drop=False, sync=False),
- # Tee(pins=((
- # Queue(),
- # Filter('appsink', name='h264sink', emit_signals=True, max_buffers=1, drop=False, sync=False)
- # ),(
- # Queue(),
- # Filter('vpudec'),
- # Filter('kmssink', sync=False)
- # )))
- )))
- )
-
-
-class InferenceCamera:
+class Camera:
def __init__(self, render_size, inference_size):
self._render_size = Size(*render_size)
self._inference_size = Size(*inference_size)
+
self._loop = gstreamer.loop()
self._thread = None
+
self.on_image = None
@property
@@ -87,7 +25,7 @@
def start_recording(self, obj, format, profile, inline_headers, bitrate, intra_period):
size = min_outer_size(self._inference_size, self._render_size)
- view_box = center_inside(self._render_size, size)
+ window = center_inside(self._render_size, size)
fps_counter = gstreamer.avg_fps_counter(30)
def on_buffer(data, _):
@@ -95,15 +33,14 @@
def on_image(data, _):
if self.on_image:
- self.on_image(np.frombuffer(data, dtype=np.uint8), next(fps_counter), size, view_box)
+ self.on_image(np.frombuffer(data, dtype=np.uint8), next(fps_counter), size, window)
signals = {
'h264sink': {'new-sample': gstreamer.new_sample_callback(on_buffer)},
'appsink': {'new-sample': gstreamer.new_sample_callback(on_image)},
}
- pipeline = camera_streaming_pipeline(self._render_size, self._inference_size,
- profile=profile, bitrate=bitrate)
+ pipeline = self.make_pipeline(format, profile, inline_headers, bitrate, intra_period)
self._thread = threading.Thread(target=gstreamer.run_pipeline,
args=(self._loop, pipeline, signals))
@@ -112,3 +49,37 @@
def stop_recording(self):
self._loop.quit()
self._thread.join()
+
+ def make_pipeline(self, fmt, profile, inline_headers, bitrate, intra_period):
+        raise NotImplementedError
+
+class FileCamera(Camera):
+ def __init__(self, filename, inference_size):
+ info = gstreamer.get_video_info(filename)
+ super().__init__((info.get_width(), info.get_height()), inference_size)
+ self._filename = filename
+
+ def make_pipeline(self, fmt, profile, inline_headers, bitrate, intra_period):
+ return gstreamer.file_streaming_pipeline(self._filename, self._render_size, self._inference_size)
+
+class V4L2Camera(Camera):
+ def __init__(self, fmt, inference_size):
+ super().__init__(fmt.size, inference_size)
+ self._fmt = fmt
+
+ def make_pipeline(self, fmt, profile, inline_headers, bitrate, intra_period):
+ return (
+ gstreamer.v4l2_camera(self._fmt),
+ gstreamer.camera_streaming_pipeline(profile, bitrate, self._render_size, self._inference_size)
+ )
+
+def make_camera(source, inference_size):
+ fmt = parse_format(source)
+ if fmt:
+ return V4L2Camera(fmt, inference_size)
+
+ filename = os.path.expanduser(source)
+ if os.path.isfile(filename):
+ return FileCamera(filename, inference_size)
+
+ return None
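A hedged usage sketch for the new make_camera() factory (assumes an
installed edgetpuvision package; the inference size here is arbitrary):

    from edgetpuvision.camera import make_camera

    # A v4l2 spec is device:pixel-format:WxH:num/den; a plain file path
    # falls through to FileCamera, and anything else returns None.
    camera = make_camera('/dev/video0:YUY2:1280x720:30/1', (300, 300))
    assert camera is not None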
diff --git a/edgetpuvision/classify.py b/edgetpuvision/classify.py
index 2e3a642..7aec55c 100644
--- a/edgetpuvision/classify.py
+++ b/edgetpuvision/classify.py
@@ -8,13 +8,14 @@
import argparse
import collections
+import itertools
import time
from edgetpu.classification.engine import ClassificationEngine
from . import gstreamer
from . import overlays
-from .utils import load_labels
+from .utils import load_labels, input_image_size, same_input_image_sizes
def top_results(window, top_k):
@@ -32,6 +33,44 @@
window.append((yield top_results(window, top_k)))
+def render_gen(args):
+ acc = accumulator(size=args.window, top_k=args.top_k)
+ acc.send(None) # Initialize.
+
+ engines = [ClassificationEngine(m) for m in args.model.split(',')]
+ assert same_input_image_sizes(engines)
+ engines = itertools.cycle(engines)
+ engine = next(engines)
+
+ labels = load_labels(args.labels)
+ draw_overlay = True
+
+ yield input_image_size(engine)
+
+ output = None
+ while True:
+ tensor, size, window, inference_rate, command = (yield output)
+
+ if draw_overlay:
+ start = time.monotonic()
+ results = engine.ClassifyWithInputTensor(tensor, threshold=args.threshold, top_k=args.top_k)
+ inference_time = time.monotonic() - start
+
+ results = [(labels[i], score) for i, score in results]
+ results = acc.send(results)
+ if args.print:
+ print(results)
+
+ output = overlays.classification(results, inference_time, inference_rate, size, window)
+ else:
+ output = None
+
+ if command == 'o':
+ draw_overlay = not draw_overlay
+ elif command == 'n':
+ engine = next(engines)
+
+
def main():
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--source',
@@ -55,27 +94,7 @@
help='Fullscreen rendering.')
args = parser.parse_args()
- engine = ClassificationEngine(args.model)
- labels = load_labels(args.labels)
-
- acc = accumulator(size=args.window, top_k=args.top_k)
- acc.send(None) # Initialize.
-
- def render_overlay(rgb, size, view_box, inference_fps):
- start = time.monotonic()
- results = engine.ClassifyWithInputTensor(rgb, threshold=args.threshold, top_k=args.top_k)
- inference_time = time.monotonic() - start
-
- results = [(labels[i], score) for i, score in results]
- results = acc.send(results)
- if args.print:
- print(results)
-
- return overlays.classification(results, inference_time, inference_fps, size, view_box)
-
- _, h, w, _ = engine.get_input_tensor_shape()
-
- if not gstreamer.run((w, h), render_overlay,
+ if not gstreamer.run_gen(render_gen(args),
source=args.source,
downscale=args.downscale,
fullscreen=args.fullscreen):
diff --git a/edgetpuvision/classify_server.py b/edgetpuvision/classify_server.py
index 98cc098..65695c2 100644
--- a/edgetpuvision/classify_server.py
+++ b/edgetpuvision/classify_server.py
@@ -14,15 +14,18 @@
from edgetpu.classification.engine import ClassificationEngine
from . import overlays
-from .camera import InferenceCamera
+from .camera import make_camera
from .streaming.server import StreamingServer
-from .utils import load_labels
+from .utils import load_labels, input_image_size
def main():
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--source',
+ help='/dev/videoN:FMT:WxH:N/D or .mp4 file',
+ default='/dev/video0:YUY2:1280x720:30/1')
parser.add_argument('--model', required=True,
help='.tflite model path.')
parser.add_argument('--labels', required=True,
@@ -36,17 +39,17 @@
engine = ClassificationEngine(args.model)
labels = load_labels(args.labels)
- _, h, w, _ = engine.get_input_tensor_shape()
+ camera = make_camera(args.source, input_image_size(engine))
+ assert camera is not None
- camera = InferenceCamera((640, 360), (w, h))
with StreamingServer(camera) as server:
- def on_image(rgb, inference_fps, size, view_box):
+ def on_image(tensor, inference_fps, size, window):
start = time.monotonic()
- results = engine.ClassifyWithInputTensor(rgb, threshold=args.threshold, top_k=args.top_k)
+ results = engine.ClassifyWithInputTensor(tensor, threshold=args.threshold, top_k=args.top_k)
inference_time = time.monotonic() - start
results = [(labels[i], score) for i, score in results]
- server.send_overlay(overlays.classification(results, inference_time, inference_fps, size, view_box))
+ server.send_overlay(overlays.classification(results, inference_time, inference_fps, size, window))
camera.on_image = on_image
signal.pause()
diff --git a/edgetpuvision/detect.py b/edgetpuvision/detect.py
index afe2c1e..f46007d 100644
--- a/edgetpuvision/detect.py
+++ b/edgetpuvision/detect.py
@@ -12,13 +12,48 @@
# --labels=${TEST_DATA}/coco_labels.txt
import argparse
+import itertools
import time
from edgetpu.detection.engine import DetectionEngine
from . import gstreamer
from . import overlays
-from .utils import load_labels
+from .utils import load_labels, input_image_size, same_input_image_sizes
+
+def render_gen(args):
+ engines = [DetectionEngine(m) for m in args.model.split(',')]
+ assert same_input_image_sizes(engines)
+ engines = itertools.cycle(engines)
+ engine = next(engines)
+
+ labels = load_labels(args.labels) if args.labels else None
+ filtered_labels = set(l.strip() for l in args.filter.split(',')) if args.filter else None
+ draw_overlay = True
+
+ yield input_image_size(engine)
+
+ output = None
+ while True:
+ tensor, size, window, inference_rate, command = (yield output)
+
+ if draw_overlay:
+ start = time.monotonic()
+ objs = engine.DetectWithInputTensor(tensor, threshold=args.threshold, top_k=args.top_k)
+ inference_time = time.monotonic() - start
+
+ if labels and filtered_labels:
+ objs = [obj for obj in objs if labels[obj.label_id] in filtered_labels]
+
+ output = overlays.detection(objs, labels, inference_time, inference_rate, size, window)
+ else:
+ output = None
+
+ if command == 'o':
+ draw_overlay = not draw_overlay
+ elif command == 'n':
+ engine = next(engines)
+
def main():
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -40,25 +75,10 @@
help='Fullscreen rendering.')
args = parser.parse_args()
- engine = DetectionEngine(args.model)
- labels = load_labels(args.labels) if args.labels else None
- filtered_labels = set(x.strip() for x in args.filter.split(',')) if args.filter else None
-
- def render_overlay(rgb, size, view_box, inference_fps):
- start = time.monotonic()
- objs = engine.DetectWithInputTensor(rgb, threshold=args.threshold, top_k=args.top_k)
- inference_time = time.monotonic() - start
- if labels and filtered_labels:
- objs = [obj for obj in objs if labels[obj.label_id] in filtered_labels]
-
- return overlays.detection(objs, inference_time, inference_fps, labels, size, view_box)
-
- _, h, w, _ = engine.get_input_tensor_shape()
-
- if not gstreamer.run((w, h), render_overlay,
- source=args.source,
- downscale=args.downscale,
- fullscreen=args.fullscreen):
+ if not gstreamer.run_gen(render_gen(args),
+ source=args.source,
+ downscale=args.downscale,
+ fullscreen=args.fullscreen):
print('Invalid source argument:', args.source)
diff --git a/edgetpuvision/detect_server.py b/edgetpuvision/detect_server.py
index e8c1ca8..afcb359 100644
--- a/edgetpuvision/detect_server.py
+++ b/edgetpuvision/detect_server.py
@@ -13,20 +13,24 @@
import argparse
import logging
+import os
import signal
import time
from edgetpu.detection.engine import DetectionEngine
from . import overlays
-from .camera import InferenceCamera
+from .camera import make_camera
from .streaming.server import StreamingServer
-from .utils import load_labels
+from .utils import load_labels, input_image_size
def main():
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--source',
+ help='/dev/videoN:FMT:WxH:N/D or .mp4 file',
+ default='/dev/video0:YUY2:1280x720:30/1')
parser.add_argument('--model',
help='.tflite model path.', required=True)
parser.add_argument('--labels',
@@ -40,21 +44,21 @@
engine = DetectionEngine(args.model)
labels = load_labels(args.labels) if args.labels else None
- filtered_labels = set(x.strip() for x in args.filter.split(',')) if args.filter else None
+ filtered_labels = set(l.strip() for l in args.filter.split(',')) if args.filter else None
- _, h, w, _ = engine.get_input_tensor_shape()
+ camera = make_camera(args.source, input_image_size(engine))
+ assert camera is not None
- camera = InferenceCamera((640, 360), (w, h))
with StreamingServer(camera) as server:
- def on_image(rgb, inference_fps, size, view_box):
+ def on_image(tensor, inference_fps, size, window):
start = time.monotonic()
- objs = engine.DetectWithInputTensor(rgb, threshold=args.threshold, top_k=args.top_k)
+ objs = engine.DetectWithInputTensor(tensor, threshold=args.threshold, top_k=args.top_k)
inference_time = time.monotonic() - start
if labels and filtered_labels:
objs = [obj for obj in objs if labels[obj.label_id] in filtered_labels]
- server.send_overlay(overlays.detection(objs, inference_time, inference_fps, labels, size, view_box))
+ server.send_overlay(overlays.detection(objs, labels, inference_time, inference_fps, size, window))
camera.on_image = on_image
signal.pause()
diff --git a/edgetpuvision/gst.py b/edgetpuvision/gst.py
index 230eafa..7853e11 100644
--- a/edgetpuvision/gst.py
+++ b/edgetpuvision/gst.py
@@ -1,20 +1,34 @@
import collections
import itertools
+import re
__all__ = ('Filter', 'Queue', 'Caps', 'Tee',
- 'Size', 'Fraction',
- 'describe', 'max_inner_size', 'min_outer_size', 'center_inside')
+ 'Size', 'Fraction', 'Format',
+ 'describe', 'max_inner_size', 'min_outer_size', 'center_inside', 'parse_format')
-Fraction = collections.namedtuple('Fraction', ['num', 'den'])
+Fraction = collections.namedtuple('Fraction', ('num', 'den'))
Fraction.__str__ = lambda self: '%s/%s' % (self.num, self.den)
-Size = collections.namedtuple('Size', ['width', 'height'])
+Size = collections.namedtuple('Size', ('width', 'height'))
Size.__mul__ = lambda self, arg: Size(int(arg * self.width), int(arg * self.height))
Size.__rmul__ = lambda self, arg: Size(int(arg * self.width), int(arg * self.height))
Size.__floordiv__ = lambda self, arg: Size(self.width // arg, self.height // arg)
Size.__truediv__ = lambda self, arg: Size(int(self.width / arg), int(self.height / arg))
Size.__str__ = lambda self: '%dx%d' % self
+Format = collections.namedtuple('Format', ('device', 'pixel', 'size', 'framerate'))
+
+V4L2_DEVICE = re.compile(r'(?P<dev>[^:]+):(?P<fmt>[^:]+):(?P<w>\d+)x(?P<h>\d+):(?P<num>\d+)/(?P<den>\d+)')
+
+def parse_format(src):
+ match = V4L2_DEVICE.search(src)
+ if match:
+ return Format(device=match.group('dev'),
+ pixel=match.group('fmt'),
+ size=Size(int(match.group('w')), int(match.group('h'))),
+ framerate=Fraction(int(match.group('num')), int(match.group('den'))))
+ return None
+
def max_inner_size(what, where):
# Example: what=(800, 600) where=(300, 300) => (300, 225)
return what * min(where.width / what.width, where.height / what.height)
@@ -52,10 +66,10 @@
return self.params[name]
class Filter(Element):
- def __init__(self, filtername, pins=None, **params):
+ def __init__(self, filtername, pads=None, **params):
super().__init__(params)
self.filtername = filtername
- self.pins = pins
+ self.pads = pads
def __str__(self):
return join(self.filtername, ' ', self.params)
@@ -76,9 +90,9 @@
return join(self.mediatype, ',', self.params, ',')
class Tee(Element):
- def __init__(self, pins=None, **params):
+ def __init__(self, pads=None, **params):
super().__init__(params)
- self.pins = pins
+ self.pads = pads
self.params = params
def __str__(self):
@@ -93,13 +107,13 @@
elif isinstance(arg, Tee):
params = params_with_name(arg.params, 't', name_gens)
return join('tee', ' ', params) + '\n' + \
- '\n'.join('%s%s. ! %s' % (indent, params['name'], recur(x)) for x in arg.pins)
+ '\n'.join('%s%s. ! %s' % (indent, params['name'], recur(x)) for x in arg.pads)
elif isinstance(arg, Filter):
body = join(arg.filtername, ' ', arg.params)
- if arg.pins:
+ if arg.pads:
params = params_with_name(arg.params, 'f', name_gens)
return body + '\n' + \
- '\n'.join('%s%s.%s ! %s' % (indent, params['name'], pin_name, recur(x)) for pin_name, x in arg.pins.items())
+ '\n'.join('%s%s.%s ! %s' % (indent, params['name'], pad_name, recur(x)) for pad_name, x in arg.pads.items())
return body
elif isinstance(arg, Queue):
return join('queue', ' ', arg.params)
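For reference, parse_format() applied to the default --source value (a
doctest-style sketch; the fields follow from the V4L2_DEVICE regex above):

    from edgetpuvision.gst import parse_format

    fmt = parse_format('/dev/video0:YUY2:1280x720:30/1')
    # Format(device='/dev/video0', pixel='YUY2',
    #        size=Size(width=1280, height=720),
    #        framerate=Fraction(num=30, den=1))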
diff --git a/edgetpuvision/gstreamer.py b/edgetpuvision/gstreamer.py
index 43f561a..bd3cc5e 100644
--- a/edgetpuvision/gstreamer.py
+++ b/edgetpuvision/gstreamer.py
@@ -4,7 +4,6 @@
import functools
import os
import queue
-import re
import sys
import termios
import threading
@@ -30,6 +29,8 @@
from .gst import *
+COMMAND_SAVE_FRAME = ' '
+COMMAND_PRINT_INFO = 'p'
def set_nonblocking(fd):
flags = fcntl.fcntl(fd, fcntl.F_GETFL)
@@ -87,15 +88,13 @@
Filter('appsink', name='appsink', emit_signals=True, max_buffers=1, drop=True, sync=False)
)
-
-# TODO(dkovalev): Image as an input.
def image_file_pipeline(filename, render_size, inference_size, fullscreen):
size = max_inner_size(render_size, inference_size)
return (
Filter('filesrc', location=filename),
Filter('decodebin'),
Filter('imagefreeze'),
- Tee(pins=((
+ Tee(pads=((
Queue(max_size_buffers=1),
Filter('videoconvert'),
Filter('videoscale'),
@@ -113,7 +112,6 @@
)))
)
-
def video_file_pipeline(filename, render_size, inference_size, fullscreen):
return (
Filter('filesrc', location=filename),
@@ -121,31 +119,31 @@
Filter('h264parse'),
Filter('vpudec'),
Filter('glupload'),
- Tee(pins=((
+ Tee(pads=((
Queue(max_size_buffers=1),
Filter('glfilterbin', filter='glcolorscale'),
Filter('rsvgoverlay', name='overlay'),
Caps('video/x-raw', width=render_size.width, height=render_size.height),
sink(fullscreen),
),(
- Queue(max_size_buffers=1),
+ Queue(max_size_buffers=1, leaky='downstream'),
inference_pipeline(render_size, inference_size),
)))
)
# v4l2-ctl --list-formats-ext --device /dev/video1
-def v4l2_camera(device, fmt, size, framerate):
+def v4l2_camera(fmt):
return (
- Filter('v4l2src', device=device),
- Caps('video/x-raw', format=fmt, width=size.width, height=size.height,
- framerate='%d/%d' % framerate),
+ Filter('v4l2src', device=fmt.device),
+ Caps('video/x-raw', format=fmt.pixel, width=fmt.size.width, height=fmt.size.height,
+ framerate='%d/%d' % fmt.framerate),
)
def video_camera_pipeline(render_size, inference_size, fullscreen):
return (
# TODO(dkovalev): Queue(max_size_buffers=1, leaky='downstream'),
Filter('glupload'),
- Tee(pins=((
+ Tee(pads=((
Queue(max_size_buffers=1, leaky='downstream'),
Filter('glfilterbin', filter='glcolorscale'),
Filter('rsvgoverlay', name='overlay'),
@@ -156,9 +154,56 @@
)))
)
-class Command:
- SAVE_FRAME = 'save_frame'
- PRINT_INFO = 'print_info'
+def h264sink(display_decoded=False):
+    appsink = Filter('appsink', name='h264sink', emit_signals=True, max_buffers=1, drop=False, sync=False)
+
+ if display_decoded:
+ return Tee(pads=(
+ (Queue(), appsink),
+ (Queue(), Filter('vpudec'), Filter('kmssink', sync=False))
+ ))
+
+ return appsink
+
+def file_streaming_pipeline(filename, render_size, inference_size):
+ return (
+ Filter('filesrc', location=filename),
+ Filter('qtdemux'),
+ Tee(pads=((
+ Queue(max_size_buffers=1),
+ Filter('h264parse'),
+ Filter('vpudec'),
+ inference_pipeline(render_size, inference_size),
+ ), (
+ Queue(max_size_buffers=1),
+ Filter('h264parse'),
+ Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
+ h264sink()
+ )))
+ )
+
+def camera_streaming_pipeline(profile, bitrate, render_size, inference_size):
+ return (
+ Tee(pads=((
+ Queue(),
+ inference_pipeline(render_size, inference_size)
+ ), (
+ Queue(max_size_buffers=1, leaky='downstream'),
+ Filter('videoconvert'),
+ Filter('x264enc',
+ speed_preset='ultrafast',
+ tune='zerolatency',
+ threads=4,
+ key_int_max=5,
+ bitrate=int(bitrate / 1000), # kbit per second.
+ aud=False),
+ Caps('video/x-h264', profile=profile),
+ Filter('h264parse'),
+ Caps('video/x-h264', stream_format='byte-stream', alignment='nal'),
+ h264sink()
+ )))
+ )
def save_frame(rgb, size, overlay=None, ext='png'):
tag = '%010d' % int(time.monotonic() * 1000)
@@ -188,18 +233,15 @@
return Size(structure.get_value('width'),
structure.get_value('height'))
-def get_video_size(uri):
+def get_video_info(filename):
#Command line: gst-discoverer-1.0 -v ~/cars_highway.mp4
+ uri = 'file://' + filename
discoverer = GstPbutils.Discoverer()
info = discoverer.discover_uri(uri)
- # TODO(dkovalev): Image as an input.
- #stream_info = info.get_stream_info()
- #return Size(stream_info.get_width(), stream_info.get_height())
-
streams = info.get_video_streams()
assert len(streams) == 1
- return caps_size(streams[0].get_caps())
+ return streams[0]
def loop():
return GLib.MainLoop.new(None, False)
@@ -255,7 +297,7 @@
pipeline.set_state(Gst.State.PLAYING)
try:
loop.run()
- except KeyboardInterrupt as e:
+ except KeyboardInterrupt:
pass
finally:
pipeline.set_state(Gst.State.NULL)
@@ -263,61 +305,67 @@
def on_keypress(fd, flags, commands):
for ch in sys.stdin.read():
- if ch == ' ':
- commands.put(Command.SAVE_FRAME)
- elif ch == 'i':
- commands.put(Command.PRINT_INFO)
+ commands.put(ch)
return True
def on_new_sample(sink, pipeline, render_overlay, render_size, images, commands, fps_counter):
with pull_sample(sink) as (sample, data):
- fps = next(fps_counter)
- svg = render_overlay(np.frombuffer(data, dtype=np.uint8), inference_fps=fps)
- if svg:
- overlay = pipeline.get_by_name('overlay')
- overlay.set_property('data', svg)
+ inference_rate = next(fps_counter)
+ custom_command = None
+ save_frame = False
command = get_nowait(commands)
- if command is Command.SAVE_FRAME:
- images.put((data, caps_size(sample.get_caps()), svg))
- elif command is Command.PRINT_INFO:
+ if command == COMMAND_SAVE_FRAME:
+ save_frame = True
+ elif command == COMMAND_PRINT_INFO:
print('Timestamp: %.2f' % time.monotonic())
- print('Inference FPS: %s' % fps)
+ print('Inference FPS: %s' % inference_rate)
print('Render size: %d x %d' % render_size)
print('Inference size: %d x %d' % caps_size(sample.get_caps()))
+ else:
+ custom_command = command
+
+ svg = render_overlay(np.frombuffer(data, dtype=np.uint8),
+ inference_rate=inference_rate,
+ command=custom_command)
+ overlay = pipeline.get_by_name('overlay')
+ overlay.set_property('data', svg)
+
+ if save_frame:
+ images.put((data, caps_size(sample.get_caps()), svg))
return Gst.FlowReturn.OK
-V4L2_DEVICE = re.compile(r'(?P<dev>[^:]+):(?P<fmt>[^:]+):(?P<w>\d+)x(?P<h>\d+):(?P<num>\d+)/(?P<den>\d+)')
-
+def run_gen(render_overlay_gen, *, source, downscale, fullscreen):
+ inference_size = render_overlay_gen.send(None) # Initialize.
+ return run(inference_size,
+ lambda tensor, size, window, inference_rate, command:
+ render_overlay_gen.send((tensor, size, window, inference_rate, command)),
+ source=source,
+ downscale=downscale,
+ fullscreen=fullscreen)
def run(inference_size, render_overlay, *, source, downscale, fullscreen):
- match = V4L2_DEVICE.search(source)
- if match:
- run_camera(inference_size, render_overlay,
- device=match.group('dev'),
- fmt=match.group('fmt'),
- size=(int(match.group('w')), int(match.group('h'))),
- framerate=(int(match.group('num')), int(match.group('den'))),
- fullscreen=fullscreen)
+ fmt = parse_format(source)
+ if fmt:
+ run_camera(inference_size, render_overlay, fmt, fullscreen)
return True
- else:
- filename = os.path.expanduser(source)
- if os.path.isfile(filename):
- run_file(inference_size, render_overlay,
- filename=filename,
- downscale=downscale,
- fullscreen=fullscreen)
- return True
+
+ filename = os.path.expanduser(source)
+ if os.path.isfile(filename):
+ run_file(inference_size, render_overlay,
+ filename=filename,
+ downscale=downscale,
+ fullscreen=fullscreen)
+ return True
return False
-
-def run_camera(inference_size, render_overlay, *, device, fmt, size, framerate, fullscreen):
+def run_camera(inference_size, render_overlay, fmt, fullscreen):
inference_size = Size(*inference_size)
- camera = v4l2_camera(device, fmt, Size(*size), framerate)
+ camera = v4l2_camera(fmt)
caps = next(x for x in camera if isinstance(x, Caps))
render_size = Size(caps.width, caps.height)
pipeline = camera + video_camera_pipeline(render_size, inference_size, fullscreen)
@@ -327,9 +375,13 @@
def run_file(inference_size, render_overlay, *, filename, downscale, fullscreen):
inference_size = Size(*inference_size)
- video_size = get_video_size('file://' + filename)
- render_size = video_size / downscale
- pipeline = video_file_pipeline(filename, render_size, inference_size, fullscreen)
+ info = get_video_info(filename)
+ render_size = Size(info.get_width(), info.get_height()) / downscale
+ if info.is_image():
+ pipeline = image_file_pipeline(filename, render_size, inference_size, fullscreen)
+ else:
+ pipeline = video_file_pipeline(filename, render_size, inference_size, fullscreen)
+
return run_loop(pipeline, inference_size, render_size, render_overlay)
@@ -346,13 +398,13 @@
stack.enter_context(term_raw_mode(sys.stdin.fileno()))
size = min_outer_size(inference_size, render_size)
- view_box = center_inside(render_size, size)
+ window = center_inside(render_size, size)
run_pipeline(loop, pipeline, {'appsink': {'new-sample':
functools.partial(on_new_sample,
render_overlay=functools.partial(render_overlay,
size=size,
- view_box=view_box),
+ window=window),
render_size=render_size,
images=images,
commands=commands,
diff --git a/edgetpuvision/overlays.py b/edgetpuvision/overlays.py
index 6d5b884..3eb9a50 100644
--- a/edgetpuvision/overlays.py
+++ b/edgetpuvision/overlays.py
@@ -8,16 +8,16 @@
def _normalize_rect(rect, size):
width, height = size
x0, y0, x1, y1 = rect
- x, y, w, h = x0, y0, x1 - x0, y1 - y0
- return int(x * width), int(y * height), int(w * width), int(h * height)
+ return int(x0 * width), int(y0 * height), \
+ int((x1 - x0) * width), int((y1 - y0) * height)
-def classification(results, inference_time, inference_fps, size, view_box):
- x0, y0, _, _ = view_box
+def classification(results, inference_time, inference_rate, size, window):
+ x0, y0, _, _ = window
lines = [
'Inference time: %.2f ms (%.2f fps)' % (inference_time * 1000, 1.0 / inference_time),
- 'Inference frame rate: %.2f fps' % inference_fps
+ 'Inference frame rate: %.2f fps' % inference_rate
]
for i, (label, score) in enumerate(results):
@@ -26,31 +26,30 @@
defs = svg.Defs()
defs += CSS_STYLES
- doc = svg.Svg(viewBox='%s %s %s %s' % view_box, font_size='26px')
+ doc = svg.Svg(viewBox='%s %s %s %s' % window, font_size='26px')
doc += defs
doc += svg.normal_text(lines, x=x0 + 10, y=y0 + 10, font_size_em=1.1)
return str(doc)
-def detection(objs, inference_time, inference_fps, labels, size, view_box):
- x0, y0, _, _ = view_box
+def detection(objs, labels, inference_time, inference_rate, size, window):
+ x0, y0, _, _ = window
defs = svg.Defs()
defs += CSS_STYLES
- doc = svg.Svg(viewBox='%s %s %s %s' % view_box, font_size='26px')
+ doc = svg.Svg(viewBox='%s %s %s %s' % window, font_size='26px')
doc += defs
doc += svg.normal_text((
'Inference time: %.2f ms (%.2f fps)' % (inference_time * 1000, 1.0 / inference_time),
- 'Inference frame rate: %.2f fps' % inference_fps,
+ 'Inference frame rate: %.2f fps' % inference_rate,
'Objects: %d' % len(objs),
), x0 + 10, y0 + 10, font_size_em=1.1)
for obj in objs:
percent = int(100 * obj.score)
if labels:
- label = labels[obj.label_id]
- caption = '%d%% %s' % (percent, label)
+ caption = '%d%% %s' % (percent, labels[obj.label_id])
else:
caption = '%d%%' % percent
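A quick worked example of _normalize_rect(), which maps a rectangle in
relative [0, 1] coordinates to integer pixels as (x, y, w, h):

    _normalize_rect((0.25, 0.5, 0.75, 1.0), (640, 480))
    # -> (160, 240, 320, 240)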
diff --git a/edgetpuvision/utils.py b/edgetpuvision/utils.py
index 2e2e7c9..9495973 100644
--- a/edgetpuvision/utils.py
+++ b/edgetpuvision/utils.py
@@ -1,7 +1,16 @@
import re
+LABEL_PATTERN = re.compile(r'\s*(\d+)(.+)')
+
def load_labels(path):
- p = re.compile(r'\s*(\d+)(.+)')
with open(path, 'r', encoding='utf-8') as f:
- lines = (p.match(line).groups() for line in f.readlines())
+ lines = (LABEL_PATTERN.match(line).groups() for line in f.readlines())
return {int(num): text.strip() for num, text in lines}
+
+
+def input_image_size(engine):
+ _, h, w, _ = engine.get_input_tensor_shape()
+ return w, h
+
+def same_input_image_sizes(engines):
+ return len({input_image_size(engine) for engine in engines}) == 1
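A hedged sketch of the two new helpers with stub engines (only
get_input_tensor_shape() is assumed, matching its use above; the NHWC
shape is illustrative):

    from edgetpuvision.utils import input_image_size, same_input_image_sizes

    class StubEngine:
        def __init__(self, w, h):
            self._shape = (1, h, w, 3)
        def get_input_tensor_shape(self):
            return self._shape

    engines = [StubEngine(300, 300), StubEngine(300, 300)]
    assert input_image_size(engines[0]) == (300, 300)
    assert same_input_image_sizes(engines)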
diff --git a/setup.py b/setup.py
index 997fba2..eaa0313 100644
--- a/setup.py
+++ b/setup.py
@@ -18,6 +18,7 @@
'protobuf>=3.0.0',
'edgetpu',
],
+ scripts=['bin/edgetpu_demo'],
entry_points = {
'console_scripts': ['edgetpu_classify=edgetpuvision.classify:main',
'edgetpu_classify_server=edgetpuvision.classify_server:main',