Detecting object distance using Depth Camera and SSD-Mobilenet

Follow along: Detecting object distance using Depth Camera and SSD-Mobilenet

The program launching process along with parameter settings are all simplified and set up on the Jupyter Notebook Environment.

Open the 01_09_detect_object_and_calculate_distance.ipynb Jupyter Notebook.
This code is output by applying numpy and matplotlib.
This code uses a pre-trained model.

(The Jetson Board used for these examples are => Jetson Nano)

01_09_detect_object_and_calculate_distance.ipynb
Running the cell code.

Ctrl + Enter

Load the modules needed to run your code.

import cv2
import numpy as np
import matplotlib.pyplot as plt
import pyrealsense2 as rs
print("Environment Ready")

Load the bag file with data and realsense pipeline.

# Setup RealSense pipeline and configuration:
pipe = rs.pipeline()
cfg = rs.config()
cfg.enable_device_from_file("object_detection.bag")
profile = pipe.start(cfg)

# Skip 5 first frames to give the Auto-Exposure time to adjust
for x in range(5):
    pipe.wait_for_frames()

# Store next frameset for later processing:
frameset = pipe.wait_for_frames()
color_frame = frameset.get_color_frame()
depth_frame = frameset.get_depth_frame()

# Cleanup:
pipe.stop()
print("Frames Captured")

Display the loaded image in rgb color.

# Display color frame:
color = np.asanyarray(color_frame.get_data())
plt.rcParams["axes.grid"] = False
plt.rcParams['figure.figsize'] = [12, 6]
plt.imshow(color)

Display the loaded image in colorized depth.

# Colorize depth frame using the RealSense colorizer:
colorizer = rs.colorizer()
colorized_depth = np.asanyarray(colorizer.colorize(depth_frame).get_data())
plt.imshow(colorized_depth)

Display an image by comparing an RGB color image to a color depth image.

# Create alignment primitive with color as its target stream:
align = rs.align(rs.stream.color)
frameset = align.process(frameset)

# Update color and depth frames with aligned data:
aligned_depth_frame = frameset.get_depth_frame()
colorized_depth = np.asanyarray(colorizer.colorize(aligned_depth_frame).get_data())

# Display color and aligned depth frames together:
images = np.hstack((color, colorized_depth))
plt.imshow(images)

Load the model to use SSD-MobileNet and display the detected class.

# Standard OpenCV boilerplate for running the neural network:
height, width = color.shape[:2]
expected = 300
aspect = width / height
resized_image = cv2.resize(color, (round(expected * aspect), expected))
crop_start = round(expected * (aspect - 1) / 2)
crop_img = resized_image[0:expected, crop_start:crop_start+expected]

# Load pre-trained MobileNet SSD model:
net = cv2.dnn.readNetFromCaffe("MobileNetSSD_deploy.prototxt", "MobileNetSSD_deploy.caffemodel")
inScaleFactor = 0.007843
meanVal = 127.53
classNames = ("background", "aeroplane", "bicycle", "bird", "boat",
            "bottle", "bus", "car", "cat", "chair",
            "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant",
            "sheep", "sofa", "train", "tvmonitor")

# Preprocess the image for the network:
blob = cv2.dnn.blobFromImage(crop_img, inScaleFactor, (expected, expected), meanVal, False)
net.setInput(blob, "data")
detections = net.forward("detection_out")

# Extract object information from the detection results:
label = detections[0, 0, 0, 1]
conf = detections[0, 0, 0, 2]
xmin = detections[0, 0, 0, 3]
ymin = detections[0, 0, 0, 4]
xmax = detections[0, 0, 0, 5]
ymax = detections[0, 0, 0, 6]
className = classNames[int(label)]

# Draw bounding box and label on the cropped image:
cv2.rectangle(crop_img, (int(xmin * expected), int(ymin * expected)),
            (int(xmax * expected), int(ymax * expected)), (255, 255, 255), 2)
cv2.putText(crop_img, className,
            (int(xmin * expected), int(ymin * expected) - 5),
            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255))

plt.imshow(crop_img)

Use the depth to guess the size of the detected object.

# Scale detection coordinates to depth frame:
scale = height / expected
xmin_depth = int((xmin * expected + crop_start) * scale)
ymin_depth = int((ymin * expected) * scale)
xmax_depth = int((xmax * expected + crop_start) * scale)
ymax_depth = int((ymax * expected) * scale)

# Draw bounding box on the colorized depth frame:
cv2.rectangle(colorized_depth, (xmin_depth, ymin_depth),
            (xmax_depth, ymax_depth), (255, 255, 255), 2)
plt.imshow(colorized_depth)

Measure the distance of the detected object.

# Crop depth data and convert to meters:
depth = np.asanyarray(aligned_depth_frame.get_data())
depth = depth[xmin_depth:xmax_depth, ymin_depth:ymax_depth].astype(float)
depth_scale = profile.get_device().first_depth_sensor().get_depth_scale()
depth = depth * depth_scale
dist, _, _, _ = cv2.mean(depth)
print("Detected a {0} {1:.3} meters away.".format(className, dist))