Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ endif()

find_package(CUDA)
find_package(NVJPEG)
find_package(autoware_type_adapters)
find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CULIBOS culibos ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
if (${NVJPEG_FOUND} AND CUDART_LIBRARY AND CULIBOS)
Expand Down Expand Up @@ -150,6 +151,10 @@ if (JETSON)
rectifier
jpeg_compressor
)

ament_target_dependencies(accelerated_image_processor
autoware_type_adapters
)

rclcpp_components_register_node(accelerated_image_processor
PLUGIN "gpu_imgproc::GpuImgProc"
Expand Down
10 changes: 9 additions & 1 deletion include/accelerator/jpeg_compressor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include <sensor_msgs/msg/image.hpp>
#include <sensor_msgs/msg/compressed_image.hpp>
#include "type_adapters/image_container.hpp"
#include "type_adapters/compressed_image_container.hpp"
#include <string>

// This needs to be included before other CUDA headers in some environments
Expand Down Expand Up @@ -29,7 +31,10 @@ class NvJPEGEncoder;
namespace JpegCompressor {
using Image = sensor_msgs::msg::Image;
using CompressedImage = sensor_msgs::msg::CompressedImage;

using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;
using CompressedImageContainer = autoware::type_adaptation::type_adapters::CompressedImageContainer;
using ImageContainerUniquePtr = autoware::type_adaptation::type_adapters::ImageContainerUniquePtr;
using CompressedImageContainerUniquePtr = autoware::type_adaptation::type_adapters::CompressedImageContainerUniquePtr;
enum class ImageFormat {
RGB,
BGR
Expand All @@ -42,6 +47,7 @@ class CPUCompressor {
~CPUCompressor();

CompressedImage::UniquePtr compress(const Image &msg, int quality = 90, int format = TJPF_RGB, int sampling = TJ_420);
CompressedImage::UniquePtr compress(const ImageContainer &msg, int quality = 90, int format = TJPF_RGB, int sampling = TJ_420);
private:
tjhandle handle_;
unsigned char *jpegBuf_;
Expand All @@ -56,6 +62,7 @@ class JetsonCompressor {
~JetsonCompressor();

CompressedImage::UniquePtr compress(const Image &msg, int quality = 90, ImageFormat format = ImageFormat::RGB);
CompressedImage::UniquePtr compress(const ImageContainer &msg, int quality = 90, ImageFormat format = ImageFormat::RGB);
void setCudaStream(cuda::stream::handle_t &raw_cuda_stream);
private:
NvJPEGEncoder *encoder_;
Expand All @@ -80,6 +87,7 @@ class NVJPEGCompressor {
~NVJPEGCompressor();

CompressedImage::UniquePtr compress(const Image &msg, int quality = 90, ImageFormat format = ImageFormat::RGB);
CompressedImage::UniquePtr compress(const ImageContainer &msg, int quality = 90, ImageFormat format = ImageFormat::RGB);
void setCudaStream(const cudaStream_t &raw_cuda_stream);

private:
Expand Down
7 changes: 6 additions & 1 deletion include/accelerator/rectifier.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include <sensor_msgs/msg/image.hpp>
#include <sensor_msgs/msg/camera_info.hpp>

#include "type_adapters/image_container.hpp"
#ifdef OPENCV_AVAILABLE
#include <opencv2/core.hpp>
#endif
Expand All @@ -16,6 +16,8 @@

using CameraInfo = sensor_msgs::msg::CameraInfo;
using Image = sensor_msgs::msg::Image;
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;
using ImageContainerUniquePtr = autoware::type_adaptation::type_adapters::ImageContainerUniquePtr;

namespace Rectifier {

Expand Down Expand Up @@ -44,6 +46,7 @@ class NPPRectifier {
cudaStream_t& GetCudaStream() {return stream_;}

Image::UniquePtr rectify(const Image &msg);
ImageContainerUniquePtr rectify(const ImageContainer &msg);
private:
Npp32f *pxl_map_x_;
Npp32f *pxl_map_y_;
Expand All @@ -66,6 +69,7 @@ class OpenCVRectifierCPU {
~OpenCVRectifierCPU();

Image::UniquePtr rectify(const Image &msg);
ImageContainerUniquePtr rectify(const ImageContainer &msg);
private:
cv::Mat map_x_;
cv::Mat map_y_;
Expand All @@ -83,6 +87,7 @@ class OpenCVRectifierGPU {
~OpenCVRectifierGPU();

Image::UniquePtr rectify(const Image &msg);
ImageContainerUniquePtr rectify(const ImageContainer &msg);
private:
cv::cuda::GpuMat map_x_;
cv::cuda::GpuMat map_y_;
Expand Down
10 changes: 10 additions & 0 deletions include/gpu_imgproc/gpu_imgproc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

#include "accelerator/rectifier.hpp"
#include "accelerator/jpeg_compressor.hpp"
#include "type_adapters/image_container.hpp"
#include "type_adapters/compressed_image_container.hpp"
// #include <sensor_msgs/msg/compressed_image.hpp>

namespace gpu_imgproc {
Expand All @@ -15,8 +17,13 @@ class GpuImgProc : public rclcpp::Node {
virtual ~GpuImgProc();

private:
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;
using CompressedImageContainer = autoware::type_adaptation::type_adapters::CompressedImageContainer;
using ImageContainerUniquePtr = autoware::type_adaptation::type_adapters::ImageContainerUniquePtr;
using CompressedImageContainerUniquePtr = autoware::type_adaptation::type_adapters::CompressedImageContainerUniquePtr;
void imageCallback(const sensor_msgs::msg::Image::SharedPtr msg);
void cameraInfoCallback(const sensor_msgs::msg::CameraInfo::SharedPtr msg);
void gpuImageCallback(const std::shared_ptr<ImageContainer> msg);
void determineQosCallback(bool do_rectify);

#if NPP_AVAILABLE
Expand All @@ -40,9 +47,11 @@ class GpuImgProc : public rclcpp::Node {
#endif

rclcpp::Subscription<sensor_msgs::msg::Image>::SharedPtr img_sub_;
rclcpp::Subscription<ImageContainer>::SharedPtr gpu_image_sub_;
rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr info_sub_;

rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr rectified_pub_;
rclcpp::Publisher<ImageContainer>::SharedPtr gpu_rectified_pub_;
rclcpp::Publisher<sensor_msgs::msg::CompressedImage>::SharedPtr compressed_pub_;
rclcpp::Publisher<sensor_msgs::msg::CompressedImage>::SharedPtr rect_compressed_pub_;

Expand All @@ -51,6 +60,7 @@ class GpuImgProc : public rclcpp::Node {
Rectifier::Implementation rectifier_impl_;
Rectifier::MappingImpl mapping_impl_;
bool rectifier_active_;
bool type_adaptation_active_;
double alpha_;
int32_t jpeg_quality_;
};
Expand Down
1 change: 1 addition & 0 deletions package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
<depend>cv_bridge</depend>
<depend>libturbojpeg</depend>
<depend>image_geometry</depend>
<depend>autoware_type_adapters</depend>

<test_depend>ament_cmake_gtest</test_depend>
<test_depend>ament_lint_auto</test_depend>
Expand Down
134 changes: 133 additions & 1 deletion src/accelerator/jpeg_compressor.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#include <cstdio>
#include <cstring>

#include <rclcpp/rclcpp.hpp>
#include <chrono>
#include <thread>

#include <cuda_runtime.h>

#include "accelerator/jpeg_compressor.hpp"

Expand Down Expand Up @@ -71,6 +74,12 @@ CompressedImage::UniquePtr CPUCompressor::compress(const Image &msg, int quality

return compressed_msg;
}
CompressedImage::UniquePtr CPUCompressor::compress(const ImageContainer &msg, int quality, int format, int sampling) {
sensor_msgs::msg::Image image_msg;
msg.get_sensor_msgs_image(image_msg);
return compress(image_msg, quality, format, sampling);
}

#endif

#ifdef JETSON_AVAILABLE
Expand Down Expand Up @@ -127,8 +136,22 @@ CompressedImage::UniquePtr JetsonCompressor::compress(const Image &msg, int qual
TEST_ERROR(buffer_->allocateMemory() != 0, "NvBuffer allocation failed");

encoder_->setCropRect(0, 0, width, height);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a common bug fix, not directly related to type adaptation, but needed.

// Synchronize CUDA device to ensure memory allocation is complete
std::this_thread::sleep_for(std::chrono::milliseconds(300));
cudaError_t cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
// Handle error
fprintf(stderr, "cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(cudaStatus));
// You may want to throw an exception or handle the error appropriately
}
}

// RCLCPP_ERROR(
// rclcpp::get_logger("v4l2_camera"),
// "dev_yuv_step_bytes_: %d", dev_yuv_step_bytes_[0]
// );

TEST_ERROR(cudaMemcpy2DAsync(static_cast<void*>(dev_image_), dev_image_step_bytes_,
static_cast<const void*>(img.data()), msg.step,
msg.step * sizeof(Npp8u),
Expand All @@ -148,6 +171,7 @@ CompressedImage::UniquePtr JetsonCompressor::compress(const Image &msg, int qual
NvBuffer::NvBufferPlane &plane_y = buffer_->planes[0];
NvBuffer::NvBufferPlane &plane_u = buffer_->planes[1];
NvBuffer::NvBufferPlane &plane_v = buffer_->planes[2];

TEST_ERROR(cudaMemcpy2DAsync(plane_y.data, plane_y.fmt.stride,
dev_yuv_[0], dev_yuv_step_bytes_[0], width, height,
cudaMemcpyDeviceToHost, stream_.handle()) != cudaSuccess,
Expand Down Expand Up @@ -178,6 +202,107 @@ CompressedImage::UniquePtr JetsonCompressor::compress(const Image &msg, int qual
return compressed_msg;
}

CompressedImage::UniquePtr JetsonCompressor::compress(const ImageContainer &msg, int quality, ImageFormat format)
{
setCudaStream(msg.cuda_stream()->stream());
// Fill elements of nppStreamContext
{
npp_stream_context_.hStream = stream_.handle();
cudaGetDevice(&npp_stream_context_.nCudaDeviceId);
cudaDeviceProp dev_prop;
cudaGetDeviceProperties(&dev_prop, npp_stream_context_.nCudaDeviceId);
npp_stream_context_.nMultiProcessorCount = dev_prop.multiProcessorCount;
npp_stream_context_.nMaxThreadsPerMultiProcessor = dev_prop.maxThreadsPerMultiProcessor;
npp_stream_context_.nMaxThreadsPerBlock = dev_prop.maxThreadsPerBlock;
npp_stream_context_.nSharedMemPerBlock = dev_prop.sharedMemPerBlock;
cudaDeviceGetAttribute(&npp_stream_context_.nCudaDevAttrComputeCapabilityMajor,
cudaDevAttrComputeCapabilityMajor, npp_stream_context_.nCudaDeviceId);
cudaDeviceGetAttribute(&npp_stream_context_.nCudaDevAttrComputeCapabilityMinor,
cudaDevAttrComputeCapabilityMinor, npp_stream_context_.nCudaDeviceId);
cudaStreamGetFlags(npp_stream_context_.hStream, &npp_stream_context_.nStreamFlags);
}

int width = msg.width();
int height = msg.height();
// const auto &img = msg.data;
size_t out_buf_size = width * height * 3 / 2;

CompressedImage::UniquePtr compressed_msg = std::make_unique<CompressedImage>();
compressed_msg->header = msg.header();
compressed_msg->format = "jpeg";

if (image_size < out_buf_size) {
// Allocate Npp8u buffers
dev_image_ = nppiMalloc_8u_C3(width, height, &dev_image_step_bytes_);
image_size = out_buf_size;

dev_yuv_[0] = nppiMalloc_8u_C1(width, height, &dev_yuv_step_bytes_[0]); // Y
dev_yuv_[1] = nppiMalloc_8u_C1(width/2, height/2, &dev_yuv_step_bytes_[1]); // U
dev_yuv_[2] = nppiMalloc_8u_C1(width/2, height/2, &dev_yuv_step_bytes_[2]); // V
buffer_.emplace(V4L2_PIX_FMT_YUV420M, width, height, 0);
TEST_ERROR(buffer_->allocateMemory() != 0, "NvBuffer allocation failed");

encoder_->setCropRect(0, 0, width, height);

// Synchronize CUDA device to ensure memory allocation is complete
std::this_thread::sleep_for(std::chrono::milliseconds(300));
cudaError_t cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
// Handle error
fprintf(stderr, "cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(cudaStatus));
// You may want to throw an exception or handle the error appropriately
}
}

TEST_ERROR(cudaMemcpy2DAsync(static_cast<void*>(dev_image_), dev_image_step_bytes_,
static_cast<const void*>(msg.cuda_mem()), msg.step(),
msg.width() * sizeof(Npp8u) * 3,
msg.height(), cudaMemcpyDeviceToDevice, stream_.handle()) != cudaSuccess,
"2D memory allocation failed");

NppiSize roi = {static_cast<int>(msg.width()), static_cast<int>(msg.height())};
if (format == ImageFormat::RGB) {
TEST_ERROR(nppiRGBToYUV420_8u_C3P3R_Ctx(dev_image_, dev_image_step_bytes_,
dev_yuv_.data(), dev_yuv_step_bytes_.data(), roi,
npp_stream_context_) != NPP_SUCCESS,
"failed to convert rgb8 to yuv420");
} else {
// XXX: need to BGR -> RGB
std::cerr << "not supported" << std::endl;
}

NvBuffer::NvBufferPlane &plane_y = buffer_->planes[0];
NvBuffer::NvBufferPlane &plane_u = buffer_->planes[1];
NvBuffer::NvBufferPlane &plane_v = buffer_->planes[2];
TEST_ERROR(cudaMemcpy2DAsync(plane_y.data, plane_y.fmt.stride,
dev_yuv_[0], dev_yuv_step_bytes_[0], width, height,
cudaMemcpyDeviceToHost, stream_.handle()) != cudaSuccess,
"memory copy from Device to Host for Y plane failed");
TEST_ERROR(cudaMemcpy2DAsync(plane_u.data, plane_u.fmt.stride,
dev_yuv_[1], dev_yuv_step_bytes_[1], width/2, height/2,
cudaMemcpyDeviceToHost, stream_.handle()) != cudaSuccess,
"memory copy from Device to Host for U plane failed");
TEST_ERROR(cudaMemcpy2DAsync(plane_v.data, plane_v.fmt.stride,
dev_yuv_[2], dev_yuv_step_bytes_[2], width/2, height/2,
cudaMemcpyDeviceToHost, stream_.handle()) != cudaSuccess,
"memory copy from Device to Host for V plane failed");
stream_.synchronize();

unsigned char * out_data = new unsigned char[out_buf_size];

TEST_ERROR(
encoder_->encodeFromBuffer(buffer_.value(), JCS_YCbCr, &out_data,
out_buf_size, quality),
"NvJpeg Encoder Error");

compressed_msg->data.resize(static_cast<size_t>(out_buf_size / sizeof(uint8_t)));
memcpy(compressed_msg->data.data(), out_data, out_buf_size);

delete[] out_data;
out_data = nullptr;
return compressed_msg;
}

void JetsonCompressor::setCudaStream(cuda::stream::handle_t &raw_cuda_stream) {
stream_ = cuda::stream::wrap(cuda::device::current::get().id(),
cuda::context::current::get().handle(),
Expand Down Expand Up @@ -236,6 +361,13 @@ CompressedImage::UniquePtr NVJPEGCompressor::compress(const Image &msg, int qual
return compressed_msg;
}

CompressedImage::UniquePtr NVJPEGCompressor::compress(const ImageContainer &msg, int quality, ImageFormat format)
{
sensor_msgs::msg::Image image_msg;
msg.get_sensor_msgs_image(image_msg);
return compress(image_msg, quality, format);
}

void NVJPEGCompressor::setNVImage(const Image &msg) {
unsigned char *p = nullptr;
CHECK_CUDA(cudaMallocAsync((void **)&p, msg.data.size(), stream_));
Expand Down
Loading