東方算程譚

επιστημη talking nonsense: CUDA ramblings that have nothing to do with danmaku

NPP : Canny Edge Detection (Part 2)

Canny edge detection finds contours by locating the points where brightness changes. The catch with color images: when two regions of different color but equal brightness sit next to each other, the grayscale version has no brightness change along that boundary, so the edge goes undetected.
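For example, take the rough luminance weighting used in color2gray below, (B + 7*G + 2*R)/10: pure red and a 20% gray both land on gray level 51, so a boundary between such regions simply disappears once the image is converted to grayscale. A minimal host-side check (standalone, not part of the capture program):

#include <cstdio>

// same rough luminance weighting as color2gray: (B + 7*G + 2*R) / 10
int to_gray(int b, int g, int r) { return (b + g*7 + r*2) / 10; }

int main() {
  std::printf("pure red (B,G,R) = (  0,   0, 255) -> %d\n", to_gray(0, 0, 255));   // 51
  std::printf("20%% gray (B,G,R) = ( 51,  51,  51) -> %d\n", to_gray(51, 51, 51)); // 51
}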

So I split the color image into three single-channel images, one per R/G/B channel, ran the Canny filter on each, and recombined the results.

/*
 * DO NOT FORGET nvcc option : --expt-extended-lambda
 */

// std
#include <iostream>

// OpenCV
#include <opencv2/opencv.hpp>

// CUDA
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <npp.h>

// kernel function: 2-D transform
//    dst[y][x] = fun(src[y][x], dst[y][x])
//       where : 0 <= x < width, 0 <= y < height
template<typename T, typename U, typename Function>
__global__ void kernel_transform2D(unsigned int  width, unsigned int height, 
                                        const T* src,         size_t src_pitch,
                                              U* dst,         size_t dst_pitch,
                                       Function  fun) {
  unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
  unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
  if ( x < width && y < height ) {
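    // pitches are given in bytes, so step through the rows via char* before casting back to the element type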
    U* dst_ptr = ((U*)((char*)dst + dst_pitch*y)) + x;
    *dst_ptr = fun(((const T*)((const char*)src + src_pitch*y))[x], *dst_ptr);
  }
}

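// color -> grayscale over the whole pixel; not called from main below, kept for reference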
void color2gray(unsigned int  width, unsigned int height, 
                      uchar3* src,         size_t src_pitch,
                       uchar* dst,         size_t dst_pitch) {
  kernel_transform2D<<<dim3((width+31)/32, (height+7)/8), dim3(32,8)>>>(
    width, height, 
    src, src_pitch, 
    dst, dst_pitch,
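    // OpenCV frames are BGR, so v.x = B, v.y = G, v.z = R;
    // (B + 7*G + 2*R)/10 is a rough approximation of luminance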
    [] __device__ (const uchar3 v, uchar) -> uchar { 
       int t = (v.x + v.y*7 + v.z*2)/10; 
       if ( t <   0 ) t = 0; 
       if ( t > 255 ) t = 255; 
       return (uchar)t; 
    }
  );
}

void color2gray_channel(unsigned int  width, unsigned int height, 
                        uchar3* src, size_t src_pitch,
                        uchar*  dst, size_t dst_pitch,
                       int    channel) {
  kernel_transform2D<<<dim3((width+31)/32, (height+7)/8), dim3(32,8)>>>(
    width, height, 
    src, src_pitch, 
    dst, dst_pitch,
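    // pick out byte #channel (0 = B, 1 = G, 2 = R) of the packed uchar3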
    [=] __device__ (const uchar3 v, uchar) -> uchar 
      { return ((const uchar*)&v)[channel]; }
  );
}

void gray2color_channel(unsigned int  width, unsigned int height, 
                        uchar*  src, size_t src_pitch,
                        uchar3* dst, size_t dst_pitch,
                        int channel) {
  kernel_transform2D<<<dim3((width+31)/32, (height+7)/8), dim3(32,8)>>>(
    width, height, 
    src, src_pitch, 
    dst, dst_pitch,
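    // overwrite only byte #channel of the existing color pixel; the other two channels keep their previous values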
    [=] __device__ (const uchar v, uchar3 c) -> uchar3 
      { uchar3 t = c; ((uchar*)&t)[channel] = v; return t; }
  );
}

int main(int argc, char *argv[]) {
  cv::VideoCapture camera(0);

  cv::namedWindow("original", CV_WINDOW_AUTOSIZE);
  cv::namedWindow("canny", CV_WINDOW_AUTOSIZE);

  cv::Mat frame;
  cv::Mat canny;

  uchar3* d_frame;
  uchar*  d_gray_base;
  uchar*  d_canny_base;

  uchar*  d_gray[3];
  uchar*  d_canny[3];
  size_t  d_frame_pitch;
  size_t  d_gray_pitch;
  size_t  d_canny_pitch;
  Npp8u*  d_buffer;
  NppiSize size;

 
  // The first capture tells us the frame size
  // (and it presumably won't change after that),
  // so allocate device memory based on it.
  camera >> frame;

  size.width = (int)frame.size().width;
  size.height = (int)frame.size().height;

  cudaMallocPitch((void**)&d_frame,      &d_frame_pitch, size.width*sizeof(uchar3), size.height);
  cudaMallocPitch((void**)&d_gray_base,  &d_gray_pitch,  size.width,                size.height*3);
  cudaMallocPitch((void**)&d_canny_base, &d_canny_pitch, size.width,                size.height*3);

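  // slice each base allocation (3*height rows) into three per-channel planes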
  for ( size_t i = 0; i < 3; ++i ) {
    d_gray[i]  = d_gray_base  + d_gray_pitch *size.height*i;
    d_canny[i] = d_canny_base + d_canny_pitch*size.height*i;
  }

  // parameters handed to the Canny filter
  NppiSize  nroi    = size;
  NppiPoint noffset = { 0, 0 };
  // tune the parameters below until the result looks good.
  Npp16s                 nlow_threshold  = 50;
  Npp16s                 nhigh_threshold = 150;
  NppiDifferentialKernel nkernel   = NPP_FILTER_SOBEL;
  NppiMaskSize           nmasksize = NPP_MASK_SIZE_3_X_3;


  // allocate the scratch buffer the Canny filter needs
  {
    int buffer_size;
    nppiFilterCannyBorderGetBufferSize(size, &buffer_size);
    cudaMalloc(&d_buffer, buffer_size);
  }

  canny = frame.clone();
  std::cout 
    << "width,height   = " << size.width << ',' << size.height  
    << "\nstep           = " << frame.step 
    << "\ndepth, channel = " << frame.depth() << ',' << frame.channels()
    << "\n***** [ESC] to exit. *****\n";

  while ( cv::waitKey(10) != 0x1b ) {
    // [1] capture an image into frame
    camera >> frame;
    cv::imshow("original", frame);

    // [2] copy frame to d_frame
    cudaMemcpy2D(d_frame, d_frame_pitch, frame.data, frame.step, 
                 size.width*sizeof(uchar3), size.height, cudaMemcpyDefault);

    for ( int i = 0; i < 3; ++i ) {
      // [3] extract channel i of d_frame into the grayscale plane d_gray[i]
      color2gray_channel(size.width, size.height, d_frame, d_frame_pitch, d_gray[i], d_gray_pitch,i);

      // [4] run the Canny filter on d_gray[i], writing the result to d_canny[i]
      nppiFilterCannyBorder_8u_C1R(d_gray[i],  (int)d_gray_pitch,  size, noffset,
                                   d_canny[i], (int)d_canny_pitch, nroi,
                                   nkernel, nmasksize,
                                   nlow_threshold, nhigh_threshold,
                                   nppiNormL2, NPP_BORDER_REPLICATE, 
                                   d_buffer);

      // [5] write d_canny[i] back into channel i of d_frame
      gray2color_channel(size.width, size.height, d_canny[i], d_canny_pitch, d_frame, d_frame_pitch, i);
    }

    // [6] copy d_frame to canny
    cudaMemcpy2D(canny.data, canny.step, d_frame, d_frame_pitch, 
                 size.width*sizeof(uchar3), size.height, cudaMemcpyDefault);

    // [7] draw!
    cv::imshow("canny", canny);
  }

  // cleanup
  cudaFree(d_frame);
  cudaFree(d_gray_base);
  cudaFree(d_canny_base);
  cudaFree(d_buffer);
}
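To build it, remember the --expt-extended-lambda option warned about at the top of the source. Something along these lines should work, though the exact NPP library names depend on the CUDA version (CUDA 8 bundles the image functions in nppi; later releases split them into nppif, nppc, and friends), the pkg-config package name depends on how OpenCV was installed, and canny_rgb.cu is just a placeholder file name:

nvcc -std=c++11 --expt-extended-lambda canny_rgb.cu -o canny_rgb `pkg-config --cflags --libs opencv` -lnppi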

Here's what came out.

f:id:Episteme:20161112000224p:plain