問題
以下の3枚の画像(256×256)を読み込んで、2値化せよ。
png画像
main.cu
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <cstdio>
#include <cutil.h>
#include <cv.h>
#include <highgui.h>
#include "main_kernel.h"
using namespace std;
// File-local constants.
namespace
{
// Number of images handled by main(): it is the loop bound used when indexing
// filename[], window_name[] and the loaded-image array.
// NOTE(review): names that start with an underscore followed by an uppercase
// letter are reserved for the implementation in C++ — consider renaming this
// together with every use.
const int _NUM = 3;
}
// Input image paths, processed in this order by main().
// The pointees are const because they refer to string literals, which must
// never be written through.
const char *filename[]={"a.png","b.png","c.png"};
// Title of the display window used for the image at the same index.
const char *window_name[]={"no.1","no.2","no.3"};
/**
 * Loads every file listed in filename[] as an 8-bit grayscale image,
 * binarizes it on the GPU with main_kernel, writes the result back into the
 * same IplImage and finally shows all images in their own windows.
 *
 * For each image the pixels are uploaded into a cudaArray bound to the
 * texture reference `tex`; the kernel writes its result into a linear device
 * buffer which is then copied back over the host image.
 *
 * @param argc  forwarded to CUT_EXIT
 * @param argv  forwarded to CUT_EXIT
 * @return 0 after the windows are closed; the process exits with
 *         EXIT_FAILURE if an image cannot be loaded or device memory
 *         cannot be obtained.
 */
int main( int argc, char **argv)
{
    IplImage *imgA[_NUM];
    CUT_DEVICE_INIT();

    /*** [cudaArray setup] ***/
    // One signed 8-bit channel, matching the element type of `tex`.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindSigned);

    /** Texture parameters **/
    // Wrap addressing is only supported with normalized coordinates; since
    // unnormalized coordinates are used here, request clamping explicitly.
    tex.addressMode[0] = cudaAddressModeClamp;
    tex.addressMode[1] = cudaAddressModeClamp;
    tex.filterMode = cudaFilterModePoint; // no interpolation: fetch the exact texel
    tex.normalized = false;               // address texels with unnormalized (pixel) coordinates

    for(int loop = 0; loop < _NUM; ++loop)
    {
        imgA[loop] = cvLoadImage(filename[loop],CV_LOAD_IMAGE_GRAYSCALE);
        if(imgA[loop] == NULL)
        {
            printf("%s : file not found",filename[loop]);
            CUT_EXIT(argc, argv);
            exit(EXIT_FAILURE);
        }

        const int width  = imgA[loop]->width;
        const int height = imgA[loop]->height;
        // Host rows are widthStep bytes apart, which can exceed `width` when
        // OpenCV pads each row for alignment.
        const size_t hostPitch = imgA[loop]->widthStep;
        const size_t rowBytes  = width * sizeof(char);

        // Round the grid up so that images whose size is not a multiple of
        // the block size are still covered completely.
        dim3 dimBlock(16, 16, 1); //Db
        dim3 dimGrid((width  + dimBlock.x - 1) / dimBlock.x,
                     (height + dimBlock.y - 1) / dimBlock.y, 1); //Dg

        // The output buffer is padded to a whole number of blocks in both
        // directions, so every launched thread owns a valid output slot even
        // if the kernel performs no bounds check of its own.
        const int paddedWidth  = dimGrid.x * dimBlock.x;
        const int paddedHeight = dimGrid.y * dimBlock.y;
        const size_t devicePitch = paddedWidth * sizeof(char);

        /** Device output buffer **/
        char* d_data = NULL;
        CUDA_SAFE_CALL( cudaMalloc( (void**) &d_data, devicePitch * paddedHeight ) );
        if(d_data == NULL)
        {
            puts("can't get memory for gpu");
            CUT_EXIT(argc, argv);
            exit(EXIT_FAILURE);
        }

        /** Device input array (texture storage) **/
        // Start from NULL so the check below is meaningful if the allocation
        // call fails without aborting.
        cudaArray* cu_array = NULL;
        CUDA_SAFE_CALL( cudaMallocArray( &cu_array, &channelDesc, width, height ));
        if(cu_array == NULL)
        {
            puts("can't get memory for gpu array");
            cudaFree(d_data);
            CUT_EXIT(argc, argv);
            exit(EXIT_FAILURE);
        }

        // Row-by-row upload that skips any host-side row padding.
        CUDA_SAFE_CALL( cudaMemcpy2DToArray( cu_array, 0, 0, imgA[loop]->imageData, hostPitch,
                                             rowBytes, height, cudaMemcpyHostToDevice));

        /** Bind the array to the texture **/
        CUDA_SAFE_CALL( cudaBindTextureToArray( tex, cu_array, channelDesc));

        // The kernel is given the padded dimensions because they describe the
        // layout of d_data (row length and row count of the output buffer).
        main_kernel<<<dimGrid, dimBlock, 0>>>(d_data, paddedWidth, paddedHeight);
        CUT_CHECK_ERROR("Kernel execution failed");
        CUDA_SAFE_CALL( cudaThreadSynchronize() ); // wait for the kernel to finish

        // Copy only the real image region back, restoring the host row pitch.
        CUDA_SAFE_CALL( cudaMemcpy2D( imgA[loop]->imageData, hostPitch, d_data, devicePitch,
                                      rowBytes, height, cudaMemcpyDeviceToHost) );

        // Release per-image device resources; unbind first so the texture
        // never refers to a freed array.
        CUDA_SAFE_CALL(cudaUnbindTexture(tex));
        CUDA_SAFE_CALL(cudaFree(d_data)); d_data = NULL;
        CUDA_SAFE_CALL(cudaFreeArray(cu_array)); cu_array = NULL;
    }

    // Show every binarized image in its own window.
    for(int i = 0; i < _NUM; ++i)
    {
        cvNamedWindow(window_name[i],CV_WINDOW_AUTOSIZE);
        cvShowImage(window_name[i],imgA[i]);
    }
    cvWaitKey(0); // a delay of 0 waits for a key press indefinitely
    cvDestroyAllWindows();

    for(int i = 0; i < _NUM; ++i)
    {
        cvReleaseImage( &imgA[i] ); imgA[i] = NULL;
    }
    CUT_EXIT(argc, argv);
    return 0;
}
main_kernel.h
#ifndef _MAIN_KERNEL_H_
#define _MAIN_KERNEL_H_
#include <cstdio>
#include <iostream>
#include <cutil.h>
#include <ctime>
#include <cstdlib>
using namespace std;
// 2D texture reference through which main_kernel reads the source image.
// Elements are signed 8-bit values returned unconverted
// (cudaReadModeElementType).
// NOTE(review): this is a definition inside a header — presumably the header
// is included from exactly one .cu file; confirm before including it elsewhere.
texture<signed char, 2, cudaReadModeElementType> tex;
/**
 * Binarizes the image bound to the texture reference `tex`.
 *
 * Launch layout: a 2D grid of 2D blocks in which each thread handles the
 * pixel at (blockIdx * blockDim + threadIdx) in x and y. Threads whose
 * coordinates fall outside width x height do nothing, so the grid may be
 * rounded up past the image size.
 *
 * Precondition: `tex` must be bound to the source image before launch.
 *
 * @param o_data  device output buffer, indexed as o_data[y * width + x];
 *                receives char(255) where the source pixel, read as an
 *                unsigned 8-bit value, is greater than 127, and char(0)
 *                otherwise
 * @param width   row length of o_data in elements
 * @param height  number of rows in o_data
 */
__global__ void main_kernel(char *o_data,int width,int height)
{
    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

    // Skip threads that lie outside the output buffer.
    if( width <= 0 || height <= 0 ||
        x >= static_cast<unsigned int>(width) ||
        y >= static_cast<unsigned int>(height) )
    {
        return;
    }

    // The texture yields signed chars; reinterpret the byte as unsigned so
    // that source values 128..255 compare above the threshold.
    const unsigned char pixel =
        static_cast<unsigned char>(tex2D(tex, static_cast<float>(x), static_cast<float>(y)));

    o_data[y*width + x] = (pixel > 127) ? char(255) : char(0);
}
#endif