CUDA::GL::パーティクル

現在地 >> メニュー >> CUDA >> CUDA::GL::パーティクル

問題

次のようなパーティクルを定義し、各パーティクルの次の位置の計算をGPUで行え。

typedef struct
{
float x,y,z; // current position
float Xstart,Ystart,Zstart; // initial position
}Particle;

その他条件：
　移動量などは適当でよい。(描画のたびに0.5だけ移動するなど)

答え

#include <windows.h>
#include <iostream>
#include <string>
#include <cstdlib>
#include <ctime>
#include <GL/glut.h>
#include <cuda_runtime.h>

using namespace std;

// Number of particles that are simulated and drawn.
const int P_NUM = 100;

/******* [ Data structures ] *********/
// A single particle: its current position plus the position it restarts from.
typedef struct
{
    // Current position.
    float x;
    float y;
    float z;
    // Initial position.
    float Xstart;
    float Ystart;
    float Zstart;
} Particle;

// Host-side array of all particles; this is the copy that gets rendered.
Particle ball[P_NUM];

/********* [ Forward declarations ] **************/
// Fills in the particle start positions and prepares the device buffer.
void INIT_PARTICLE();
// GLUT display callback: renders one frame.
void display();
// GLUT reshape callback: receives the new window width and height.
void reshape(int w, int h);
// GLUT timer callback: requests a redraw and re-arms itself.
void timer(int value);
// Draws the three coordinate axes.
void DRAW_XYZ();

// Draws every particle as a point.
void DRAW_PARTICLES();
// Runs one simulation step on the GPU and copies the result back to the host.
void CALC_NEXT_STATUS();

/*******************[エラー処理]**********************/
namespace
{
    // Fatal-error helper: writes `message` followed by a newline to stderr,
    // then terminates the process with exit status 1. Never returns.
    void errorexit(string message)
    {
        std::cerr << message << std::endl;
        std::exit(1);
    }
}

/************************************************************************/
/* ここから「G」PUでの処理 */
/************************************************************************/
/**
 * Kernel: resets every particle's current position to its stored start position.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per particle. The global
 * thread index is used, and threads whose index is >= P_NUM return without
 * touching memory, so any launch that covers at least P_NUM threads is valid
 * (including the single block of P_NUM threads used by the host code).
 * No shared memory is used.
 *
 * @param o_data device pointer to an array of at least P_NUM particles whose
 *               Xstart/Ystart/Zstart fields are already filled in.
 */
__global__ void INIT_PARICLE_DATA(Particle *o_data)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= P_NUM)
    {
        return; // surplus thread: nothing to initialize
    }

    Particle &p = o_data[tid];
    p.x = p.Xstart;
    p.y = p.Ystart;
    p.z = p.Zstart;
}

/**
 * Kernel: advances every particle by one simulation step.
 *
 * Each particle's y coordinate is decreased by 0.05. Once y has dropped below
 * -2 the particle is moved back to its start position.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per particle. The global
 * thread index is used, and threads whose index is >= P_NUM return without
 * touching memory. No shared memory is used.
 *
 * @param o_data device pointer to an array of at least P_NUM particles;
 *               updated in place.
 */
__global__ void CALC_PARTICLE_NEXT(Particle *o_data)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= P_NUM)
    {
        return; // surplus thread: no particle to update
    }

    Particle &p = o_data[tid];

    // Float literals keep the arithmetic in single precision; a bare 0.05
    // is a double and would promote the whole expression.
    p.y -= 0.05f;

    if (p.y < -2.0f)
    {
        // Fell below the floor: restart from the initial position.
        p.x = p.Xstart;
        p.y = p.Ystart;
        p.z = p.Zstart;
    }
}

/*****************************************************************/
/* ここから「C」PU での処理 */
/****************************************************************/

/**********************[CUDAの初期化]*********************/
/**
 * Selects the first CUDA device whose compute capability major version is
 * at least 1 and makes it the current device.
 *
 * Terminates the program through errorexit() when the device count cannot be
 * queried, when no device is present, when no device qualifies, or when the
 * chosen device cannot be selected.
 */
void InitCUDA(void)
{
    int count = 0;
    const cudaError_t countStatus = cudaGetDeviceCount(&count);
    if (countStatus != cudaSuccess)
    {
        // Report the real reason (e.g. missing driver) instead of a bare "no device".
        errorexit(string("cudaGetDeviceCount failed: ") + cudaGetErrorString(countStatus));
    }
    if (count == 0)
    {
        errorexit("There is no device");
    }

    // Find the first device that reports usable properties.
    int i = 0;
    for (i = 0; i < count; i++)
    {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess && prop.major >= 1)
        {
            break;
        }
    }

    // The loop ran to completion, so no device qualified.
    if (i == count)
    {
        errorexit("There is no device supporting CUDA 1.x.");
    }

    const cudaError_t setStatus = cudaSetDevice(i);
    if (setStatus != cudaSuccess)
    {
        errorexit(string("cudaSetDevice failed: ") + cudaGetErrorString(setStatus));
    }
}

/************[OpenGLの初期設定]********************/
// Configures the GLUT window before it is created: an RGBA, double-buffered
// framebuffer with a depth buffer, and an initial size of 300 x 300.
inline void GLUT_INIT()
{
    const unsigned int displayMode = GLUT_DEPTH | GLUT_DOUBLE | GLUT_RGBA;
    const int windowSide = 300;

    glutInitDisplayMode(displayMode);
    glutInitWindowSize(windowSide, windowSide);
}

// Registers the GLUT callbacks: display, reshape, and a timer that first
// fires after 1 ms.
inline void GLUT_CALL_FUNC()
{
    const unsigned int firstTickMs = 1;

    glutDisplayFunc(&display);
    glutReshapeFunc(&reshape);
    glutTimerFunc(firstTickMs, &timer, 0);
}

// Application-specific setup: sets the clear color to opaque white and
// initializes the particles.
inline void MY_INIT()
{
    const GLclampf full = 1.0f;
    glClearColor(full, full, full, full);

    INIT_PARTICLE();
}

/******************** [メイン関数] *************************/
// Program entry point: picks a CUDA device, creates the GLUT window,
// registers the callbacks, initializes the particles and enters the GLUT
// event loop.
int main(int argc, char** argv)
{
    // CUDA device selection.
    InitCUDA();

    // Window creation; the display mode and size are configured before the window is created.
    glutInit(&argc, argv);
    GLUT_INIT();
    glutCreateWindow("window name");

    // Callback registration, then application state.
    GLUT_CALL_FUNC();
    MY_INIT();

    glutMainLoop();
    return 0;
}

/********[ここからコールバック]****************************************/

// GLUT display callback: draws the axes and the particles for the current
// state, computes the next particle state, then presents the frame.
void display()
{
    // Start from a cleared color and depth buffer.
    glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);

    // Reset the current matrix and place the camera at (7, 8, 9), looking at
    // the origin with +Y as the up direction.
    const GLdouble eye[3]    = {7.0, 8.0, 9.0};
    const GLdouble center[3] = {0.0, 0.0, 0.0};
    const GLdouble up[3]     = {0.0, 1.0, 0.0};
    glLoadIdentity();
    gluLookAt(eye[0], eye[1], eye[2],
              center[0], center[1], center[2],
              up[0], up[1], up[2]);

    glEnable(GL_DEPTH_TEST);

    // Scene content.
    DRAW_XYZ();
    DRAW_PARTICLES();

    // Advance the simulation once per rendered frame.
    CALC_NEXT_STATUS();

    glutSwapBuffers();
}

/**
 * GLUT reshape callback: updates the viewport and the perspective projection
 * for the new window size, then leaves the modelview matrix selected and
 * reset to identity.
 *
 * @param w new window width in pixels
 * @param h new window height in pixels; a non-positive value is treated as 1
 *          so that the aspect ratio never divides by zero
 */
void reshape(int w, int h)
{
    if (h <= 0)
    {
        h = 1; // collapsed window: avoid w / 0 in the aspect ratio
    }

    glViewport(0, 0, w, h);

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    const double aspect = static_cast<double>(w) / static_cast<double>(h);
    gluPerspective(30.0, aspect, 1.0, 100.0);

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
}

// GLUT timer callback: re-arms itself to fire again in 1 ms and asks GLUT to
// redraw the window. The timer value is not used.
void timer(int value)
{
    (void)value;

    const unsigned int nextTickMs = 1;
    glutTimerFunc(nextTickMs, &timer, 0); // timer function
    glutPostRedisplay();
}

/****************[ここから、各種関数]********************/
// Draws the coordinate axes as three colored line segments:
//   x: green, from (-100, 0, 0) to (100, 0, 0)
//   y: red,   from (0, 0, 0)    to (0, 100, 0)
//   z: blue,  from (0, 0, -100) to (0, 0, 100)
void DRAW_XYZ()
{
    // Per axis: color (r, g, b), start point (x, y, z), end point (x, y, z).
    static const GLdouble axes[3][9] = {
        {0, 1, 0,   -100, 0,    0,   100,   0,   0}, // x
        {1, 0, 0,      0, 0,    0,     0, 100,   0}, // y
        {0, 0, 1,      0, 0, -100,     0,   0, 100}, // z
    };

    glBegin(GL_LINES);
    for (int axis = 0; axis < 3; ++axis)
    {
        const GLdouble *seg = axes[axis];
        glColor3d(seg[0], seg[1], seg[2]);
        glVertex3d(seg[3], seg[4], seg[5]);
        glVertex3d(seg[6], seg[7], seg[8]);
    }
    glEnd();
}

// Device-side particle buffer. Allocated by INIT_PARTICLE() and reused for
// every simulation step.
Particle* gpu_data=NULL;

// Terminates through errorexit() with a readable message when a CUDA call
// made during particle initialization did not succeed.
static void checkInitCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        errorexit(string(what) + ": " + cudaGetErrorString(status));
    }
}

/**
 * Gives every particle a random start position, allocates the device buffer
 * gpu_data, and lets the GPU copy each start position into the particle's
 * current position. The initialized particles are copied back into `ball`.
 *
 * Any CUDA failure terminates the program through errorexit().
 */
void INIT_PARTICLE()
{
    // static_cast instead of `unsigned int(...)`: a functional cast with a
    // multi-word type name is not standard C++.
    srand(static_cast<unsigned int>(time(NULL)));

    // Start positions: each coordinate is a multiple of 0.01 in [0, 4.99].
    for (int loop = 0; loop < P_NUM; ++loop)
    {
        ball[loop].Xstart = (rand() % 500) / 100.0f;
        ball[loop].Ystart = (rand() % 500) / 100.0f;
        ball[loop].Zstart = (rand() % 500) / 100.0f;
    }

    const size_t bytes = sizeof(Particle) * P_NUM;

    // Release a buffer left over from an earlier call so it is not leaked.
    if (gpu_data != NULL)
    {
        cudaFree(gpu_data);
        gpu_data = NULL;
    }

    // The long-lived buffer is used directly for the initialization pass;
    // a separate temporary device allocation is not needed.
    checkInitCuda(cudaMalloc(reinterpret_cast<void**>(&gpu_data), bytes),
                  "can't cudaMalloc");
    checkInitCuda(cudaMemcpy(gpu_data, ball, bytes, cudaMemcpyHostToDevice),
                  "can't copy particles to the device");

    const dim3 grid(1, 1, 1);                 // one block
    const dim3 threadsPerBlock(P_NUM, 1, 1);  // one thread per particle

    INIT_PARICLE_DATA<<<grid, threadsPerBlock, 0>>>(gpu_data); // set current = start
    checkInitCuda(cudaGetLastError(), "INIT_PARICLE_DATA launch failed");
    checkInitCuda(cudaDeviceSynchronize(), "INIT_PARICLE_DATA execution failed");

    /* GPU => CPU copy */
    checkInitCuda(cudaMemcpy(ball, gpu_data, bytes, cudaMemcpyDeviceToHost),
                  "can't copy particles back to the host");
}

// Draws every particle in `ball` as a magenta point of size 3 at its current
// position.
void DRAW_PARTICLES()
{
    glColor3f(1, 0, 1);
    glPointSize(3);

    // A single glBegin/glEnd pair for the whole batch; opening and closing a
    // primitive per point produces the same points with far more calls.
    glBegin(GL_POINTS);
    for (int loop = 0; loop < P_NUM; ++loop)
    {
        glVertex3f(ball[loop].x, ball[loop].y, ball[loop].z);
    }
    glEnd();
}

// Terminates through errorexit() with a readable message when a CUDA call
// made during a simulation step did not succeed.
static void checkStepCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        errorexit(string(what) + ": " + cudaGetErrorString(status));
    }
}

/**
 * Runs one simulation step: uploads `ball` into the device buffer gpu_data,
 * runs CALC_PARTICLE_NEXT on it, and downloads the result back into `ball`.
 *
 * Requires gpu_data to have been allocated beforehand. Any CUDA failure
 * terminates the program through errorexit().
 */
void CALC_NEXT_STATUS()
{
    const size_t bytes = sizeof(Particle) * P_NUM;

    checkStepCuda(cudaMemcpy(gpu_data, ball, bytes, cudaMemcpyHostToDevice),
                  "can't copy particles to the device");

    const dim3 grid(1, 1, 1);                 // one block
    const dim3 threadsPerBlock(P_NUM, 1, 1);  // one thread per particle

    CALC_PARTICLE_NEXT<<<grid, threadsPerBlock, 0>>>(gpu_data);
    // A launch reports nothing directly: bad configurations surface through
    // cudaGetLastError(), execution faults at the next synchronization.
    checkStepCuda(cudaGetLastError(), "CALC_PARTICLE_NEXT launch failed");
    checkStepCuda(cudaDeviceSynchronize(), "CALC_PARTICLE_NEXT execution failed");

    checkStepCuda(cudaMemcpy(ball, gpu_data, bytes, cudaMemcpyDeviceToHost),
                  "can't copy particles back to the host");
}