为什么改用 DMA 接口(`dma` 系列宏)回写计算结果就会报错,而使用 athread 接口(`athread_put`)却没有这个问题?
-
主要看slaveCore.c文件
----------------------- main.f program add implicit none include 'mpif.h' integer:: i,j,s,ierr integer:: error integer:: mpi_is_initialized ! 向量大小 integer,parameter:: n = 65 ! 要运行的步数 integer,parameter:: step = 1 DOUBLE PRECISION::st,ed DOUBLE PRECISION:: a(n) DOUBLE PRECISION:: b(n) DOUBLE PRECISION:: c1 DOUBLE PRECISION:: eps = 1e-06 DOUBLE PRECISION:: x = 0 DOUBLE PRECISION:: ans(n) DOUBLE PRECISION:: c(n) call mpi_initialized(mpi_is_initialized,ierr) if(mpi_is_initialized .eq. 0) then call mpi_init(ierr) call athread_init(ierr); endif ! 初始化所有变量 c1 = 6.0 x = 1.0 do i = 1,n a(i) = x b(i) = x if((10.0 - x) .lt. eps) then x = 1.0 else x = x + 1.0 endif enddo ! 串行时间 st = mpi_wtime() do s = 1,step do i = 1,n ans(i) = a(i) * b(i) enddo enddo ed = mpi_wtime() write(*,*) "serial time : ", ed - st st = mpi_wtime() do s = 1,step call add_sw(a,b,c,%val(n)); enddo ed = mpi_wtime() write(*,*) "slaveCore time : ", ed - st error = 0 do i = 1,n if(ans(i) - c(i) > eps) then error = error + 1 endif enddo write(*,*) "error times : ",error call athread_halt(ierr) call mpi_finalize(ierr) end program ----------------------- mk.sh CFLAG="-OPT:IEEE_arith=1 -O0" FFLAG="-OPT:IEEE_arith=1 -O0" eFILE="example" oFILE="main.o slave.o slaveCore.o" rm -f $eFILE $oFILE sw5cc -host $CFLAG -c slave.c -o slave.o sw5cc -slave -msimd $CFLAG -c slaveCore.c -o slaveCore.o mpif90 $FFLAG -c main.f -o main.o mpif90 $FFLAG $oFILE -o $eFILE ----------------------- slave.c #include<stdio.h> #include "classHead.h" #include<athread.h> extern SLAVE_FUN(add_sw_slave)(); void add_sw_(double* a,double* b,double* c,int n) { add_INFO info; info.a = a; info.b = b; info.c = c; info.n = n; athread_spawn(add_sw_slave,&info); athread_join(); return; } ----------------------- slaveCore.c #include<stdio.h> #include<string.h> #include<stdlib.h> #include"slave.h" #include<dma.h> #include "classHead.h" #define S 8 #define CORE 64 void add_sw_slave(void * _ptr) { #define mask 255 // 每个double数组最大元素为1024 #define MAX_SIZE 1024 
volatile int get_r,put_r; volatile int get_reply[2],put_reply[2]; double a_slave[2][MAX_SIZE],b_slave[2][MAX_SIZE],c_slave[2][MAX_SIZE]; add_INFO info; int start; int nstart; int space; int nspace; int i,j,k; int index; int block = CORE * MAX_SIZE; dma_desc dma_get = 0; dma_desc dma_put = 0; get_r = 0; athread_get(PE_MODE,_ptr,&info,sizeof(add_INFO),&get_r,mask,0,0); while (get_r != 1); //DMA_GET_SET(dma_get,PE_MODE,&get_r,sizeof(add_INFO)); //DMA_GET_RUN(dma_get,_ptr,&info); //DMA_GET_WAIT(&get_r,1); //DMA_PUT_SET(dma_put,PE_MODE,&get_r, MAX_SIZE * S); for(i = 0, index =0;i < info.n;i += block,index++) { int last = (index - 1) % 2; int now = index % 2; int next = (index + 1) % 2; // 该从核开始的索引 start = i + _MYID * MAX_SIZE; // 该从核下一次的索引 nstart = start + block; // 如果越界就不算了 if(start >= info.n) break; space = MIN(MAX_SIZE,info.n - start); if(index == 0) { get_reply[now] = 0; //DMA_SET_SIZE_REPLY(dma_get,space * S,&get_reply[now]); //DMA_GET_RUN(dma_get,info.a + start,a_slave[now]); //DMA_GET_RUN(dma_get,info.b + start,b_slave[now]); athread_get(PE_MODE,info.a + start,a_slave[now],space * S,&get_reply[now],0,0,0); athread_get(PE_MODE,info.b + start,b_slave[now],space * S,&get_reply[now],0,0,0); } if(nstart < info.n) { nspace = MIN(MAX_SIZE,info.n - nstart); get_reply[next] = 0; //DMA_SET_SIZE_REPLY(dma_get,nspace * S,&get_reply[next]); //DMA_GET_RUN(dma_get,info.a + nstart,a_slave[next]); //DMA_GET_RUN(dma_get,info.b + nstart,b_slave[next]); athread_get(PE_MODE,info.a + nstart,a_slave[next],nspace * S,&get_reply[next],0,0,0); athread_get(PE_MODE,info.b + nstart,b_slave[next],nspace * S,&get_reply[next],0,0,0); } //while (get_reply[now] != 2); DMA_GET_WAIT(&get_reply[now],2); for(j = 0;j < space ;j++) { c_slave[now][j] = a_slave[now][j] * b_slave[now][j]; } if(_MYID == 0) printf("%d -- %d -- %d \n",index,space,start); put_reply[now] = 0; /** * 出错代码行:注释下面两行之后打开 athread_put能正确使用 */ DMA_SET_SIZE_REPLY(dma_put,space * S,&put_reply[now]); 
DMA_PUT_RUN(dma_put,c_slave[now],info.c + start); //athread_put(PE_MODE,c_slave[now],info.c + start,space * S,&put_reply[now],0,0); if(index > 0) { //while(put_reply[last] != 1); DMA_PUT_WAIT(&put_reply[last],1); } if(nstart >= info.n) { //while(put_reply[now] != 1); DMA_PUT_WAIT(&put_reply[now],1); } } #undef mask #undef MAX_SIZE } ----------------------- classHead.h #ifndef slaveClass #define slaveClass #define COL(x) (x & 0x07) #define ROW(x) ((x & 0x38) >> 3) #define REG_PUTR(var, dst) asm volatile ("putr %0,%1\n"::"r"(var),"r"(dst)) #define REG_PUTC(var, dst) asm volatile ("putc %0,%1\n"::"r"(var),"r"(dst)) #define REG_GETR(var) asm volatile ("getr %0\n":"=r"(var)) #define REG_GETC(var) asm volatile ("getc %0\n":"=r"(var)) #define MAX(a,b) ((a)>(b)?(a):(b)) #define MIN(a,b) ((a)<(b)?(a):(b)) #define DMA_SET_SIZE_REPLY(d,len,reply) \ ({ \ dma_set_size(&d,len); \ dma_set_reply(&d,reply); \ }) #define DMA_GET_SET(d,mode,reply,len) \ ({ \ dma_set_op(&d,DMA_GET); \ dma_set_mode(&d,mode); \ dma_set_reply(&d,reply); \ dma_set_size(&d,len); \ }) #define DMA_GET_RUN(d,src,dest) \ ({ \ dma(d,(long)(src),(long)(dest)); \ }) #define DMA_GET_WAIT(reply,n) \ ({ \ dma_wait((reply),n); \ }) #define DMA_PUT_SET(d,mode,reply,len) \ ({ \ dma_set_op(&d,DMA_PUT); \ dma_set_mode(&d,mode); \ dma_set_reply(&d,reply); \ dma_set_size(&d,len); \ }) #define DMA_PUT_RUN(d,src,dest) \ ({ \ dma(d,(long)(src),(long)(dest)); \ }) #define DMA_PUT_WAIT(reply,n) \ ({ \ dma_wait((reply),n); \ }) typedef struct { int n; double * a; double * b; double * c; }add_INFO; #endif
-
试试这个宏:把回答字(reply)作为最后一个参数传给 `dma_rpl`。
https://gitee.com/swmore/swcache-assets/blob/master/dma_macros.h#L20