为什么使用了DMA计算就报错了,但是athread没有这个问题



  • a36ffb12-12e9-4a4f-bfcf-d8f0557f529f-image.png

    主要看slaveCore.c文件

    -----------------------  main.f
          program add
          implicit none
          include 'mpif.h'
            
          integer:: i,j,s,ierr
          integer:: error
          integer:: mpi_is_initialized
    
          ! 向量大小
          integer,parameter:: n = 65
          ! 要运行的步数
          integer,parameter:: step = 1
          DOUBLE PRECISION::st,ed
    
          DOUBLE PRECISION:: a(n)
          DOUBLE PRECISION:: b(n)
          DOUBLE PRECISION:: c1
          DOUBLE PRECISION:: eps = 1e-06
          DOUBLE PRECISION:: x = 0
          DOUBLE PRECISION:: ans(n)
          DOUBLE PRECISION:: c(n)
          
          call mpi_initialized(mpi_is_initialized,ierr)
          if(mpi_is_initialized .eq. 0) then
            call mpi_init(ierr)
            call athread_init(ierr);
          endif
          
          ! 初始化所有变量
          c1 = 6.0
          x = 1.0
          do i = 1,n
            a(i) = x
            b(i) = x
            if((10.0 - x) .lt. eps) then
                x = 1.0
            else 
                x = x + 1.0
            endif
          enddo
          
          ! 串行时间
          st = mpi_wtime()
          do s = 1,step
            do i = 1,n
              ans(i) = a(i) * b(i)
            enddo
          enddo
          ed = mpi_wtime()
          write(*,*) "serial time : ", ed - st
          
          st = mpi_wtime()
          do s = 1,step
            call add_sw(a,b,c,%val(n));
          enddo
          ed = mpi_wtime()
    
          write(*,*) "slaveCore time : ", ed - st
    
          error = 0
          do i = 1,n
            if(ans(i) - c(i) > eps) then
              error = error + 1
            endif
          enddo
    
          write(*,*) "error times : ",error
          call athread_halt(ierr)
          call mpi_finalize(ierr)
    
          end program
    
    -----------------------  mk.sh
    
    CFLAG="-OPT:IEEE_arith=1 -O0"
    FFLAG="-OPT:IEEE_arith=1 -O0"
    
    eFILE="example"
    oFILE="main.o slave.o slaveCore.o"
    
    rm -f $eFILE $oFILE
    sw5cc -host $CFLAG -c slave.c  -o slave.o
    sw5cc -slave -msimd $CFLAG -c slaveCore.c -o slaveCore.o
    mpif90 $FFLAG -c main.f -o main.o
    mpif90 $FFLAG $oFILE -o $eFILE
    
    -----------------------  slave.c
    #include<stdio.h>
    #include "classHead.h"
    #include<athread.h>
    
    extern SLAVE_FUN(add_sw_slave)();
    
    void add_sw_(double* a,double* b,double* c,int n)
    {
        add_INFO info;
        info.a = a;
        info.b = b;
        info.c = c;
        info.n = n;
    
        athread_spawn(add_sw_slave,&info);
        athread_join();
        return;
    }
    
    -----------------------  slaveCore.c
    #include<stdio.h>
    #include<string.h>
    #include<stdlib.h>
    #include"slave.h"
    #include<dma.h>
    #include "classHead.h"
    
    #define S 8
    #define CORE 64
    
    void add_sw_slave(void * _ptr)
    {
    #define mask 255    
    // 每个double数组最大元素为1024
    #define MAX_SIZE 1024
    
        volatile int get_r,put_r;
        volatile int get_reply[2],put_reply[2];
        double a_slave[2][MAX_SIZE],b_slave[2][MAX_SIZE],c_slave[2][MAX_SIZE];
    
        add_INFO info;
        
        int start;
        int nstart;
        int space;
        int nspace;
        int i,j,k;
        int index;
        int block = CORE * MAX_SIZE;
    
        dma_desc  dma_get = 0;
        dma_desc  dma_put = 0;
    
        get_r = 0;
        athread_get(PE_MODE,_ptr,&info,sizeof(add_INFO),&get_r,mask,0,0);
        while (get_r != 1);
        //DMA_GET_SET(dma_get,PE_MODE,&get_r,sizeof(add_INFO));
        //DMA_GET_RUN(dma_get,_ptr,&info);
        //DMA_GET_WAIT(&get_r,1);
    
    
        //DMA_PUT_SET(dma_put,PE_MODE,&get_r, MAX_SIZE * S);
    
        for(i = 0, index =0;i < info.n;i += block,index++)
        {   
            int last = (index - 1) % 2;
            int now = index % 2;
            int next = (index + 1) % 2; 
    
            // 该从核开始的索引
            start = i + _MYID * MAX_SIZE;
            // 该从核下一次的索引
            nstart = start + block;
            
            // 如果越界就不算了
            if(start >= info.n) break;
    
            space = MIN(MAX_SIZE,info.n - start);
            if(index == 0)
            {
                get_reply[now] = 0;
                //DMA_SET_SIZE_REPLY(dma_get,space * S,&get_reply[now]);
                //DMA_GET_RUN(dma_get,info.a + start,a_slave[now]);
                //DMA_GET_RUN(dma_get,info.b + start,b_slave[now]);
                athread_get(PE_MODE,info.a + start,a_slave[now],space * S,&get_reply[now],0,0,0);
                athread_get(PE_MODE,info.b + start,b_slave[now],space * S,&get_reply[now],0,0,0);
            }
            if(nstart < info.n)
            {
                nspace = MIN(MAX_SIZE,info.n - nstart);
                get_reply[next] = 0;
                //DMA_SET_SIZE_REPLY(dma_get,nspace * S,&get_reply[next]);
                //DMA_GET_RUN(dma_get,info.a + nstart,a_slave[next]);
                //DMA_GET_RUN(dma_get,info.b + nstart,b_slave[next]);
                athread_get(PE_MODE,info.a + nstart,a_slave[next],nspace * S,&get_reply[next],0,0,0);
                athread_get(PE_MODE,info.b + nstart,b_slave[next],nspace * S,&get_reply[next],0,0,0);
            }
            //while (get_reply[now] != 2);
            DMA_GET_WAIT(&get_reply[now],2);
            
            for(j = 0;j < space ;j++)
            {
                c_slave[now][j] = a_slave[now][j] * b_slave[now][j];
            }
            
    
            if(_MYID == 0) printf("%d -- %d -- %d \n",index,space,start);
            put_reply[now] = 0;
    
            /**
             * 出错代码行:注释下面两行之后打开 athread_put能正确使用
             */
            DMA_SET_SIZE_REPLY(dma_put,space * S,&put_reply[now]);
            DMA_PUT_RUN(dma_put,c_slave[now],info.c + start);
            //athread_put(PE_MODE,c_slave[now],info.c + start,space * S,&put_reply[now],0,0);
            
            if(index > 0) 
            {
                //while(put_reply[last] != 1);
                DMA_PUT_WAIT(&put_reply[last],1);
            }
            if(nstart >= info.n) 
            {
                //while(put_reply[now] != 1);
                DMA_PUT_WAIT(&put_reply[now],1);
            }
        }
    #undef mask
    #undef MAX_SIZE
    }
    
    -----------------------  classHead.h
    #ifndef slaveClass
    #define slaveClass
    
    #define COL(x) (x & 0x07)
    #define ROW(x) ((x & 0x38) >> 3)
    
    #define REG_PUTR(var, dst) asm volatile ("putr %0,%1\n"::"r"(var),"r"(dst))
    #define REG_PUTC(var, dst) asm volatile ("putc %0,%1\n"::"r"(var),"r"(dst))
    #define REG_GETR(var) asm volatile ("getr %0\n":"=r"(var))
    #define REG_GETC(var) asm volatile ("getc %0\n":"=r"(var))
    
    #define MAX(a,b) ((a)>(b)?(a):(b))
    #define MIN(a,b) ((a)<(b)?(a):(b))
    
    #define DMA_SET_SIZE_REPLY(d,len,reply) \
    ({                                      \
        dma_set_size(&d,len);               \
        dma_set_reply(&d,reply);            \
    })
    
    
    #define DMA_GET_SET(d,mode,reply,len) \
    ({                                    \
        dma_set_op(&d,DMA_GET);           \
        dma_set_mode(&d,mode);            \
        dma_set_reply(&d,reply);          \
        dma_set_size(&d,len);             \
    })
    #define DMA_GET_RUN(d,src,dest)       \
    ({                                    \
        dma(d,(long)(src),(long)(dest));  \
    })                                    
    #define DMA_GET_WAIT(reply,n)         \
    ({                                    \
        dma_wait((reply),n);              \
    })
    
    #define DMA_PUT_SET(d,mode,reply,len) \
    ({                                    \
        dma_set_op(&d,DMA_PUT);           \
        dma_set_mode(&d,mode);            \
        dma_set_reply(&d,reply);          \
        dma_set_size(&d,len);             \
    })
    #define DMA_PUT_RUN(d,src,dest)       \
    ({                                    \
        dma(d,(long)(src),(long)(dest));  \
    })                                
    #define DMA_PUT_WAIT(reply,n)         \
    ({                                    \
        dma_wait((reply),n);              \
    })
    
    typedef struct
    {
        int n;
        double * a;
        double * b;
        double * c;
    }add_INFO;
    
    #endif
    


  • 试试这个宏,把回答字当最后一个参数传给dma_rpl.
    https://gitee.com/swmore/swcache-assets/blob/master/dma_macros.h#L20


登录后回复