mpif90变量突变



  • 这是我最近定位到的一个问题:变量的值会突然变成 NaN。提交作业时一旦开启了从核,只有在一部分节点上程序能够正常运行,大多数时候变量会变成 NaN,程序无法正常运行。我自己修改的代码都在 x86 上测试过,运行正常,没出过问题;在神威上只要不打开从核选项,也能正常运行。我现在改的这段代码连从核都没有涉及到,这真的是太奇怪了。GDB 用不了,我只能一点一点加 print,逐步找到出问题的位置:刚好是一个 do 循环,而且刚好是在第 2 次循环的时候,变量的值突然发生突变,很头大……下面是程序的一段截取代码,后面的部分都没有执行,只执行了循环这一段。
    我写不出能重现这个问题的 demo:尝试写的重现 demo 都没有出现问题,所以没办法重现……

    程序代码:

          do 100 e=1,nel
    !                                                                       
            if (ifaxis) call setaxdy ( ifrzer(e) )
    !                                                                       
            IF (ldim.EQ.2) THEN
    !                                                                       
    !       2-d case ...............                                        
    !                                                                       
               if (iffast(e)) then
    !                                                                       
    !          Fast 2-d mode: constant properties and undeformed element    
    !                                                                       
               h1 = helm1(1,1,1,e)
               call mxm   (wddx,lx1,u(1,1,1,e),lx1,tm1,nyz)
               call mxm   (u(1,1,1,e),lx1,wddyt,ly1,tm2,ly1)
               call col2  (tm1,g4m1(1,1,1,e),nxyz)
               call col2  (tm2,g5m1(1,1,1,e),nxyz)
               call add3  (au(1,1,1,e),tm1,tm2,nxyz)
               call cmult (au(1,1,1,e),h1,nxyz)
    !                                                                       
               else
    !                                                                       
    !                                                                       
    
               print *,"---1  ",u(1,1,1,1)," ",u(1,1,1,2)
               call mxm  (dxm1,lx1,u(1,1,1,e),lx1,dudr,nyz)
               print *,"---2  ",u(1,1,1,1)," ",u(1,1,1,2)
               call mxm  (u(1,1,1,e),lx1,dytm1,ly1,duds,ly1)
               print *,"---3  ",u(1,1,1,1)," ",u(1,1,1,2)
               call col3 (tmp1,dudr,g1m1(1,1,1,e),nxyz)
               print *,"---4  ",u(1,1,1,1)," ",u(1,1,1,2)
               call col3 (tmp2,duds,g2m1(1,1,1,e),nxyz)
               print *,"---5  ",u(1,1,1,1)," ",u(1,1,1,2)
               if (ifdfrm(e)) then
                  call addcol3 (tmp1,duds,g4m1(1,1,1,e),nxyz)
               print *,"---6  ",u(1,1,1,1)," ",u(1,1,1,2)
                  call addcol3 (tmp2,dudr,g4m1(1,1,1,e),nxyz)
               print *,"---7  ",u(1,1,1,1)," ",u(1,1,1,2)
               endif
               call col2 (tmp1,helm1(1,1,1,e),nxyz)
               print *,"---8  ",u(1,1,1,1)," ",u(1,1,1,2)
               call col2 (tmp2,helm1(1,1,1,e),nxyz)
               print *,"---9  ",u(1,1,1,1)," ",u(1,1,1,2)
               call mxm  (dxtm1,lx1,tmp1,lx1,tm1,nyz)
               print *,"---10  ",u(1,1,1,1)," ",u(1,1,1,2)
               call mxm  (tmp2,lx1,dym1,ly1,tm2,ly1)
               print *,"---11  ",u(1,1,1,1)," ",u(1,1,1,2)
               call add2 (au(1,1,1,e),tm1,nxyz)
               print *,"---12  ",u(1,1,1,1)," ",u(1,1,1,2)
               call add2 (au(1,1,1,e),tm2,nxyz)
               print *,"---13  ",u(1,1,1,1)," ",u(1,1,1,2)
    
            endif
    !                                                                       
            else
                ……(以下省略,后面的代码没有执行过)
    

    0_1559126942491_7ccbb487-f034-4b0e-8ad8-3e0ba0733d91-image.png

    执行脚本:bsub -I -b -q q_sw_expr -o out.log -n 1 -cgsp 32 -sw3runarg "-p -f" -share_size 4096 -host_stack 1024 ./a.out



  • -cgsp 改成 64 试试



  • @桑薇 -cgsp 开多少都一样……除非不开从核。


登录后回复