mpif90变量突变
-
这是我最近定位到的一个问题:变量会突然变成 NaN。提交作业时一旦开启了从核,只有在部分节点上程序能够正常运行,大多数时候变量会变成 NaN,程序无法正常运行。我自己修改的代码都在 x86 上测试过,运行正常,没出过问题;在神威上只要不打开从核选项,也能正常运行。我现在改的这段代码连从核都没涉及到,这真的太奇怪了。GDB 用不了,我只能一点一点加 print,才找到出问题的位置:刚好是一个循环(下面代码中的 do 循环),刚好在第 2 次循环的时候变量发生突变,很头大……下面是程序的一段截断代码,后面的部分都没有执行到,只执行了循环这一段。
我写不出能重现该问题的 demo:单独写的 demo 都没有出现问题,没办法重现……程序代码:
do 100 e=1,nel ! if (ifaxis) call setaxdy ( ifrzer(e) ) ! IF (ldim.EQ.2) THEN ! ! 2-d case ............... ! if (iffast(e)) then ! ! Fast 2-d mode: constant properties and undeformed element ! h1 = helm1(1,1,1,e) call mxm (wddx,lx1,u(1,1,1,e),lx1,tm1,nyz) call mxm (u(1,1,1,e),lx1,wddyt,ly1,tm2,ly1) call col2 (tm1,g4m1(1,1,1,e),nxyz) call col2 (tm2,g5m1(1,1,1,e),nxyz) call add3 (au(1,1,1,e),tm1,tm2,nxyz) call cmult (au(1,1,1,e),h1,nxyz) ! else ! ! print *,"---1 ",u(1,1,1,1)," ",u(1,1,1,2) call mxm (dxm1,lx1,u(1,1,1,e),lx1,dudr,nyz) print *,"---2 ",u(1,1,1,1)," ",u(1,1,1,2) call mxm (u(1,1,1,e),lx1,dytm1,ly1,duds,ly1) print *,"---3 ",u(1,1,1,1)," ",u(1,1,1,2) call col3 (tmp1,dudr,g1m1(1,1,1,e),nxyz) print *,"---4 ",u(1,1,1,1)," ",u(1,1,1,2) call col3 (tmp2,duds,g2m1(1,1,1,e),nxyz) print *,"---5 ",u(1,1,1,1)," ",u(1,1,1,2) if (ifdfrm(e)) then call addcol3 (tmp1,duds,g4m1(1,1,1,e),nxyz) print *,"---6 ",u(1,1,1,1)," ",u(1,1,1,2) call addcol3 (tmp2,dudr,g4m1(1,1,1,e),nxyz) print *,"---7 ",u(1,1,1,1)," ",u(1,1,1,2) endif call col2 (tmp1,helm1(1,1,1,e),nxyz) print *,"---8 ",u(1,1,1,1)," ",u(1,1,1,2) call col2 (tmp2,helm1(1,1,1,e),nxyz) print *,"---9 ",u(1,1,1,1)," ",u(1,1,1,2) call mxm (dxtm1,lx1,tmp1,lx1,tm1,nyz) print *,"---10 ",u(1,1,1,1)," ",u(1,1,1,2) call mxm (tmp2,lx1,dym1,ly1,tm2,ly1) print *,"---11 ",u(1,1,1,1)," ",u(1,1,1,2) call add2 (au(1,1,1,e),tm1,nxyz) print *,"---12 ",u(1,1,1,1)," ",u(1,1,1,2) call add2 (au(1,1,1,e),tm2,nxyz) print *,"---13 ",u(1,1,1,1)," ",u(1,1,1,2) endif ! else .....省略,后面没执行过
执行脚本:bsub -I -b -q q_sw_expr -o out.log -n 1 -cgsp 32 -sw3runarg "-p -f" -share_size 4096 -host_stack 1024 ./a.out
-
-cgsp 改成 64 试试
-
@桑薇 开多少都一样……除非不开。