Quantcast
Viewing all articles
Browse latest Browse all 3270

Simple coarray test program generates segment faults

Hi All,

Just started playing with coarrays, and have this simple program

! This looks at a simple coarray version of doing a numerical gradient
   program CoarrayTest2
      implicit none
      real(8), allocatable:: cntl(:), grad(:)[:], gradF77(:)
      real(8) fcn
      integer n[*]
      character (len=10) junk

      if (this_image() == 1 ) then
         write(*,*) 'Number of images: ', num_images()
         write(*,'(a,$)') 'Enter size of vector: '
         read(*,*) n
      end if
      sync all
      n = n[1] ! pass n to all images

      allocate ( cntl(n), grad(n)[*], gradF77(n) )

      cntl = 0. ! initialize vector
      call fcnAndGrad (n, cntl, fcn, grad)
      gradF77 = grad

      write(*,*) 'grad(', this_image(), ') = ', gradF77
      stop

   end program CoarrayTest2

   subroutine fcnAndGrad (n, cntl, fcn, grad)
      implicit none
      integer n  ! it appears that you don't have to define n as coarray in subprogram if not using?
      real(8) cntl(n)
      real(8) fcn
      real(8) grad(n)[*]
      ! local declarations
      real(8) delA, fcnDel, aHold
      integer i, myImg, noImg

      myImg = this_image()
      noImg = num_images()

      call fcnValue (n, cntl, fcn) ! get the value of fcn for nominal a
      do i=myImg, n, noImg
         delA = real(i)
         aHold = cntl(i)
         cntl(i) = cntl(i) + delA ! perturb a vector element
         call fcnValue (n, cntl, fcnDel)
         grad(i)[1] = (fcnDel - fcn)/delA ! only modify grad on image 1
         cntl(i) = aHold
      end do
      sync all ! wait for all images to finish
      grad = grad[1] ! broadcast completed gradient to all images
      return
   end subroutine fcnAndGrad

   subroutine fcnValue (n, cntl, fcn)
      implicit none
      integer n
      real(8) cntl(n), fcn

      fcn = sum(cntl)**2
      return
   end subroutine fcnValue

This compiles and runs fine under Parallel Studio XE 2016 Update 2 under Windows, and it compiles fine under same version of PSXE for Linux, but when I run it (Centos 7, 3.10.0-229.4.2.el7.x86_64), it produces segment faults, suc as:

 Number of images:            8
Enter size of vector: 1
forrtl: severe (174): SIGSEGV, segmentation fault occurred
In coarray image 1
Image              PC                Routine            Line        Source
a.out              0000000000480D85  Unknown               Unknown  Unknown
a.out              000000000047E9A7  Unknown               Unknown  Unknown
a.out              0000000000455354  Unknown               Unknown  Unknown
a.out              0000000000455166  Unknown               Unknown  Unknown
a.out              0000000000435276  Unknown               Unknown  Unknown
a.out              00000000004051E0  Unknown               Unknown  Unknown
libpthread.so.0    00007FE3F0099100  Unknown               Unknown  Unknown
libicaf.so         00007FE3F05AC554  Unknown               Unknown  Unknown
a.out              00000000004041D9  Unknown               Unknown  Unknown
a.out              000000000040399E  Unknown               Unknown  Unknown
libc.so.6          00007FE3EFCEAB15  Unknown               Unknown  Unknown
a.out              00000000004038A9  Unknown               Unknown  Unknown

application called MPI_Abort(comm=0x84000000, 3) - process 0
Fatal error in MPI_Win_lock: Wrong synchronization of RMA calls , error stack:
MPI_Win_lock(167)...: MPI_Win_lock(lock_type=235, rank=4, assert=0, win=0xa0000000) failed
MPIDI_Win_lock(3068): Wrong synchronization of RMA calls
forrtl: error (69): process interrupted (SIGINT)
In coarray image 4
Image              PC                Routine            Line        Source
a.out              0000000000480D85  Unknown               Unknown  Unknown
a.out              000000000047E9A7  Unknown               Unknown  Unknown
a.out              0000000000455354  Unknown               Unknown  Unknown
a.out              0000000000455166  Unknown               Unknown  Unknown
a.out              0000000000435276  Unknown               Unknown  Unknown
a.out              0000000000405A2E  Unknown               Unknown  Unknown
libpthread.so.0    00007FAD7567B100  Unknown               Unknown  Unknown
libpthread.so.0    00007FAD756776D3  Unknown               Unknown  Unknown
libmpi_mt.so.4     00007FAD74982585  Unknown               Unknown  Unknown
libmpi_mt.so.4     00007FAD747DC7FB  Unknown               Unknown  Unknown
libmpi_mt.so.4     00007FAD74800751  Unknown               Unknown  Unknown
libmpi_mt.so.4     00007FAD74AA2316  Unknown               Unknown  Unknown
libicaf.so         00007FAD75B91F45  Unknown               Unknown  Unknown
libicaf.so         00007FAD75B8F0FB  Unknown               Unknown  Unknown
a.out              000000000040429A  Unknown               Unknown  Unknown
a.out              000000000040399E  Unknown               Unknown  Unknown
libc.so.6          00007FAD752CCB15  Unknown               Unknown  Unknown
a.out              00000000004038A9  Unknown               Unknown  Unknown

application called MPI_Abort(comm=0x84000000, 3) - process 3
Fatal error in MPI_Win_lock: Wrong synchronization of RMA calls , error stack:
MPI_Win_lock(167)...: MPI_Win_lock(lock_type=235, rank=5, assert=0, win=0xa0000000) failed
MPIDI_Win_lock(3068): Wrong synchronization of RMA calls

Completely trivial coarray programs do compile and run under the linux, just not the one above.  I suspect I have violated some coarray standard, but I can't see it yet, and I'm not sure why it runs fine under Windows 10 version of PSXE 2016.

thanks!


Viewing all articles
Browse latest Browse all 3270

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>