From b4d3fbc10771a62ebd6532a77928eafd66f8bae4 Mon Sep 17 00:00:00 2001 From: Adam Perdeusz Date: Wed, 9 Feb 2022 08:59:59 +0100 Subject: [PATCH] Add testcase for SWDEV-321838 Test case is causing the segfault in runtime occurs when using real arrays in `private` clause --- test/smoke-fails/Makefile | 1 + test/smoke-fails/flang-321838/Makefile | 18 + .../smoke-fails/flang-321838/flang-321838.f90 | 448 ++++++++++++++++++ 3 files changed, 467 insertions(+) create mode 100644 test/smoke-fails/flang-321838/Makefile create mode 100644 test/smoke-fails/flang-321838/flang-321838.f90 diff --git a/test/smoke-fails/Makefile b/test/smoke-fails/Makefile index 59940d7f53..cad0f76387 100644 --- a/test/smoke-fails/Makefile +++ b/test/smoke-fails/Makefile @@ -52,6 +52,7 @@ TESTS_DIR = \ flang-321277 \ flang-321277-2 \ flang-321412 \ + flang-321838 \ flang-flags-308205 \ flang_atomic_hint \ flang_dev_write \ diff --git a/test/smoke-fails/flang-321838/Makefile b/test/smoke-fails/flang-321838/Makefile new file mode 100644 index 0000000000..15cd138f11 --- /dev/null +++ b/test/smoke-fails/flang-321838/Makefile @@ -0,0 +1,18 @@ +include ../../Makefile.defs + +TESTNAME = flang-321838 +TESTSRC_MAIN = flang-321838.f90 +TESTSRC_AUX = +TESTSRC_ALL = $(TESTSRC_MAIN) $(TESTSRC_AUX) + +FLANG = flang +OMP_FLAGS += -cpp -O3 -DPRIV_TS_RS -DM_TEAMS_N_THREADS +#OMP_FLAGS += -cpp -DPRIV_TS_RS -DM_TEAMS +#OMP_FLAGS += -cpp -O3 -DPRIV_TS_RS -DONE_TEAM +#OMP_FLAGS += -cpp -O3 -DM_TEAMS_N_THREADS +#OMP_FLAGS += -cpp -O3 -DM_TEAMS +#OMP_FLAGS += -cpp -O3 -DONE_TEAM +OMP_BIN = $(AOMP)/bin/$(FLANG) +CC = $(OMP_BIN) $(VERBOSE) + +include ../Makefile.rules diff --git a/test/smoke-fails/flang-321838/flang-321838.f90 b/test/smoke-fails/flang-321838/flang-321838.f90 new file mode 100644 index 0000000000..eb5052bec1 --- /dev/null +++ b/test/smoke-fails/flang-321838/flang-321838.f90 @@ -0,0 +1,448 @@ +! Copyrieht (c) 2013, Los Alamos National Security, LLC (LANS) +! and the University Corporation for Atmospheric Research (UCAR). +! +! Unless noted otherwise source code is licensed under the BSD license. +! Additional copyright and license information can be found in the LICENSE file +! distributed with this code, or at http://mpas-dev.github.com/license.html +! + +module m_init + contains + + subroutine init_1D(x) + real :: x(:) + + x(:) = 0.5 + !write(*,'(F21.18)') x + end subroutine init_1D + + subroutine init_1D_int(x, max_val) + integer :: x(:) + real :: max_val + + x(:) = 0.5 * max_val + !write(*,'(I5)') x + end subroutine init_1D_int + + subroutine init_2D(x) + real:: x(:,:) + + x(:,:) = 0.5 + !write(*,'(F21.18)') x + end subroutine init_2D + + subroutine init_2D_int(x, max_val) + integer :: x(:,:) + real :: max_val + + x(:,:) = 0.5 * max_val + !write(*,'(I5)') x + end subroutine init_2D_int +end module m_init + + +module mpas_ut_func + integer, parameter :: RKIND = selected_real_kind(12) + real, parameter :: pii = 3.141592653589793 !< Constant: Pi + real, parameter :: a = 6371229.0 !< Constant: Spherical Earth radius [m] + real, parameter :: omega = 7.29212e-5 !< Constant: Angular rotation rate of the Earth [s-1] + real, parameter :: gravity = 9.80616 !< Constant: Acceleration due to gravity [m s-2] + real, parameter :: rgas = 287.0 !< Constant: Gas constant for dry air [J kg-1 K-1] + real, parameter :: rv = 461.6 !< Constant: Gas constant for water vapor [J kg-1 K-1] + real, parameter :: rvord = rv*rgas ! + real, parameter :: cp = 7.*rgas*2. !< Constant: Specific heat of dry air at constant pressure [J kg-1 K-1] + !real, parameter :: rvord = rv/rgas ! + !real, parameter :: cp = 7.*rgas/2. !< Constant: Specific heat of dry air at constant pressure [J kg-1 K-1] + real, parameter :: cv = cp - rgas !< Constant: Specific heat of dry air at constant volume [J kg-1 K-1] + real, parameter :: cvpm = -cv*cp ! + real, parameter :: prandtl = 1.0 !< Constant: Prandtl number + real, parameter :: NMAX = 10000000.0 + + contains + + subroutine kernel1(nVertLevels, maxEdges, nCells, nEdges, nCellsSolve, cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & + cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd, & + dts, small_step, epssm, cf1, cf2, cf3, T_e2e, use_gpu& + ) + use m_init + implicit none + + ! + ! Dummy arguments + ! + integer, intent(in) :: nVertLevels, maxEdges + integer, intent(in) :: nCells, nEdges, nCellsSolve + integer, intent(in) :: cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd + + real, dimension(nVertLevels,nCells+1) :: rho_zz + real, dimension(nVertLevels,nCells+1) :: theta_m + real, dimension(nVertLevels,nEdges+1) :: ru_p + real, dimension(nVertLevels+1,nCells+1) :: rw_p + real, dimension(nVertLevels,nCells+1) :: rtheta_pp + + real, dimension(nVertLevels,nCells+1) :: rtheta_pp_old + real, dimension(nVertLevels,nCells+1) :: zz + real, dimension(nVertLevels,nCells+1) :: exner + real, dimension(nVertLevels,nEdges+1) :: cqu + real, dimension(nVertLevels,nEdges+1) :: ruAvg + real, dimension(nVertLevels+1,nCells+1) :: wwAvg + real, dimension(nVertLevels,nCells+1) :: rho_pp + real, dimension(nVertLevels,nCells+1) :: cofwt + real, dimension(nVertLevels+1,nCells+1) :: coftz + real, dimension(nVertLevels,nEdges+1) :: zxu + + real, dimension(nVertLevels,nCells+1) :: a_tri + real, dimension(nVertLevels,nCells+1) :: alpha_tri + real, dimension(nVertLevels,nCells+1) :: gamma_tri + real, dimension(nVertLevels,nCells+1) :: dss + real, dimension(nVertLevels,nEdges+1) :: tend_ru + real, dimension(nVertLevels,nCells+1) :: tend_rho + real, dimension(nVertLevels,nCells+1) :: tend_rt + real, dimension(nVertLevels+1,nCells+1) :: tend_rw + real, dimension(nVertLevels+1,nCells+1) :: zgrid + real, dimension(nVertLevels,nCells+1) :: cofwr + real, dimension(nVertLevels,nCells+1) :: cofwz + + real, dimension(nVertLevels+1,nCells+1) :: w + real, dimension(nVertLevels,nEdges+1) :: ru + real, dimension(nVertLevels,nEdges+1) :: ru_save + real, dimension(nVertLevels+1,nCells+1) :: rw + real, dimension(nVertLevels+1,nCells+1) :: rw_save + + real, dimension(nVertLevels) :: fzm + real, dimension(nVertLevels) :: fzp + real, dimension(nVertLevels) :: rdzw + real, dimension(nEdges+1) :: dcEdge + real, dimension(nEdges+1) :: invDcEdge + real, dimension(nCells+1) :: invAreaCell + real, dimension(nVertLevels) :: cofrz + real, dimension(nEdges+1) :: dvEdge + + integer, dimension(nCells+1) :: nEdgesOnCell + integer, dimension(2,nEdges+1) :: cellsOnEdge + integer, dimension(maxEdges,nCells+1) :: edgesOnCell + real, dimension(maxEdges,nCells+1) :: edgesOnCell_sign + + integer, intent(in) :: small_step + real, intent(in) :: dts, epssm, cf1, cf2, cf3 + real, dimension(nVertLevels) :: ts, rs + + ! + ! Local variables + ! + integer :: cell1, cell2, iEdge, iCell, i, k + real :: c2, rcv, rtheta_pp_tmp + real :: pgrad, flux, resm, rdts + real, intent(inout) :: T_e2e + integer, intent(in) :: use_gpu + + !print *, "small_step,nCells,nEdges,nCellsSolve,cellStart,cellEnd,vertexStart,vertexEnd,edgeStart,edgeEnd,cellSolveStart,cellSolveEnd,vertexSolveStart,vertexSolveEnd,edgeSolveStart,edgeSolveEnd,dts,epssm,cf1,cf2,cf3" + !print *, small_step, ",", nCells, ",", nEdges, ",", nCellsSolve, ",", cellStart, ",", cellEnd, ",", vertexStart, ",", vertexEnd, ",", edgeStart, ",", edgeEnd, ",", cellSolveStart, ",", cellSolveEnd, ",", vertexSolveStart, ",", vertexSolveEnd, ",", edgeSolveStart, ",", edgeSolveEnd, ",", dts, ",", epssm, ",", cf1, ",", cf2, ",", cf3 + + resm = (1.0 - epssm) / (1.0 + epssm) + rdts = 1./dts + c2 = 0.5 + + call init_2D(rho_zz) + call init_2D(theta_m) + call init_2D(ru_p) + call init_2D(rw_p) + call init_2D(rtheta_pp) + call init_2D(rtheta_pp_old) + call init_2D(zz) + call init_2D(exner) + call init_2D(cqu) + call init_2D(ruAvg) + call init_2D(wwAvg) + call init_2D(rho_pp) + call init_2D(cofwt) + call init_2D(coftz) + call init_2D(zxu) + call init_2D(a_tri) + call init_2D(alpha_tri) + call init_2D(gamma_tri) + call init_2D(dss) + call init_2D(tend_ru) + call init_2D(tend_rho) + call init_2D(tend_rt) + call init_2D(tend_rw) + call init_2D(zgrid) + call init_2D(cofwr) + call init_2D(cofwz) + call init_2D(w) + call init_2D(ru) + call init_2D(ru_save) + call init_2D(rw) + call init_2D(rw_save) + + call init_1D(fzm) + call init_1D(fzp) + call init_1D(rdzw) + call init_1D(dcEdge) + call init_1D(invDcEdge) + call init_1D(invAreaCell) + call init_1D(cofrz) + call init_1D(dvEdge) + + call init_1D_int(nEdgesOnCell, NMAX) + nEdgesOnCell(:) = max(1, mod(nEdgesOnCell(:), maxEdges)) + call init_2D_int(cellsOnEdge, NMAX) + cellsOnEdge(:,:) = max(1, mod(cellsOnEdge(:,:), nCells)) + call init_2D_int(edgesOnCell, NMAX) + edgesOnCell(:,:) = max(1, mod(edgesOnCell(:,:), nEdges)) + + call init_2D(edgesOnCell_sign) +!$acc data present(rtheta_pp, rtheta_pp_old, ru_p, ruavg, rho_pp, & +!$acc rw_p, wwavg, & +!$acc zz, cellsonedge, cqu, dcedge, exner, invdcedge, & +!$acc tend_ru, zxu, tend_rho, a_tri, alpha_tri, cofrz, & +!$acc coftz, cofwr, cofwt, cofwz, dss, dvedge, edgesoncell, edgesoncell_sign, & +!$acc fzm, fzp, gamma_tri, invareacell, nedgesoncell, rdzw, rho_zz, rw, & +!$acc rw_save, tend_rho, tend_rt, tend_rw, theta_m, w) + + if(small_step /= 1) then ! not needed on first small step + +!!!kernel 2: atm_time_integration_atm_advance_acoustic_step_work_2660_gpu +!$acc parallel vector_length(32) +!$acc loop gang + do iEdge=edgeStart,edgeEnd ! MGD do we really just need edges touching owned cells? + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + ! update edges for block-owned cells + if (cell1 <= nCellsSolve .or. cell2 <= nCellsSolve ) then +!DIR$ IVDEP +!$acc loop vector + do k=1,nVertLevels + pgrad = ((rtheta_pp(k,cell2)-rtheta_pp(k,cell1))*invDcEdge(iEdge) )/(.5*(zz(k,cell2)+zz(k,cell1))) + pgrad = cqu(k,iEdge)*0.5*c2*(exner(k,cell1)+exner(k,cell2))*pgrad + pgrad = pgrad + 0.5*zxu(k,iEdge)*gravity*(rho_pp(k,cell1)+rho_pp(k,cell2)) + ru_p(k,iEdge) = ru_p(k,iEdge) + dts*(tend_ru(k,iEdge) - pgrad) + end do + + ! accumulate ru_p for use later in scalar transport +!DIR$ IVDEP +!$acc loop vector + + do k=1,nVertLevels + ruAvg(k,iEdge) = ruAvg(k,iEdge) + ru_p(k,iEdge) + end do + + end if ! end test for block-owned cells + + end do ! end loop over edges +!$acc end parallel + end if + + if(small_step ==1) then +! else ! this is all that us needed for ru_p update for first acoustic step in RK substep +!$acc parallel vector_length(32) +!$acc loop gang + do iEdge=edgeStart,edgeEnd ! MGD do we really just need edges touching owned cells? + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + ! update edges for block-owned cells + if (cell1 <= nCellsSolve .or. cell2 <= nCellsSolve ) then +!DIR$ IVDEP +!$acc loop vector + do k=1,nVertLevels + ru_p(k,iEdge) = dts*tend_ru(k,iEdge) + end do +!DIR$ IVDEP +!$acc loop vector + do k=1,nVertLevels +!! ruAvg(k,iEdge) = ruAvg(k,iEdge) + ru_p(k,iEdge) + ruAvg(k,iEdge) = ru_p(k,iEdge) + end do + + end if ! end test for block-owned cells + end do ! end loop over edges +!$acc end parallel + end if ! test for first acoustic step + + if (small_step == 1) then ! initialize here on first small timestep. +!$acc parallel vector_length(32) +!$acc loop gang vector + do iCell=cellStart,cellEnd + rtheta_pp_old(1:nVertLevels,iCell) = 0.0 + end do +!$acc end parallel + else +!$acc parallel vector_length(32) +!$acc loop gang vector + do iCell=cellStart,cellEnd + rtheta_pp_old(1:nVertLevels,iCell) = rtheta_pp(1:nVertLevels,iCell) + end do +!$acc end parallel + end if + + +!!!OMP BARRIER -- not needed, since rtheta_pp_old not used below when small_step == 1 +!!!kernel 1: atm_time_integration_atm_advance_acoustic_step_work_2739_gpu +#ifdef ONE_TEAM +!$omp target & +#elif M_TEAMS +!$omp target teams & +#elif M_TEAMS_N_THREADS +!$omp target teams distribute & +#endif +#ifdef PRIV_TS_RS +!$omp private(ts, rs) & +#endif +!$omp +!$acc parallel vector_length(32) +!$acc loop gang private(ts, rs) + do iCell=cellSolveStart,cellSolveEnd ! loop over all owned cells to solve +!!$acc cache(ts) +!!$acc cache(rs) + + ts(:) = 0.0 + rs(:) = 0.0 + + if(small_step == 1) then ! initialize here on first small timestep. + wwAvg(1:nVertLevels+1,iCell) = 0.0 + rho_pp(1:nVertLevels,iCell) = 0.0 + rtheta_pp(1:nVertLevels,iCell) = 0.0 +!MGD moved to loop above over all cells +! rtheta_pp_old(1:nVertLevels,iCell) = 0.0 + rw_p(:,iCell) = 0.0 + end if + +!$acc loop seq + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) +!DIR$ IVDEP +!$acc loop vector + + do k=1,nVertLevels + flux = edgesOnCell_sign(i,iCell)*dts*dvEdge(iEdge)*ru_p(k,iEdge) * invAreaCell(iCell) + rs(k) = rs(k)-flux + ts(k) = ts(k)-flux*0.5*(theta_m(k,cell2)+theta_m(k,cell1)) + end do + end do + ! vertically implicit acoustic and gravity wave integration. + ! this follows Klemp et al MWR 2007, with the addition of an implicit Rayleigh damping of w + ! serves as a gravity-wave absorbing layer, from Klemp et al 2008. + +!DIR$ IVDEP +!$acc loop vector + + do k=1, nVertLevels + rs(k) = rho_pp(k,iCell) + dts*tend_rho(k,iCell) + rs(k) & + - cofrz(k)*resm*(rw_p(k+1,iCell)-rw_p(k,iCell)) + ts(k) = rtheta_pp(k,iCell) + dts*tend_rt(k,iCell) + ts(k) & + - resm*rdzw(k)*( coftz(k+1,iCell)*rw_p(k+1,iCell) & + -coftz(k,iCell)*rw_p(k,iCell)) + end do + +!DIR$ IVDEP +!$acc loop vector + + do k=2, nVertLevels + wwavg(k,iCell) = wwavg(k,iCell) + 0.5*(1.0-epssm)*rw_p(k,iCell) + end do + +!DIR$ IVDEP +!$acc loop vector + + do k=2, nVertLevels + rw_p(k,iCell) = rw_p(k,iCell) + dts*tend_rw(k,iCell) & + - cofwz(k,iCell)*((zz(k ,iCell)*ts(k) & + -zz(k-1,iCell)*ts(k-1)) & + +resm*(zz(k ,iCell)*rtheta_pp(k ,iCell) & + -zz(k-1,iCell)*rtheta_pp(k-1,iCell))) & + - cofwr(k,iCell)*((rs(k)+rs(k-1)) & + +resm*(rho_pp(k,iCell)+rho_pp(k-1,iCell))) & + + cofwt(k ,iCell)*(ts(k)+resm*rtheta_pp(k ,iCell)) & + + cofwt(k-1,iCell)*(ts(k-1)+resm*rtheta_pp(k-1,iCell)) + end do + + ! tridiagonal solve sweeping up and then down the column + +!MGD VECTOR DEPENDENCE +!$acc loop seq + do k=2,nVertLevels + rw_p(k,iCell) = (rw_p(k,iCell)-a_tri(k,iCell)*rw_p(k-1,iCell))*alpha_tri(k,iCell) + end do + +!MGD VECTOR DEPENDENCE +!$acc loop seq + do k=nVertLevels,1,-1 + rw_p(k,iCell) = rw_p(k,iCell) - gamma_tri(k,iCell)*rw_p(k+1,iCell) + end do + + ! the implicit Rayleigh damping on w (gravity-wave absorbing) +!DIR$ IVDEP +!$acc loop vector + + do k=2,nVertLevels + rw_p(k,iCell) = (rw_p(k,iCell) + (rw_save(k ,iCell) - rw(k ,iCell)) -dts*dss(k,iCell)* & + (fzm(k)*zz (k,iCell)+fzp(k)*zz (k-1,iCell)) & + *(fzm(k)*rho_zz(k,iCell)+fzp(k)*rho_zz(k-1,iCell)) & + *w(k,iCell) )/(1.0+dts*dss(k,iCell)) & + - (rw_save(k ,iCell) - rw(k ,iCell)) + end do + + ! accumulate (rho*omega)' for use later in scalar transport +!DIR$ IVDEP +!$acc loop vector + + do k=2,nVertLevels + wwAvg(k,iCell) = wwAvg(k,iCell) + 0.5*(1.0+epssm)*rw_p(k,iCell) + end do + + ! update rho_pp and theta_pp given updated rw_p +!DIR$ IVDEP +!$acc loop vector + + do k=1,nVertLevels + rho_pp(k,iCell) = rs(k) - cofrz(k) *(rw_p(k+1,iCell)-rw_p(k ,iCell)) + rtheta_pp(k,iCell) = ts(k) - rdzw(k)*(coftz(k+1,iCell)*rw_p(k+1,iCell) & + -coftz(k ,iCell)*rw_p(k ,iCell)) + end do + end do ! end of loop over cells +#ifdef ONE_TEAM +!$omp end target +#elif M_TEAMS +!$omp end target teams +#elif CPU_M_TEAMS +!$omp end teams +#endif +!$acc end parallel +!$acc end data + open(1, file = 'mpas_test_kernel1.out') + write(1, '(F21.6)') wwAvg + write(1, '(F21.6)') rho_pp + write(1, '(F21.6)') rtheta_pp + !print *, wwAvg, rho_pp, rtheta_pp + end subroutine kernel1 +end module mpas_ut_func + +program test + use mpas_ut_func + use m_init + + implicit none + + real, parameter :: dts = 75.0 + real, parameter :: epssm = 0.1 + real, parameter :: cf1 = 2.0 + real, parameter :: cf2 = -1.5 + real, parameter :: cf3 = -0.5 + integer :: small_step = 6 + integer :: i, iter, use_gpu + real :: t_sum = 0, t_iter + character(len=12) :: args + + call get_command_argument(1, args) + read(args,'(i)') use_gpu + + iter = 1 + call kernel1(26,10,10969,33274,10238,1,10969,1,22306,1,33274,1,10238,1,20453,1,30696,dts,small_step,epssm,cf1,cf2,cf3, t_iter, use_gpu) +end program test