Job Id: 149938051.gadi-pbs
Executable: um-atmos.exe
[gadi-cpu-clx-0575:46735:0:46735] ib_mlx5_log.c:179 Remote operation error on mlx5_0:1/IB (synd 0x14 vend 0x89 hw_synd 0/0)
[gadi-cpu-clx-0575:46735:0:46735] ib_mlx5_log.c:179 DCI QP 0xbce6 wqe[115]: RDMA_READ s-- [rqpn 0xb27a rlid 3308] [rva 0x14d539fd9200 rkey 0x10eb30] [va 0x15038814b200 len 604928 lkey 0x3b079d]
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Could not find abbrev number 55.
BFD: Dwarf Error: Could not find abbrev number 48.
BFD: Dwarf Error: Could not find abbrev number 55.
BFD: Dwarf Error: Offset (721528368) greater than or equal to .debug_str size (468988).
BFD: Dwarf Error: Could not find abbrev number 71.
BFD: Dwarf Error: Could not find abbrev number 56.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Invalid abstract instance DIE ref.
BFD: Dwarf Error: Could not find abbrev number 55.
BFD: Dwarf Error: Could not find abbrev number 48.
BFD: Dwarf Error: Could not find abbrev number 55.
BFD: Dwarf Error: Offset (721528368) greater than or equal to .debug_str size (468988).
BFD: Dwarf Error: Could not find abbrev number 71.
BFD: Dwarf Error: Could not find abbrev number 56.
==== backtrace (tid: 46735) ====
0 0x000000000002708e uct_ib_mlx5_completion_with_err() ???:0
1 0x00000000000550b9 uct_dc_mlx5_ep_handle_failure() ???:0
2 0x0000000000027475 uct_ib_mlx5_check_completion() ???:0
3 0x0000000000056d0f uct_dc_mlx5_iface_progress_ll.lto_priv.0() :0
4 0x00000000000483ca ucp_worker_progress() ???:0
5 0x000000000020c74e mca_pml_ucx_send() :0
6 0x00000000000bb41e ompi_coll_base_sendrecv_actual() :0
7 0x00000000000fad31 ompi_coll_tuned_bcast_intra_do_this() :0
8 0x00000000000e8d4c ompi_coll_tuned_bcast_intra_dec_fixed() :0
9 0x000000000007fb22 MPI_Bcast() ???:0
10 0x000000000004a753 ompi_bcast_f() /jobfs/129758397.gadi-pbs/0/openmpi/5.0.5/libtool/build/Intel/ompi/mpi/fortran/mpif-h/profile/pbcast_f.c:80
11 0x000000000378ec9f mpl_bcast_() /scratch/tm70/tm70_ci/tmp/restricted/spack-stage/spack-stage-gcom-7.9-cw4uhqfi2qmxgxhf6kowdmiabvyoeleh/spack-src/preprocess/src/gcom/mpl/mpl_bcast.F90:60
12 0x0000000003786532 gc_ibcast_() /scratch/tm70/tm70_ci/tmp/restricted/spack-stage/spack-stage-gcom-7.9-cw4uhqfi2qmxgxhf6kowdmiabvyoeleh/spack-src/preprocess/src/gcom/gc/gc_ibcast.F90:88
13 0x00000000004fa750 ppxlook_mod_mp_read_atmos_stashmaster_() /scratch/tm70/tm70_ci/tmp/restricted/spack-stage/spack-stage-um-13.1-vvaxy7rv4wnt7ega75qikne57cfgd6s2/spack-src/../spack-build/preprocess-atmos/src/um/src/control/misc/ppxlook_mod.F90:232
14 0x0000000002dda07b stash_proc_mod_mp_stash_proc_() /scratch/tm70/tm70_ci/tmp/restricted/spack-stage/spack-stage-um-13.1-vvaxy7rv4wnt7ega75qikne57cfgd6s2/spack-src/../spack-build/preprocess-atmos/src/um/src/control/top_level/stash_proc.F90:229
15 0x000000000040ce6c um_shell_mod_mp_um_shell_() /scratch/tm70/tm70_ci/tmp/restricted/spack-stage/spack-stage-um-13.1-vvaxy7rv4wnt7ega75qikne57cfgd6s2/spack-src/../spack-build/preprocess-atmos/src/um/src/control/top_level/um_shell.F90:724
16 0x0000000000409818 MAIN__() /scratch/tm70/tm70_ci/tmp/restricted/spack-stage/spack-stage-um-13.1-vvaxy7rv4wnt7ega75qikne57cfgd6s2/spack-src/../spack-build/preprocess-atmos/src/um/src/control/top_level/um_main.F90:60
17 0x00000000004097cd main() ???:0
18 0x000000000003a7e5 __libc_start_main() ???:0
19 0x00000000004096ee _start() ???:0
=================================
forrtl: error (76): Abort trap signal
Image PC Routine Line Source
libpthread-2.28.s 00001503A3698990 Unknown Unknown Unknown
libc-2.28.so 00001503A32FD52F gsignal Unknown Unknown
libc-2.28.so 00001503A32D0E65 abort Unknown Unknown
libucs.so.0.0.0 000015039E8DB4E7 Unknown Unknown Unknown
libucs.so.0.0.0 000015039E8E1F5F ucs_log_default_h Unknown Unknown
libucs.so.0.0.0 000015039E8D92BD ucs_log_dispatch Unknown Unknown
libuct_ib.so.0.0. 0000150392E9208E uct_ib_mlx5_compl Unknown Unknown
libuct_ib.so.0.0. 0000150392EC00B9 uct_dc_mlx5_ep_ha Unknown Unknown
libuct_ib.so.0.0. 0000150392E92475 uct_ib_mlx5_check Unknown Unknown
libuct_ib.so.0.0. 0000150392EC1D0F Unknown Unknown Unknown
libucp.so.0.0.0 000015039F5443CA ucp_worker_progre Unknown Unknown
libmpi.so.40.40.5 00001503A117C74E Unknown Unknown Unknown
libmpi.so.40.40.5 00001503A102B41E Unknown Unknown Unknown
libmpi.so.40.40.5 00001503A106AD31 Unknown Unknown Unknown
libmpi.so.40.40.5 00001503A1058D4C Unknown Unknown Unknown
libmpi.so.40.40.5 00001503A0FEFB22 MPI_Bcast Unknown Unknown
libmpi_mpifh_Inte 00001503A5B45753 pmpi_bcast Unknown Unknown
um-atmos.exe 000000000378EC9F mpl_bcast_ 60 mpl_bcast.F90
um-atmos.exe 0000000003786532 gc_ibcast_ 88 gc_ibcast.F90
um-atmos.exe 00000000004FA750 ppxlook_mod_mp_re 232 ppxlook_mod.F90
um-atmos.exe 0000000002DDA07B stash_proc_mod_mp 229 stash_proc.F90
um-atmos.exe 000000000040CE6C um_shell_mod_mp_u 724 um_shell.F90
um-atmos.exe 0000000000409818 MAIN__ 60 um_main.F90
um-atmos.exe 00000000004097CD Unknown Unknown Unknown
libc-2.28.so 00001503A32E97E5 __libc_start_main Unknown Unknown
um-atmos.exe 00000000004096EE Unknown Unknown Unknown
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
forrtl: error (78): process killed (SIGTERM)
Image PC Routine Line Source
libpthread-2.28.s 000014DE66F0C990 Unknown Unknown Unknown
libc-2.28.so 000014DE66B5BB8B __sched_yield Unknown Unknown
libopen-pal.so.80 000014DE63A40B41 opal_progress Unknown Unknown
libopen-pal.so.80 000014DE63AAEBE5 ompi_sync_wait_mt Unknown Unknown
libmpi.so.40.40.5 000014DE64851921 Unknown Unknown Unknown
libmpi.so.40.40.5 000014DE6489F438 Unknown Unknown Unknown
libmpi.so.40.40.5 000014DE648DED31 Unknown Unknown Unknown
libmpi.so.40.40.5 000014DE648CCD4C Unknown Unknown Unknown
libmpi.so.40.40.5 000014DE64863B22 MPI_Bcast Unknown Unknown
libmpi_mpifh_Inte 000014DE693B9753 pmpi_bcast Unknown Unknown
um-atmos.exe 000000000378EC9F mpl_bcast_ 60 mpl_bcast.F90
um-atmos.exe 0000000003786532 gc_ibcast_ 88 gc_ibcast.F90
um-atmos.exe 00000000004FA79C ppxlook_mod_mp_re 233 ppxlook_mod.F90
um-atmos.exe 0000000002DDA07B stash_proc_mod_mp 229 stash_proc.F90
um-atmos.exe 000000000040CE6C um_shell_mod_mp_u 724 um_shell.F90
um-atmos.exe 0000000000409818 MAIN__ 60 um_main.F90
um-atmos.exe 00000000004097CD Unknown Unknown Unknown
libc-2.28.so 000014DE66B5D7E5 __libc_start_main Unknown Unknown
um-atmos.exe 00000000004096EE Unknown Unknown Unknown