Logo Search packages:      
Sourcecode: abinit version File versions  Download package

nonlop_pl.F90

!{\src2tex{textfont=tt}}
!!****f* ABINIT/nonlop_pl
!! nonlop_pl
!!
!! NAME
!! nonlop_pl
!!
!! FUNCTION
!! * Compute application of a nonlocal operator Vnl in order to get:
!!    - contracted elements (energy, forces, stresses, ...), if signs=1
!!    - a function in reciprocal space (|out> = Vnl|in>), if signs=2
!!   Operator Vnl, as the following form:
!!    $Vnl=sum_{R,lmn,l''m''n''} {|P_{Rlmn}> Enl^{R}_{lmn,l''m''n''} <P_{Rl''m''n''}|}$
!!   Operator Vnl is -- in the typical case -- the nonlocal potential.
!!   - With norm-conserving pseudopots, $Enl^{R}_{lmn,l''m''n''}$ is the
!!     Kleinmann-Bylander energy $Ekb^{R}_{ln}$.
!!   - The |P_{Rlmn}> are the projector functions.
!! * This routine uses Legendre polynomials Pl to express Vnl.
!!
!! COPYRIGHT
!! Copyright (C) 1998-2007 ABINIT group (DCA, XG, GMR, GZ, MT, FF, DRH)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt.
!!
!! INPUTS
!!  choice: chooses possible output:
!!    choice=1 => a non-local energy contribution
!!          =2 => a gradient with respect to atomic position(s)
!!          =3 => a gradient with respect to strain(s)
!!          =23=> a gradient with respect to atm. pos. and strain(s)
!!          =4 => a gradient and 2nd derivative with respect to atomic pos.
!!          =5 => a gradient with respect to k wavevector
!!          =6 => 2nd derivatives with respect to strain
!!  dimekb1,dimekb2=dimensions of ekb (see ekb)
!!  dimffnlin=second dimension of ffnlin (1+number of derivatives)
!!  dimffnlout=second dimension of ffnlout (1+number of derivatives)
!!  ekb(dimekb1,dimekb2)= (Real) Kleinman-Bylander energies (hartree)
!!                        dimekb1=lmnmax  -  dimekp2=ntypat
!!  ffnlin(npwin,dimffnlin,lmnmax,ntypat)=nonlocal form factors to be used
!!          for the application of the nonlocal operator to the |in> vector
!!  ffnlout(npwout,dimffnlout,lmnmax,ntypat)=nonlocal form factors to be used
!!          for the application of the nonlocal operator to the |out> vector
!!  gmet(3,3)=metric tensor for G vecs (in bohr**-2)
!!  gprimd(3,3)=dimensional reciprocal space primitive translations
!!   (bohr^-1)
!!  idir=direction of the - atom to be moved in the case (choice=2,signs=2),
!!                        - k point direction in the case (choice=5,signs=2)
!!                        - strain component (1:6) in the case (choice=2,signs=2) or (choice=6,signs=1)
!!  indlmn(6,i,ntypat)= array giving l,m,n,lm,ln,s for i=ln
!!  istwf_k=option parameter that describes the storage of wfs
!!  kgin(3,npwin)=integer coords of planewaves in basis sphere, for the |in> vector
!!  kgout(3,npwout)=integer coords of planewaves in basis sphere, for the |out> vector
!!  kpgin(npw,npkgin)= (k+G) components and related data, for the |in> vector
!!  kpgout(npw,nkpgout)=(k+G) components and related data, for the |out> vector
!!  kptin(3)=k point in terms of recip. translations, for the |in> vector
!!  kptout(3)=k point in terms of recip. translations, for the |out> vector
!!  lmnmax=max. number of (l,m,n) components over all types of atoms
!!  matblk=dimension of the arrays ph3din and ph3dout
!!  mgfft=maximum size of 1D FFTs
!!  mpi_enreg=informations about MPI parallelization
!!  natom=number of atoms in cell
!!  nattyp(ntypat)=number of atoms of each type
!!  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/input_variables/vargs.htm#ngfft
!!  nkpgin,nkpgout=second sizes of arrays kpgin/kpgout
!!  nloalg(5)=governs the choice of the algorithm for nonlocal operator
!!  nnlout=dimension of enlout: choice=1=>nnlout=1   choice=2=>nnlout=3*natom
!!                              choice=3=>nnlout=6   choice=4=>nnlout=6*natom
!!                              choice=5=>nnlout=1   choice=6=>nnlout=6*(3*natom+6)
!!                              choice=23=>nnlout=6+3*natom
!!  npwin=number of planewaves for given k point, for the |in> vector
!!  npwout=number of planewaves for given k point, for the |out> vector
!!  nspinor=number of spinorial components of the wavefunctions
!!  ntypat=number of types of atoms in cell
!!  phkxredin(2,natom)=phase factors exp(2 pi kptin.xred)
!!  phkxredout(2,natom)=phase factors exp(2 pi kptout.xred)
!!  ph1d(2,3*(2*mgfft+1)*natom)=1D structure factors phase information
!!  ph3din(2,npwin,matblk)=3D structure factors, for each atom and plane wave (in)
!!  ph3dout(2,npwout,matblk)=3-dim structure factors, for each atom and plane wave (out)
!!  pspso(ntypat)=spin-orbit characteristic for each atom type
!!  signs= if 1, get contracted elements (energy, forces, stress, ...)
!!         if 2, applies the non-local operator to a function in reciprocal space
!!  ucvol=unit cell volume (bohr^3)
!!  vectin(2,nspinor*npwin)=input cmplx wavefunction coefficients <G|Cnk>
!!
!! OUTPUT
!!  ==== if (signs==1) ====
!!     enlout(nnlout)= contribution of this state to the nl part
!!                     of the following properties:
!!       if choice=1 : enlout(1)               -> the energy
!!       if choice=2 : enlout(1:3*natom)       -> the forces
!!       if choice=3 : enlout(1:6)             -> the stresses
!!       if choice=23: enlout(1:6+3*natom)     -> the forces and the stresses
!!       if choice=4 : enlout(1:6*natom)       -> the frozen wf part of dynam. matrix
!!       if choice=6 : enlout(1:6*(3*natom+6)) -> the frozen wf part of elastic tensor
!!  ==== if (signs==2) ====
!!     vectout(2,nspinor*npwout)= result of the aplication of the nl operator
!!                                or one of its derivative to the input vect.:
!!       if choice=1 : Vnl |vectin>
!!       if choice=2 : dVnl/d(xred(idir,iatom) |vectin> (xred=reduced atm. pos.)
!!       if choice=3 : dVnl/d(strain(idir)) |vectin>    (symmetric strain =>idir=1...6)
!!       if choice=5 : dVnl/dk(idir) |vectin>           (k wavevector)
!!
!! NOTES
!! In the case signs=1, the array vectout is not used, nor modified
!! so that the same array as vectin can be used as a dummy argument;
!! the same is true for the pairs npwin-npwout, ffnlin-ffnlout,
!! kgin-kgout, ph3din-ph3dout, phkredin-phkxredout).
!!
!! Calculation includes contributions to grads of Etot wrt coord and
!! wrt strains for l=0,1,2,3.
!!
!! WARNINGS
!!  - Warning 1: This routine is in a transient state, during the
!!    time of the implementation of the spin-orbit coupling...
!!    In particular, the OMP parallelisation is still missing,
!!    but it matters here only when nspinor==2.
!!  - Warning 2: the order of atoms is governed by atindx
!!
!! PARENTS
!!      nonlop
!!
!! CHILDREN
!!      cont13,cont22,cont22cso,cont22so,cont24,cont3,cont33cso,cont33so,cont35
!!      contistr01,contistr03,contistr12,contstr21,contstr23,contstr25
!!      contstr25a,contstr26,ddkten,leave_new,metcon,metcon_so,metric_so,metstr
!!      opernl2,opernl3,opernl4a,opernl4b,ph1d3d,scalewf_nonlop,strconv,strsocv
!!      timab,trace2,wrtout,xcomm_init,xsum_mpi
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine nonlop_pl(choice,dimekb1,dimekb2,dimffnlin,dimffnlout,ekb,enlout,&
&                     ffnlin,ffnlout,gmet,gprimd,idir,indlmn,istwf_k,kgin,kgout,kpgin,kpgout,&
&                     kptin,kptout,lmnmax,matblk,mgfft,mpi_enreg,mpsang,mpssoang,&
&                     natom,nattyp,ngfft,nkpgin,nkpgout,nloalg,nnlout,npwin,npwout,nspinor,ntypat,&
&                     phkxredin,phkxredout,ph1d,ph3din,ph3dout,pspso,signs,&
&                     ucvol,vectin,vectout)

 use defs_basis
 use defs_datatypes

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_12geometry
 use interfaces_12nlstrain
 use interfaces_13nonlocal, except_this_one => nonlop_pl
 use interfaces_lib01hidempi
#else
 use defs_xfuncmpi
#endif
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!This type is defined in defs_mpi
!The (inout) classification below is misleading; mpi_enreg is temporarily
! changed but reset to its initial condition before exiting.
!scalars
 integer,intent(in) :: choice,dimekb1,dimekb2,dimffnlin,dimffnlout,idir,istwf_k
 integer,intent(in) :: lmnmax,matblk,mgfft,mpsang,mpssoang,natom,nkpgin,nkpgout
 integer,intent(in) :: nnlout,npwin,npwout,nspinor,ntypat,signs
 real(dp),intent(in) :: ucvol
 type(MPI_type),intent(inout) :: mpi_enreg
!arrays
 integer,intent(in) :: indlmn(6,lmnmax,ntypat),kgin(3,npwin),kgout(3,npwout)
 integer,intent(in) :: nattyp(ntypat),ngfft(18),nloalg(5),pspso(ntypat)
 real(dp),intent(in) :: ekb(dimekb1,dimekb2)
 real(dp),intent(in) :: ffnlin(npwin,dimffnlin,lmnmax,ntypat)
 real(dp),intent(in) :: ffnlout(npwout,dimffnlout,lmnmax,ntypat),gmet(3,3)
 real(dp),intent(in) :: gprimd(3,3),kpgin(npwin,nkpgin),kpgout(npwout,nkpgout)
 real(dp),intent(in) :: kptin(3),kptout(3),ph1d(2,3*(2*mgfft+1)*natom)
 real(dp),intent(in) :: phkxredin(2,natom),phkxredout(2,natom)
 real(dp),intent(inout) :: ph3din(2,npwin,matblk),ph3dout(2,npwout,matblk)
 real(dp),intent(inout) :: vectin(2,nspinor*npwin)
 real(dp),intent(out) :: enlout(nnlout),vectout(2,nspinor*npwout)

!Local variables-------------------------------
!mlang is the maximum number of different angular momenta
!(mlang=4 means s,p,d,f)
! Note : in a future version, one should adjust mlang to mpsang.
!mlang2 is the maximum number of unique tensor components for a tensor
!of rank (mlang-1) with index range 1-3
!mlang3 is the maximum number of unique tensor components summed over
!all tensors of rank 0 through mlang-1.
!mlang4 is the total number of additional unique tensor components
!related to strain gradients, ranks 2 through mlang+1.
!mlang6 is the total number of additional unique tensor components
!related to strain 2nd derivaives, ranks 4 through mlang+3.
!mlang1 is the total number of certain additional unique tensor components
!related to internal strain, ranks 1 through mlang
!mlang5 is the total number of other additional unique tensor components
!related to internal strain, ranks 1 through mlang
!scalars
 integer,parameter :: mlang=4
 integer,save :: mlang1=((mlang+1)*(mlang+2)*(mlang+3))/6-1
 integer,save :: mlang2=(mlang*(mlang+1))/2
 integer,save :: mlang3=(mlang*(mlang+1)*(mlang+2))/6
 integer,save :: mlang4=((mlang+2)*(mlang+3)*(mlang+4))/6-4
 integer,save :: mlang5=((mlang+3)*(mlang+4)*(mlang+5))/6-10
 integer,save :: mlang6=((mlang+4)*(mlang+5)*(mlang+6))/6-20
 integer :: compact,ia,ia1,ia2,ia3,ia4,ia5,ierr,iest,ig,ii,ilang,ilang2,ilmn
 integer :: iln,iln0,indx,iproj,ipsang,ipssoang,ishift,ispin,ispinor,ispinp
 integer :: istr,istr1,istr2,iterm,itypat,jj,jjk,jjs,jjs1,jjs2,jjs3,jjs4,jjstr
 integer :: lpsang,mincat,mproj,mu,mumax,n1,n2,n3,ndgxdt,ndgxdtfac,nincat,nlang
 integer :: nlangso,nproj,nspinso,old_paral_level,rank,shift1,shift2,shift3
 integer :: sign,spaceComm
 real(dp) :: e2nl,e2nldd,enlk,ph12i,ph12r,ph1i,ph1r,ph2i,ph2r,ph3i,ph3r
 character(len=500) :: message
!arrays
 integer,allocatable :: indlmn_s(:,:,:),jproj(:)
 real(dp) :: amet(2,3,3,2,2),amet_lo(3,3),e2nl_tmp(6),eisnl(3),rank2(6)
 real(dp) :: rank2c(2,6),strsnl(6),strsso(6,3),strssoc(6),trace(2),tsec(2)
 real(dp),allocatable :: d2gxdis(:,:,:,:,:),d2gxdis_s(:,:,:,:)
 real(dp),allocatable :: d2gxds2(:,:,:,:,:),d2gxds2_s(:,:,:,:)
 real(dp),allocatable :: dgxdis(:,:,:,:,:),dgxdis_s(:,:,:,:),dgxds(:,:,:,:,:)
 real(dp),allocatable :: dgxds_s(:,:,:,:),dgxdsfac(:,:,:,:,:)
 real(dp),allocatable :: dgxdt(:,:,:,:,:,:),dgxdt_s(:,:,:,:,:)
 real(dp),allocatable :: dgxdtfac(:,:,:,:,:),ekb_s(:,:),gxa(:,:,:,:,:)
 real(dp),allocatable :: gxa_s(:,:,:,:),gxafac(:,:,:,:),pauli(:,:,:,:)
 real(dp),allocatable :: temp(:,:),tmpfac(:,:),vectin_s(:,:),vectout_s(:,:)
 real(dp),allocatable :: wt(:,:)

! **********************************************************************

! DEBUG
! write(6,*)' nonlop_pl: enter '
! ENDDEBUG

!Test: spin orbit not allowed for choice=5,6
 if (nspinor==2 .and. (choice==5.or.choice==6) ) then
  write(message, '(4a)' ) ch10,&
&   ' nonlop : BUG -',ch10,&
&   '  For nspinor=2, choice=5 is not yet allowed.'
  call wrtout(06,message,'PERS')
  call leave_new('PERS')
 end if

!Define dimension of work arrays.
 mincat=min(nloalg(4),maxval(nattyp))
 mproj=maxval(indlmn(3,:,:))
 allocate(temp(2,mlang4),tmpfac(2,mlang4))
 allocate(wt(mlang,mproj),jproj(mlang))
 n1=ngfft(1) ; n2=ngfft(2) ; n3=ngfft(3)
 allocate(ekb_s(mlang,mproj))
 allocate(indlmn_s(6,lmnmax,ntypat))

!Eventually compute the spin-orbit metric tensor:
 if (mpssoang>mpsang) then
  allocate(pauli(2,2,2,3))
  call metric_so(amet,gprimd,pauli)
 end if

!Allocate array gxa (contains projected scalars).
 allocate(gxa(2,mlang3,mincat,mproj,nspinor))
 if(nspinor==2)allocate(gxa_s(2,mlang3,mincat,mproj))
 allocate(gxafac(2,mlang3,mincat,mproj))
 gxa(:,:,:,:,:)=zero

!Allocate dgxdt (contains derivatives of gxa with respect to atomic
!displacements or ddk).
! If choice==2 : first-order atomic displacements
!  If signs==2, only one direction is considered
!  If signs==1, the three directions are considered
! If choice==4 and signs==1 : second-order atomic displacements,
!  the nine components are considered
! If choice==5 and signs==2 : ddk
!  component 1 -> from ffnl(:,2,...)
!  component 2 -> from ffnl(:,1,...) (too much space is booked for this
!                 case, since the number of angular momenta is smaller
!                 than mlang3, but it is easier)
 ndgxdt=0
 if(signs==2 .and. choice==2) ndgxdt=1
 if(signs==1 .and. (choice==2.or.choice==23)) ndgxdt=3
 if(choice==4) ndgxdt=9
 if(choice==5) ndgxdt=2
 allocate(dgxdt(2,ndgxdt,mlang3,mincat,mproj,nspinor))
 if(nspinor==2)allocate(dgxdt_s(2,ndgxdt,mlang3,mincat,mproj))
 dgxdt(:,:,:,:,:,:)=zero
 ndgxdtfac=0
 if(signs==2 .and. choice==2) ndgxdtfac=1
 if(choice==4) ndgxdtfac=3
 if(choice==5) ndgxdtfac=2
 allocate(dgxdtfac(2,ndgxdtfac,mlang3,mincat,mproj))

!Allocate dgxds (contains derivatives of gxa with respect to strains).
 allocate(dgxds(2,mlang4,mincat,mproj,nspinor))
 dgxds(:,:,:,:,:)=zero
 allocate(dgxdsfac(2,mlang4,mincat,mproj,nspinor))
 if(choice==6) then
  allocate(dgxdis(2,mlang1,mincat,mproj,nspinor))
  allocate(d2gxdis(2,mlang5,mincat,mproj,nspinor))
  allocate(d2gxds2(2,mlang6,mincat,mproj,nspinor))
 end if
 if(nspinor==2)allocate(dgxds_s(2,mlang4,mincat,mproj))
 if(nspinor==2 .and. choice==6) then
  allocate(dgxdis_s(2,mlang1,mincat,mproj))
  allocate(d2gxdis_s(2,mlang5,mincat,mproj))
  allocate(d2gxds2_s(2,mlang6,mincat,mproj))
 end if

!Zero out some arrays
 if(choice==2 .or. choice==4 .or. choice==5 .or. choice==6 .or. choice==23) enlout(:)=0.0d0

#if defined FC_NEC
 if(signs==2)then
  vectout(1,:)=0.0d0 ; vectout(2,:)=0.0d0
 end if
#else
 if(signs==2) vectout(:,:)=0.0d0
#endif

 if(choice==3.or.choice==23) then
  strsnl(:)=0.0d0
  if(mpssoang>mpsang) strsso(:,:)=0.0d0
 end if
 enlk=0.0d0

!In the case vectin is a spinor, split its second part.
!Also, eventually take into account the storage format of the wavefunction
!(the original vector will be restored before leaving the routine,
! except for the vectin(2,1) component with istwf_k==2,
! that should vanish)
!Treat the second spinor part first
 if(nspinor==2)then
  allocate(vectin_s(2,npwin),vectout_s(2,npwout))

! Initialize it
!$OMP PARALLEL DO PRIVATE(ig) SHARED(npwin,vectin,vectin_s)
#if defined FC_NEC
!CDIR NODEP
#endif
  do ig=1,npwin
   vectin_s(1,ig)=vectin(1,ig+npwin)
   vectin_s(2,ig)=vectin(2,ig+npwin)
  end do

!$OMP END PARALLEL DO
! Take into account the storage
  if(istwf_k/=1)then
   call scalewf_nonlop(istwf_k,mpi_enreg,npwin,1,vectin_s)
  end if
 end if ! nspinor==2

!Treat the first spinor part now
 if(istwf_k/=1)then
  call scalewf_nonlop(istwf_k,mpi_enreg,npwin,1,vectin)
 end if


!Big loop on atom types.
 ia1=1
 do itypat=1,ntypat

! Get atom loop indices for different types:
  ia2=ia1+nattyp(itypat)-1

! Cut the sum on different atoms in blocks, to allow memory saving.
! Inner summations on atoms will be done from ia3 to ia4.
! Note that the maximum range from ia3 to ia4 is mincat (maximum
! increment of atoms).
  do ia3=ia1,ia2,mincat
   ia4=min(ia2,ia3+mincat-1)
!  Give the increment of number of atoms in this subset.
   nincat=ia4-ia3+1

!  Prepare the phase factors for the atoms between ia3 and ia4 :
!  For nloalg(1)==0, they were not prepared previously,it is needed to
!  compute them again.
   if(nloalg(1)<=0)then
!   For nloalg(1)==0, it is needed to compute the phase factors.
    if(mincat>matblk)then
     write(message, '(a,a,a,a,a,a,i4,a,i4,a)' ) ch10,&
&     ' nonlop : BUG -',ch10,&
&     '  With nloalg<=0, mincat must be less than matblk.',ch10,&
&     '  Their value is ',mincat,' and ',matblk,'.'
     call wrtout(6,message,'PERS')
     call leave_new('PERS')
    end if
    call ph1d3d(ia3,ia4,kgin,kptin,matblk,natom,npwin,&
&               n1,n2,n3,phkxredin,ph1d,ph3din)
   end if

!  Here begins the different treatment for the scalar-relativistic
!  part(s) and the spin-orbit part.
!  Loop on ispinor : 1 for scalar-Relativistic, 2 for spin-orbit
   nspinso=1;if (mpssoang>mpsang) nspinso=2
   do ispinor=1,nspinso

!   Select scalar-relativistic or spin-orbit KB-energies:
    ekb_s(:,:)=zero ; wt(:,:)=zero
!   Loop over (l,n) values (loop over l,m,n and test on l,n)
    iln0=0 ; jproj(:)=0 ; nlang=0
    indlmn_s(:,:,itypat)=0
    do ilmn=1,lmnmax
     if(ispinor/=indlmn(6,ilmn,itypat))cycle
     indlmn_s(:,ilmn,itypat)=indlmn(:,ilmn,itypat)
     iln =indlmn(5,ilmn,itypat)
     if (iln>iln0) then
      iln0=iln
      ipsang=indlmn(1,ilmn,itypat)+1
!DEBUG
!     write(6,*)' nonlop : ipsang,ilmn,itypat,ispinor=',ipsang,ilmn,itypat,ispinor
!ENDDEBUG
      iproj=indlmn(3,ilmn,itypat)
!     This shift is not needed anymore
!     if (ispinor==2) ipsang=indlmn(1,ilmn,itypat)-mpsang+2
      ekb_s(ipsang,iproj)=ekb(iln,itypat)
      wt(ipsang,iproj)=4.d0*pi/ucvol*dble(2*ipsang-1)*ekb_s(ipsang,iproj)
      jproj(ipsang)=max(jproj(ipsang),iproj)
      if(iproj>0)nlang=max(nlang,ipsang)
     end if
    end do ! ilmn


!   If nlang is not 0, then some non-local part is to be applied for
!   that type of atom.
    if (nlang/=0) then
!    Operate with the non-local potential on the wavefunction, in order
!    to get projected quantities. Call different routines opernl2,
!    opernl3, opernl4x which corresponds to different writings of the
!    same numerical operations. There is still optimisation left for
!    the case istwf_k/=1 (up to a factor 2 in speed).
!    call timab(74+choice,1,tsec)
     sign=1
!    Only the first spinorial component of vectin is taken into account first
     ispin=1
     if(nloalg(1)==2 .or. nloalg(1)==-2) then
      call opernl2(choice,dgxdis,dgxds,d2gxdis,d2gxds2,dgxdt,&
&      ffnlin,gmet,gxa,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&      jproj,kgin,kpgin,kptin,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
       mlang5,mlang6,mproj,ndgxdt,dimffnlin,nincat,nkpgin,nlang,nloalg,npwin,&
&      ntypat,ph3din,sign,vectin)
     else if(nloalg(1)==3 .or. nloalg(1)==-3) then
      call opernl3(choice,dgxdis,dgxds,d2gxdis,d2gxds2,dgxdt,&
&      ffnlin,gmet,gxa,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&      jproj,kgin,kpgin,kptin,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
       mlang5,mlang6,mproj,ndgxdt,dimffnlin,nincat,nkpgin,nlang,nloalg,npwin,&
&      ntypat,ph3din,sign,vectin)
     else if(nloalg(1)==4 .or. nloalg(1)==-4) then
      call opernl4a(choice,dgxdis,dgxds,d2gxdis,d2gxds2,dgxdt,&
&      ffnlin,gmet,gxa,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&      jproj,kgin,kpgin,kptin,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
       mlang5,mlang6,mproj,ndgxdt,dimffnlin,nincat,nkpgin,nlang,nloalg,npwin,&
&      ntypat,ph3din,vectin)
     end if
!    This duplication of the opernl calls is needed to avoid copying
!    vectin, with a detrimental effect on speed.
     if(nspinor==2)then
      ispin=2
      if(nloalg(1)==2 .or. nloalg(1)==-2) then
       call opernl2(choice,dgxdis_s,dgxds_s,d2gxdis_s,d2gxds2_s,dgxdt_s,&
&       ffnlin,gmet,gxa_s,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&       jproj,kgin,kpgin,kptin,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
&       mlang5,mlang6,mproj,ndgxdt,dimffnlin,nincat,nkpgin,nlang,nloalg,npwin,&
&       ntypat,ph3din,sign,vectin_s)
      else if(nloalg(1)==3 .or. nloalg(1)==-3) then
       call opernl3(choice,dgxdis_s,dgxds_s,d2gxdis_s,d2gxds2_s,dgxdt_s,&
&       ffnlin,gmet,gxa_s,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&       jproj,kgin,kpgin,kptin,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
&       mlang5,mlang6,mproj,ndgxdt,dimffnlin,nincat,nkpgin,nlang,nloalg,npwin,&
&       ntypat,ph3din,sign,vectin_s)
      else if(nloalg(1)==4 .or. nloalg(1)==-4) then
       call opernl4a(choice,dgxdis_s,dgxds_s,d2gxdis_s,d2gxds2_s,dgxdt_s,&
&       ffnlin,gmet,gxa_s,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&       jproj,kgin,kpgin,kptin,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
&       mlang5,mlang6,mproj,ndgxdt,dimffnlin,nincat,nkpgin,nlang,nloalg,npwin,&
&       ntypat,ph3din,vectin_s)
      end if
      dgxds(:,:,:,:,ispin)=dgxds_s(:,:,:,:)
      dgxdt(:,:,:,:,:,ispin)=dgxdt_s(:,:,:,:,:)
      gxa(:,:,:,:,ispin)=gxa_s(:,:,:,:)
     end if
      old_paral_level=mpi_enreg%paral_level
      mpi_enreg%paral_level=3
      call xcomm_init(mpi_enreg,spaceComm)
      if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%comm_fft
!      call timab(48,1,tsec)
      call xsum_mpi(dgxds,spaceComm,ierr)

      if (choice /=1 .and. choice /=3)then
       call xsum_mpi(dgxdt,spaceComm,ierr)
      end if
      call xsum_mpi(gxa,spaceComm,ierr)
!      call timab(48,2,tsec)
      mpi_enreg%paral_level=old_paral_level
!    XG030513 : MPIWF, at this place, one should perform the reduction
!    and spread of data of gxa, dgxdt and dgxds

!    call timab(74+choice,2,tsec)


!    Loop over spins:
     do ispin=1,nspinor

!     Perform contractions for the various tensors (d)gx?, producing the
!     contracted tensors (d)gx?fac to be passed back to opernl:
      do ia=1,nincat
       do ilang=1,nlang
        nproj=jproj(ilang)
        if(nproj/=0) then
         ilang2=(ilang*(ilang+1))/2
         do iproj=1,nproj

!         The rank of the tensor gxa equals l:
          rank=ilang-1
!         jjs gives the starting address of the relevant components
          jjs=1+((ilang-1)*ilang*(ilang+1))/6
          if (ilang>4) then
            write(message, '(a,a,a,a,i8,a)' )ch10,&
&           ' nonlop: BUG -',ch10,&
&           '  ilang must fall in range [1..4] but value is ',ilang,'.'
            call wrtout(06,message,'PERS')
            call leave_new('PERS')
          end if

!         Metric & spinorial contraction from gxa to gxafac. The treatment
!         is different for the scalar-relativistic and spin-orbit parts.
          if(ispinor==1) then
!          ------ Scalar-Relativistic ------
           temp(:,1:((rank+1)*(rank+2))/2)= &
&           gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin)
           call metcon(rank,gmet,temp,tmpfac)
           gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)= &
&           wt(ilang,iproj)*tmpfac(:,1:((rank+1)*(rank+2))/2)
          else
!          ------ Spin-orbit ------
           gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)=0.d0
!          Contraction over spins:
           do ispinp=1,nspinor
!           => Imaginary part (multiplying by i, then by the Im of amet):
            temp(1,1:((rank+1)*(rank+2))/2)= &
&            -gxa(2,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispinp)
            temp(2,1:((rank+1)*(rank+2))/2)= &
&             gxa(1,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispinp)
            amet_lo(:,:)=amet(2,:,:,ispin,ispinp)
            call metcon_so(rank,gmet,amet_lo,temp,tmpfac)
            gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)= &
&            gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)+ &
&            wt(ilang,iproj)*tmpfac(:,1:((rank+1)*(rank+2))/2)
!           => Real part:
            temp(:,1:((rank+1)*(rank+2))/2)= &
&            gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispinp)
            amet_lo(:,:)=amet(1,:,:,ispin,ispinp)
            call metcon_so(rank,gmet,amet_lo,temp,tmpfac)
            gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)= &
&            gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)+ &
&            wt(ilang,iproj)*tmpfac(:,1:((rank+1)*(rank+2))/2)
           end do
          end if

!         Eventual tensorial compaction/decompaction for ddk
!         perturbation:
          if(choice==5 .and. ilang>=2) then
           jjk=1+((ilang-2)*(ilang-1)*ilang)/6
           compact=-1
           temp(:,1:(rank*(rank+1))/2)= &
&              dgxdt(:,2,jjk:jjk-1+(rank*(rank+1))/2,ia,iproj,ispin)
           call ddkten(compact,idir,rank,temp,tmpfac)
           dgxdt(:,1,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin)= &
&              dgxdt(:,1,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin)&
&             +tmpfac(:,1:((rank+1)*(rank+2))/2)
           compact=1
           tmpfac(:,1:((rank+1)*(rank+2))/2)= &
&             gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)
           call ddkten(compact,idir,rank,temp,tmpfac)
           dgxdtfac(:,2,jjk:jjk-1+(rank*(rank+1))/2,ia,iproj)= &
&             temp(:,1:(rank*(rank+1))/2)
          end if

!         Section for strain perturbation
!         no spin-orbit yet

          if(choice==3 .and. signs==2) then
           istr=idir
           if(ispinor==1) then
!          ------ Scalar-Relativistic ------
!           jjstr is the starting address for dgxds and dgxdsfac
            jjstr=-3+((ilang+1)*(ilang+2)*(ilang+3))/6
!           diagonal enlk contribution
!           note sign change (12/05/02)
            if(istr>3) then
             gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)= 0.d0
            else
             gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)=&
&             -gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)
            end if
            dgxdsfac(:,jjstr:jjstr-1+((rank+3)*(rank+4))/2,ia,iproj,ispin)= &
&            0.d0
            iterm=1
            temp(:,1:((rank+1)*(rank+2))/2)= &
&            gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin)
            call metstr(istr,rank,iterm,gmet,gprimd,temp,tmpfac)
            dgxdsfac(:,jjstr:jjstr-1+((rank+3)*(rank+4))/2,ia,iproj,ispin)= &
&            wt(ilang,iproj)*tmpfac(:,1:((rank+3)*(rank+4))/2)
            iterm=2
            temp(:,1:((rank+1)*(rank+2))/2)= &
&            gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin)
            call metstr(istr,rank,iterm,gmet,gprimd,temp,tmpfac)
             gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)= &
&             +gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)+ &
&             wt(ilang,iproj)*tmpfac(:,1:((rank+1)*(rank+2))/2)
            iterm=3
            temp(:,1:((rank+3)*(rank+4))/2)= &
             dgxds(:,jjstr:jjstr-1+((rank+3)*(rank+4))/2,ia,iproj,ispin)
            call metstr(istr,rank,iterm,gmet,gprimd,temp,tmpfac)
             gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)= &
&             gxafac(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)+ &
&             wt(ilang,iproj)*tmpfac(:,1:((rank+1)*(rank+2))/2)
           end if
!         end section for strain perturbation
          end if

!         Eventual metric & spinorial contraction from dgxdt to dgxdtfac.
!         either for the dynamical matrix, or for the application of the
!         gradient of the operator. The treatment is different for the
!         scalar-relativistic and spin-orbit parts.
          if ((choice==2.and.signs==2).or. &
&             (choice==5.and.signs==2).or. &
&             (choice==4)) then
           mumax=ndgxdtfac;if (choice==5) mumax=1
           do mu=1,mumax
            if(ispinor==1) then
!            ------ Scalar-Relativistic ------
             temp(:,1:((rank+1)*(rank+2))/2)= &
&             dgxdt(:,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin)
             call metcon(rank,gmet,temp,tmpfac)
             dgxdtfac(:,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)=&
&             wt(ilang,iproj)*tmpfac(:,1:((rank+1)*(rank+2))/2)
            else
!            ------ Spin-orbit ------
             dgxdtfac(:,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)=0.d0
!            Contraction over spins:
             do ispinp=1,nspinor
!             => Imaginary part (multiplying by i, then by the Im of amet):
              temp(1,1:((rank+1)*(rank+2))/2)= &
&              -dgxdt(2,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispinp)
              temp(2,1:((rank+1)*(rank+2))/2)= &
&               dgxdt(1,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispinp)
              amet_lo(:,:)=amet(2,:,:,ispin,ispinp)
              call metcon_so(rank,gmet,amet_lo,temp,tmpfac)
              dgxdtfac(:,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)=&
&              dgxdtfac(:,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)+&
&              wt(ilang,iproj)*tmpfac(:,1:((rank+1)*(rank+2))/2)
!             => Real part:
              temp(:,1:((rank+1)*(rank+2))/2)= &
&              dgxdt(:,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispinp)
              amet_lo(:,:)=amet(1,:,:,ispin,ispinp)
              call metcon_so(rank,gmet,amet_lo,temp,tmpfac)
              dgxdtfac(:,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)=&
&              dgxdtfac(:,mu,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj)+&
&              wt(ilang,iproj)*tmpfac(:,1:((rank+1)*(rank+2))/2)
             end do
            end if
           end do
          end if


!   ----  Accumulate the nonlocal energy.
          do ii=1,ilang2
           jj=ii-1+jjs
           enlk=enlk+(gxafac(1,jj,ia,iproj)*gxa(1,jj,ia,iproj,ispin)&
&                    +gxafac(2,jj,ia,iproj)*gxa(2,jj,ia,iproj,ispin) )
          end do

!   ----  Accumulate force contributions if requested.
!         Note that the contraction of gxa with dgxdt can be done like
!         a cartesian dot product now because the symmetrically-related
!         terms are accounted for with weights in gxa.
          if ((choice==2.or.choice==23) .and. signs==1) then
           ishift=0;if (choice==23) ishift=6
           ia5=ia+ia3-1
           do ii=1,ilang2
            jj=ii-1+jjs
            do mu=1,3
!            (includes also factor of 2 from "2*Re[stuff]")
             indx=mu+3*(ia5-1)+ishift
             enlout(indx)=enlout(indx)+two*ucvol*&
&                 ( gxafac(1,jj,ia,iproj)*dgxdt(1,mu,jj,ia,iproj,ispin)&
&                  +gxafac(2,jj,ia,iproj)*dgxdt(2,mu,jj,ia,iproj,ispin))
            end do
           end do
          end if

!   ----  Accumulate stress tensor contributions if requested.
          if ((choice==3.or.choice==23).and.signs==1) then
!          1- Compute contractions involving gxa and dgxds:
!             --- Same formula for Scalar-relativistic and Spin-orbit ---
           if (ilang==1) then
            do ii=1,6
             rank2(ii)=2.d0*&
&                (gxafac(1,1,ia,iproj)*dgxds(1,ii,ia,iproj,ispin)+ &
&                 gxafac(2,1,ia,iproj)*dgxds(2,ii,ia,iproj,ispin) )
            end do
           else if (ilang==2) then
            call cont13(gxafac(:,jjs:jjs+2,ia,iproj),&
&                       dgxds(:, 7:16,ia,iproj,ispin),rank2)
           else if (ilang==3) then
            call cont24(gxafac(:,jjs:jjs+5,ia,iproj),&
&                       dgxds(:,17:31,ia,iproj,ispin),rank2)
           else if (ilang==4) then
            call cont35(gxafac(:,jjs:jjs+9,ia,iproj),&
&                       dgxds(:,32:52,ia,iproj,ispin),rank2)
           end if
           !In all cases add rank2 term into stress tensor
           strsnl(:)=strsnl(:)-rank2(:)
!          2- Compute contractions involving gxa and gxa:
           if(ispinor==1) then
!           2a ------ Scalar-Relativistic ------
            if (ilang==2) then
             strsnl(1)=strsnl(1)-wt(ilang,iproj)*2.d0*&
&             (gxa(1,jjs  ,ia,iproj,ispin)*gxa(1,jjs  ,ia,iproj,ispin)+&
&              gxa(2,jjs  ,ia,iproj,ispin)*gxa(2,jjs  ,ia,iproj,ispin))
             strsnl(2)=strsnl(2)-wt(ilang,iproj)*2.d0*&
&             (gxa(1,jjs+1,ia,iproj,ispin)*gxa(1,jjs+1,ia,iproj,ispin)+&
&              gxa(2,jjs+1,ia,iproj,ispin)*gxa(2,jjs+1,ia,iproj,ispin))
             strsnl(3)=strsnl(3)-wt(ilang,iproj)*2.d0*&
&             (gxa(1,jjs+2,ia,iproj,ispin)*gxa(1,jjs+2,ia,iproj,ispin)+&
&              gxa(2,jjs+2,ia,iproj,ispin)*gxa(2,jjs+2,ia,iproj,ispin))
             strsnl(4)=strsnl(4)-wt(ilang,iproj)*2.d0*&
&             (gxa(1,jjs+2,ia,iproj,ispin)*gxa(1,jjs+1,ia,iproj,ispin)+&
&              gxa(2,jjs+2,ia,iproj,ispin)*gxa(2,jjs+1,ia,iproj,ispin))
             strsnl(5)=strsnl(5)-wt(ilang,iproj)*2.d0*&
&             (gxa(1,jjs+2,ia,iproj,ispin)*gxa(1,jjs  ,ia,iproj,ispin)+&
&              gxa(2,jjs+2,ia,iproj,ispin)*gxa(2,jjs  ,ia,iproj,ispin))
             strsnl(6)=strsnl(6)-wt(ilang,iproj)*2.d0*&
&             (gxa(1,jjs+1,ia,iproj,ispin)*gxa(1,jjs  ,ia,iproj,ispin)+&
&              gxa(2,jjs+1,ia,iproj,ispin)*gxa(2,jjs  ,ia,iproj,ispin))
            else if (ilang==3) then
             call trace2(gxa(:,jjs:jjs+5,ia,iproj,ispin),gmet,trace)
             call cont22(gxa(:,jjs:jjs+5,ia,iproj,ispin),gmet,rank2)
             do ii=1,6
              strsnl(ii)=strsnl(ii)+wt(ilang,iproj)*&
&              (2.d0*(trace(1)*gxa(1,jjs+ii-1,ia,iproj,ispin)+&
&              trace(2)*gxa(2,jjs+ii-1,ia,iproj,ispin))-3.d0*rank2(ii))
             end do
            else if (ilang==4) then
             call cont3(gxa(:,jjs:jjs+9,ia,iproj,ispin),gmet,rank2)
             strsnl(:)=strsnl(:)-wt(ilang,iproj)*rank2(:)
            end if
           else
!           2b ------ Spin-orbit ------
            do ispinp=1,nspinor
             if (ilang==3) then
              call cont22so(gxa(:,jjs:jjs+5,ia,iproj,ispin),&
&                           gxa(:,jjs:jjs+5,ia,iproj,ispinp),&
&                           amet(:,:,:,ispin,ispinp),rank2)
              strsnl(:)=strsnl(:)-wt(ilang,iproj)*3.d0*rank2(:)
             else if (ilang==4) then
              call cont33so(gxa(:,jjs:jjs+9,ia,iproj,ispin),&
&                           gxa(:,jjs:jjs+9,ia,iproj,ispinp),&
&                           gmet,amet(:,:,:,ispin,ispinp),rank2)
              strsnl(:)=strsnl(:)-wt(ilang,iproj)*rank2(:)
             end if
            end do
           end if
!          3- Compute contractions involving gxa and gxa due to
!             gradients of antisymmetric tensor (amet):
!             --- Only in case of Spin-orbit ---
           if(ispinor==2) then
             do ispinp=1,nspinor
!                             Be carefull: no need to compute rank2c(:,1:3) !
             if (ilang==2) then
              rank2c(1,4)=gxa(1,jjs+2,ia,iproj,ispin)*gxa(1,jjs+1,ia,iproj,ispinp)&
&                        +gxa(2,jjs+2,ia,iproj,ispin)*gxa(2,jjs+1,ia,iproj,ispinp)
              rank2c(2,4)=gxa(1,jjs+2,ia,iproj,ispin)*gxa(2,jjs+1,ia,iproj,ispinp)&
&                        -gxa(2,jjs+2,ia,iproj,ispin)*gxa(1,jjs+1,ia,iproj,ispinp)
              rank2c(1,5)=gxa(1,jjs+2,ia,iproj,ispin)*gxa(1,jjs  ,ia,iproj,ispinp)&
&                        +gxa(2,jjs+2,ia,iproj,ispin)*gxa(2,jjs  ,ia,iproj,ispinp)
              rank2c(2,5)=gxa(1,jjs+2,ia,iproj,ispin)*gxa(2,jjs  ,ia,iproj,ispinp)&
&                        -gxa(2,jjs+2,ia,iproj,ispin)*gxa(1,jjs  ,ia,iproj,ispinp)
              rank2c(1,6)=gxa(1,jjs+1,ia,iproj,ispin)*gxa(1,jjs  ,ia,iproj,ispinp)&
&                        +gxa(2,jjs+1,ia,iproj,ispin)*gxa(2,jjs  ,ia,iproj,ispinp)
              rank2c(2,6)=gxa(1,jjs+1,ia,iproj,ispin)*gxa(2,jjs  ,ia,iproj,ispinp)&
&                        -gxa(2,jjs+1,ia,iproj,ispin)*gxa(1,jjs  ,ia,iproj,ispinp)
             else if (ilang==3) then
              call cont22cso(gxa(:,jjs:jjs+5,ia,iproj,ispin),&
&                            gxa(:,jjs:jjs+5,ia,iproj,ispinp),&
&                            gmet,rank2c)
             else if (ilang==4) then
              call cont33cso(gxa(:,jjs:jjs+9,ia,iproj,ispin),&
&                            gxa(:,jjs:jjs+9,ia,iproj,ispinp),&
&                            gmet,rank2c)
             end if
             if (ilang>1) then
              do jj=1,3
               do ii=4,6
                strsso(ii,jj)=strsso(ii,jj)-2.d0*wt(ilang,iproj)*&
&                        (pauli(1,ispin,ispinp,jj)*rank2c(2,ii)+&
&                         pauli(2,ispin,ispinp,jj)*rank2c(1,ii))
               end do
              end do
             end if
            end do
           end if
!         Enf if (choice==3)
          end if

!   ----  Accumulate dynamical matrix contributions if requested.
          if (choice==4) then
           ia5=ia+ia3-1
           do ii=1,ilang2
            jj=ii-1+jjs
            do mu=1,6
!            (includes also factor of 2 from "2*Re[stuff]")
             enlout(mu+6*(ia5-1))=enlout(mu+6*(ia5-1))+ucvol*2.0d0*&
&                (gxafac(1,jj,ia,iproj)*dgxdt(1,mu+3,jj,ia,iproj,ispin)&
&                +gxafac(2,jj,ia,iproj)*dgxdt(2,mu+3,jj,ia,iproj,ispin))
            end do
            do mu=1,3
             enlout(mu+6*(ia5-1))=enlout(mu+6*(ia5-1))+ucvol*2.0d0*&
&             (dgxdtfac(1,mu,jj,ia,iproj)*dgxdt(1,mu,jj,ia,iproj,ispin)&
&             +dgxdtfac(2,mu,jj,ia,iproj)*dgxdt(2,mu,jj,ia,iproj,ispin))
            end do
            enlout(4+6*(ia5-1))=enlout(4+6*(ia5-1)) +ucvol*2.d0*&
&              (dgxdtfac(1,2,jj,ia,iproj)*dgxdt(1,3,jj,ia,iproj,ispin)&
&              +dgxdtfac(2,2,jj,ia,iproj)*dgxdt(2,3,jj,ia,iproj,ispin))
            enlout(5+6*(ia5-1))=enlout(5+6*(ia5-1)) +ucvol*2.d0*&
&              (dgxdtfac(1,1,jj,ia,iproj)*dgxdt(1,3,jj,ia,iproj,ispin)&
&              +dgxdtfac(2,1,jj,ia,iproj)*dgxdt(2,3,jj,ia,iproj,ispin))
            enlout(6+6*(ia5-1))=enlout(6+6*(ia5-1)) +ucvol*2.d0*&
&              (dgxdtfac(1,1,jj,ia,iproj)*dgxdt(1,2,jj,ia,iproj,ispin)&
&              +dgxdtfac(2,1,jj,ia,iproj)*dgxdt(2,2,jj,ia,iproj,ispin))
           end do
          end if

!   ----  Accumulate elastic tensor contributions if requested.

          if(choice==6) then
           ia5=ia+ia3-1
           jjs1=((ilang)*(ilang+1)*(ilang+2))/6
           jjs2=-3+((ilang+1)*(ilang+2)*(ilang+3))/6
           jjs3=-9+((ilang+2)*(ilang+3)*(ilang+4))/6
           jjs4=-19+((ilang+3)*(ilang+4)*(ilang+5))/6

!          Contribution for two diagonal strains (basically, the nonlocal
!           enlk)
           temp(:,1:((rank+1)*(rank+2))/2)= &
&           gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin)
           call metcon(rank,gmet,temp,tmpfac)
           e2nldd=0.d0
           do ii=1,((rank+1)*(rank+2))/2
            e2nldd=e2nldd+&
&            (gxa(1,jjs-1+ii,ia,iproj,ispin)*tmpfac(1,ii)+&
&             gxa(2,jjs-1+ii,ia,iproj,ispin)*tmpfac(2,ii))
           end do

!          Terms involving one ucvol derivative (digonal strain only)
!           and one derivative of nonlocal operator
!          Loop over strain index
           do istr2=1,6

!           rank->rank+2
            iterm=1
            temp(:,1:((rank+1)*(rank+2))/2)= &
&            gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin)
            call metstr(istr2,rank,iterm,gmet,gprimd,temp,tmpfac)
            e2nl_tmp(istr2)=0.d0
            do ii=1,((rank+3)*(rank+4))/2
             e2nl_tmp(istr2)=e2nl_tmp(istr2)-2.d0*&
&             (dgxds(1,jjs2-1+ii,ia,iproj,ispin)*tmpfac(1,ii)+&
&              dgxds(2,jjs2-1+ii,ia,iproj,ispin)*tmpfac(2,ii))
            end do
!           rank->rank

            iterm=2
            temp(:,1:((rank+1)*(rank+2))/2)= &
&            gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin)
            call metstr(istr2,rank,iterm,gmet,gprimd,temp,tmpfac)
            do ii=1,((rank+1)*(rank+2))/2
             e2nl_tmp(istr2)=e2nl_tmp(istr2)-&
&             (gxa(1,jjs-1+ii,ia,iproj,ispin)*tmpfac(1,ii)+&
&              gxa(2,jjs-1+ii,ia,iproj,ispin)*tmpfac(2,ii))
            end do
! DEBUG
!           This and subsequent similar debug sections evaluate the
!            hermitial conjugate of the contraction immeditely above
!            and the comparison was useful for development purposes.
!           rank+2->rank
!            iterm=3
!            temp(:,1:((rank+3)*(rank+4))/2)= &
!             dgxds(:,jjs2:jjs2-1+((rank+3)*(rank+4))/2,ia,iproj,ispin)
!            call metstr(istr2,rank,iterm,gmet,gprimd,temp,tmpfac)
!            e2nl_tmp(istr2)=0.d0
!            do ii=1,((rank+1)*(rank+2))/2
!             e2nl_tmp(istr2)=e2nl_tmp(istr2)-&
!&             (gxa(1,jjs-1+ii,ia,iproj,ispin)*tmpfac(1,ii)+&
!&              gxa(2,jjs-1+ii,ia,iproj,ispin)*tmpfac(2,ii))
!            end do
! ENDDEBUG
           end do

!          Terms involving two derivatives of the nonlocal operator
!          Loop over 2nd strain index
           do istr2=1,6
!           Loop over 1st strain index, upper triangle only
            do istr1=1,6
             iest=istr1+(3*natom+6)*(istr2-1)

!            Accumulate terms corresponding to two derivatives of ucvol
!             (simply the nonlocal energy contributin) for both indices
!              corresponding to diagonal strains

             if(istr1<=3 .and. istr2<=3) then
              enlout(iest)= enlout(iest)+wt(ilang,iproj)*e2nldd
             end if

!            Accumulate terms computed above from 1st derivatives
!             when one or more indices corresponds to diagonal strain
             if(istr2<=3) then
              enlout(iest)= enlout(iest)+wt(ilang,iproj)*e2nl_tmp(istr1)
             end if
             if(istr1<=3) then
              enlout(iest)= enlout(iest)+wt(ilang,iproj)*e2nl_tmp(istr2)
             end if

!            rank->rank+4
             call contstr21(istr2,istr1,rank,gmet,gprimd,e2nl,&
&             gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin),&
&             d2gxds2(:,jjs4:jjs4-1+((rank+5)*(rank+6))/2,ia,iproj,ispin))
             enlout(iest)= enlout(iest)+wt(ilang,iproj)*e2nl

! DEBUG
!            rank+4->rank
!             call contstr22(istr2,istr1,rank,gmet,gprimd,e2nl,&
!&             d2gxds2(:,jjs4:jjs4-1+((rank+5)*(rank+6))/2,ia,iproj,ispin),&
!&             gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin))
!             enlout(iest)= enlout(iest)+wt(ilang,iproj)*e2nl
! ENDDEBUG

!            rank->rank+2
             call contstr23(istr2,istr1,rank,gmet,gprimd,e2nl,&
&             gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin),&
&             dgxds(:,jjs2:jjs2-1+((rank+3)*(rank+4))/2,ia,iproj,ispin))
             enlout(iest)= enlout(iest)+wt(ilang,iproj)*e2nl
! DEBUG

!            rank+2->rank
!             call contstr24(istr2,istr1,rank,gmet,gprimd,e2nl,&
!&             dgxds(:,jjs2:jjs2-1+((rank+3)*(rank+4))/2,ia,iproj,ispin),&
!&             gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin))
!             enlout(iest)= enlout(iest)+wt(ilang,iproj)*e2nl
! ENDDEBUG

!            rank+2->rank+2
             if(rank<=2) then
              call contstr25(istr2,istr1,rank,gmet,gprimd,e2nl,&
&              dgxds(:,jjs2:jjs2-1+((rank+3)*(rank+4))/2,ia,iproj,ispin),&
&              dgxds(:,jjs2:jjs2-1+((rank+3)*(rank+4))/2,ia,iproj,ispin))
             else
              call contstr25a(istr2,istr1,rank,gmet,gprimd,e2nl,&
&              dgxds(:,jjs2:jjs2-1+((rank+3)*(rank+4))/2,ia,iproj,ispin),&
&              dgxds(:,jjs2:jjs2-1+((rank+3)*(rank+4))/2,ia,iproj,ispin))
             end if
             enlout(iest)= enlout(iest)+wt(ilang,iproj)*e2nl

!            rank->rank
             call contstr26(istr2,istr1,rank,gmet,gprimd,e2nl,&
&             gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin),&
&             gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin))
             enlout(iest)= enlout(iest)+wt(ilang,iproj)*e2nl

            end do !istr1

!           Contributions to internal strain (one cartesian strain and one
!            reduced-coordinate atomic displacement derivative).
            iest=7+3*(ia5-1)+(3*natom+6)*(istr2-1)

!           rank->rank+3
            call contistr03(istr2,rank,gmet,gprimd,eisnl,&
&            gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin),&
&            d2gxdis(:,jjs3:jjs3-1+((rank+4)*(rank+5))/2,ia,iproj,ispin))
            enlout(iest:iest+2)= enlout(iest:iest+2)&
&            +wt(ilang,iproj)*eisnl(1:3)

!DEBUG
!           rank+3->rank
!            call contistr30(istr2,rank,gmet,gprimd,eisnl,&
!&            d2gxdis(:,jjs3:jjs3-1+((rank+4)*(rank+5))/2,ia,iproj,ispin),&
!&            gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin))
!            enlout(iest:iest+2)= enlout(iest:iest+2)&
!&            +wt(ilang,iproj)*eisnl(1:3)
!ENDDEBUG

!           rank+1->rank+2
            call contistr12(istr2,rank,gmet,gprimd,eisnl,&
&            dgxdis(:,jjs1:jjs1-1+((rank+2)*(rank+3))/2,ia,iproj,ispin),&
&            dgxds(:,jjs2:jjs2-1+((rank+3)*(rank+4))/2,ia,iproj,ispin))
           enlout(iest:iest+2)= enlout(iest:iest+2)&
&            +wt(ilang,iproj)*eisnl(1:3)

!DEBUG
!           rank+2->rank+1
!            call contistr21(istr2,rank,gmet,gprimd,eisnl,&
!&            dgxds(:,jjs2:jjs2-1+((rank+3)*(rank+4))/2,ia,iproj,ispin),&
!&            dgxdis(:,jjs1:jjs1-1+((rank+2)*(rank+3))/2,ia,iproj,ispin))
!            enlout(iest:iest+2)= enlout(iest:iest+2)&
!&            +wt(ilang,iproj)*eisnl(1:3)
!ENDDEBUG

!           rank->rank+1
            call contistr01(istr2,rank,gmet,gprimd,eisnl,&
&            gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin),&
&            dgxdis(:,jjs1:jjs1-1+((rank+2)*(rank+3))/2,ia,iproj,ispin))
            enlout(iest:iest+2)= enlout(iest:iest+2)&
&            +wt(ilang,iproj)*eisnl(1:3)
!
!DEBUG
!           rank+1->rank
!            call contistr10(istr2,rank,gmet,gprimd,eisnl,&
!&            dgxdis(:,jjs1:jjs1-1+((rank+2)*(rank+3))/2,ia,iproj,ispin),&
!&            gxa(:,jjs:jjs-1+((rank+1)*(rank+2))/2,ia,iproj,ispin))
!            enlout(iest:iest+2)= enlout(iest:iest+2)&
!&            +wt(ilang,iproj)*eisnl(1:3)
!ENDDEBUG

           end do !istr2
          end if !choice==6

!        End loop over iproj:
         end do
!       End condition of existence of a reference state:
        end if

!      End loop over ilang:
       end do

!     End loop over ia:
      end do

!     Operate with the non-local potential on the projected scalars,
!     in order to get matrix element [NOT needed if only force or stress
!     or dynamical matrix is being computed].

      if (signs==2) then
       if(nloalg(1)<=0 .and. choice==2)then
!       Prepare the phase k+q factors for the atoms between ia3 and ia4:
!       they were not prepared previously for nloalg<=0 and choice==2.
        call ph1d3d(ia3,ia4,kgout,kptout,matblk,natom,npwout,&
&                   n1,n2,n3,phkxredout,ph1d,ph3dout)
       end if
!      call timab(74+choice,1,tsec)
       sign=-1
!      The duplication of the opernl calls has the purpose to avoid
!      copying vectout/vectout_s
       if(ispin==1)then
        if( nloalg(1)==2 .or. nloalg(1)==-2 ) then
         call opernl2(choice,dgxdis,dgxdsfac,d2gxdis,d2gxds2,dgxdtfac,&
&         ffnlout,gmet,gxafac,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&         jproj,kgout,kpgout,kptout,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
&         mlang5,mlang6,mproj,ndgxdt,dimffnlout,nincat,nkpgout,nlang,nloalg,npwout,&
&         ntypat,ph3dout,sign,vectout)
        else if( nloalg(1)==3 .or. nloalg(1)==-3 ) then
         call opernl3(choice,dgxdis,dgxdsfac,d2gxdis,d2gxds2,dgxdtfac,&
&         ffnlout,gmet,gxafac,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&         jproj,kgout,kpgout,kptout,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
&         mlang5,mlang6,mproj,ndgxdt,dimffnlout,nincat,nkpgout,nlang,nloalg,npwout,&
&         ntypat,ph3dout,sign,vectout)
        else if( nloalg(1)==4 .or. nloalg(1)==-4 ) then
         call opernl4b(choice,dgxdsfac,d2gxds2,dgxdtfac,ffnlout,gmet,gxafac,&
&         ia3,idir,indlmn_s,ispinor,istwf_k,itypat,jproj,kgout,kpgout,kptout,&
&         lmnmax,matblk,mincat,mlang3,mlang4,mlang6,mproj,ndgxdt,&
&         dimffnlout,nincat,nkpgout,nlang,nloalg,npwout,ntypat,ph3dout,vectout)
        end if
       else
#if defined FC_NEC
        vectout_s(1,:)=0.d0
        vectout_s(2,:)=0.d0
#else
        vectout_s(:,:)=0.d0
#endif
        if( nloalg(1)==2 .or. nloalg(1)==-2 ) then
         call opernl2(choice,dgxdis,dgxdsfac,d2gxdis,d2gxds2,dgxdtfac,&
&         ffnlout,gmet,gxafac,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&         jproj,kgout,kpgout,kptout,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
&         mlang5,mlang6,mproj,ndgxdt,dimffnlout,nincat,nkpgout,nlang,nloalg,npwout,&
&         ntypat,ph3dout,sign,vectout_s)
        else if( nloalg(1)==3 .or. nloalg(1)==-3 ) then
         call opernl3(choice,dgxdis,dgxdsfac,d2gxdis,d2gxds2,dgxdtfac,&
&         ffnlout,gmet,gxafac,ia3,idir,indlmn_s,ispinor,istwf_k,itypat,&
&         jproj,kgout,kpgout,kptout,lmnmax,matblk,mincat,mlang1,mlang3,mlang4,&
&         mlang5,mlang6,mproj,ndgxdt,dimffnlout,nincat,nkpgout,nlang,nloalg,npwout,&
&         ntypat,ph3dout,sign,vectout_s)
        else if( nloalg(1)==4 .or. nloalg(1)==-4 ) then
         call opernl4b(choice,dgxds,d2gxds2,dgxdtfac,ffnlout,gmet,gxafac,&
&         ia3,idir,indlmn_s,ispinor,istwf_k,itypat,jproj,kgout,kpgout,kptout,&
&         lmnmax,matblk,mincat,mlang3,mlang4,mlang6,mproj,ndgxdt,&
&         dimffnlout,nincat,nkpgout,nlang,nloalg,npwout,ntypat,ph3dout,vectout_s)
        end if
        vectout(1,1+npwout:2*npwout)=&
&        vectout(1,1+npwout:2*npwout)+vectout_s(1,1:npwout)
        vectout(2,1+npwout:2*npwout)=&
&        vectout(2,1+npwout:2*npwout)+vectout_s(2,1:npwout)
       end if
!      call timab(74+choice,2,tsec)
      end if

!    End loops over spins:
     end do

!   End condition of existence of a non-local part for that type of atom:
    end if

!  End loop over ispinor:
   end do

! End sum on atom subset loop, over ia3:
  end do

!End atom type loop, over itypat:
  ia1=ia2+1
 end do


!De-allocate temporary space.
 deallocate(ekb_s,gxa,gxafac,dgxds,dgxdt,dgxdtfac,wt,jproj,temp,tmpfac)
 deallocate(dgxdsfac)
 deallocate(indlmn_s)
 if(choice==6)deallocate(dgxdis,d2gxdis,d2gxds2)
 if(nspinor==2)deallocate(dgxds_s,dgxdt_s,gxa_s)
 if(nspinor==2 .and. choice==6)deallocate(dgxdis_s,d2gxdis_s,d2gxds2_s)
 if (mpssoang>mpsang) deallocate(pauli)

!Restore the original content of the vectin array.
!Note that only the first part was modified
 if(istwf_k/=1)then
   call scalewf_nonlop(istwf_k,mpi_enreg,npwin,2,vectin)
 end if

!DEBUG
!write(6,*)'  vectout='
!do ii=1,nspinor*npwout
! write(6, '(i3,2es16.6)' )ii,vectout(1:2,ii)
!end do
!ENDDEBUG

 if(nspinor==2)then
  deallocate(vectin_s,vectout_s)
 end if

!Save non-local energy
 if((choice==1).and.size(enlout)>0) enlout(1)=enlk ! on test v4/93 size(enlout) can be zero (PMA)

!Do final manipulations to produce strain gradients for
!stress tensor, in cartesian coordinates
 if ((choice==3.or.choice==23) .and. signs==1) then
! Convert strsnl from reduced to cartesian coordinates
  call strconv(strsnl,gprimd,strsnl)
! Add diagonal part and normalize by additional ucvol
! (fill up first 6 components of enlout with these gradients--
! elements 7,8,9 of enlout are not used)
  enlout(1)=(strsnl(1)-enlk)/ucvol
  enlout(2)=(strsnl(2)-enlk)/ucvol
  enlout(3)=(strsnl(3)-enlk)/ucvol
  enlout(4)=strsnl(4)/ucvol
  enlout(5)=strsnl(5)/ucvol
  enlout(6)=strsnl(6)/ucvol
! Eventually, add spin-orbit part due to gradients of
! antisymmetric tensor
  if (mpssoang>mpsang) then
   call strsocv(strsso,gprimd,strssoc)
   enlout(:)=enlout(:)+strssoc(:)/ucvol
  end if
 end if

 if ((choice<1 .or. choice>6) .and. choice/=23 ) then
  write(message, '(a,a,a,a,i4,a)' )ch10,&
&   ' nonlop : BUG -',ch10,&
&   '  Does not presently support this choice=',choice,'.'
  call wrtout(06,message,'PERS')
  call leave_new('PERS')
 end if

!DEBUG
!write(6,*)' nonlop_pl: exit '
!ENDDEBUG

end subroutine nonlop_pl
!!***

Generated by  Doxygen 1.6.0   Back to index