!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine divergence_B(bx,by,divB_flag,err_final,time_divB)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	use mod_parameters, ONLY: dkind, dx,dy,dt,xlength,nx,ny, torus,  &
				tprint, divergence_tolerance, divB_option
	use mod_arrays, only : nmx,nmy, Rmaj, xV, yV

	implicit none

	real(kind=dkind), dimension(0:nmx+1,0:nmy+1) :: bx, by, divb
	real(kind=dkind) :: phi(-1:nmx+2,-1:nmy+2)

	integer :: divB_flag
	real(kind=dkind) :: err_final

	real(kind=dkind) :: err_tol
	integer :: itemax
	integer :: max_ind(1:2)
	real(kind=dkind) :: rmleng, bmax, bij, divbmax, ermax, phiold, phinew, phimax
	real(kind=dkind) :: coe1, coe2, coe3, coe4, coe5, relax, err
	real(kind=dkind), save :: time_last = 0.d0
	real(kind=dkind) :: time_divB
	integer :: id, jd, ite, iter
	integer :: i, j

!	return

	if(divB_option==-1) then
!		call divB_FFT
!!		call divB_FFT(bx,by,time_divB)
!!		if(divB_option==0) call div4_1_b_take2
		! this is executed only once, if the option was changed inside divb_FFT
	elseif(divB_option==0) then
!		call div4_1_b
!		call div4_1_b_safe
		call div4_1_b_take2
	elseif(divB_option==1) then
		call divB_phi_const
	elseif(divB_option==2) then
		call divB_phi_2BC
	elseif(divB_option==3) then
!		call div4_1_b_take2_0_edge
		call div4_1_b_take2_0_edge(bx,by,time_divB,divB_flag,err_final)
	elseif(divB_option==4) then
!		call div4_1_b_take2_0_edge_bis
		call div4_1_b_take2_0_edge(bx,by,time_divB,divB_flag,err_final)
	endif

	contains

	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	subroutine div4_1_b
	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@


	!return

	!!$ calculate the divergence of B and the corrective factor
	!!$ grad_Phi in such a way that the resulting B=B+grad_Phi is 
	!!$ divergence free. 

	integer :: ipass, istart, iend, istep, jstart, jend, jstep, k

		if(divB_flag==0) then
			err_tol = 1.d-8
			itemax = 10000
		elseif(divB_flag==1) then
			err_tol = divergence_tolerance
			itemax = 2000
		endif

	1 continue
	! for repeating in case of troubles

	!!$     A dimensionless form of div*B is div*B*rmlength/Bmax
	!!$     rmleng is the length of the system. 
		  rmleng=dmax1(dx*nx,dy*ny)

	!!$    calculate the maximum of B and major radius
		bmax=1.d-10

		do i=1,nx
		do j=1,ny

			bij=dsqrt(bx(i,j)**2+by(i,j)**2)
			if(bij.ge.bmax)bmax=bij

		enddo
		enddo

	!!$     calculate div*B and its maximum with its location
		  divbmax=0


	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j = 1,ny
	do i = 1,nx

		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
			  0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)


	 enddo
	 enddo
	!$omp enddo
	!$omp end parallel

	divbmax = maxval(abs(divb))
	max_ind = maxloc(abs(divb))
	id = max_ind(1); jd = max_ind(2)

	!!$ dimensionless div*B
		  divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b before ",divbmax

	!!$     calculate the corrective factor grad*Phi by solving
	!!$      nabla^2 Phi=-div B using the SOR method with relaxation
	!!$      factor "relax"

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=-1,ny+2
	do i=-1,nx+2

		phi(i,j)=0.001*dabs(bmax*xlength)+1.d-10

	enddo
	enddo
	!$omp enddo
	!$omp end parallel

	coe1=2.*(dy/dx+dx/dy)
	coe2=dy/dx
	coe3=dx/dy
	coe4=4*dx*dy
	coe5=0.d0

	relax=1.5

	do ite=1,itemax

		ermax=0.d0

		k = modulo(ite,4)

		if(k==1) then

			istart = 1
			iend = nx
			istep = 2
			jstart = 1
			jend = ny
			jstep = 1

		elseif(k==2) then

			istart = nx
			iend = 1
			istep = -2
			jstart = 1
			jend = ny
			jstep = 1

		elseif(k==3) then

			istart = nx
			iend = 1
			istep = -2
			jstart = ny
			jend = 1
			jstep = -1

		elseif(k==4) then

			istart = nx
			iend = 1
			istep = -2
			jstart = ny
			jend = 1
			jstep = -1

		endif

		do ipass = 1, 2

!			istart = ipass

!			do j=1, ny

!				do i = istart, nx, 2

			do j = jstart, jend, jstep

				do i = istart, iend, istep

					coe5=2.d0*torus*dy/Rmaj(i)

					phiold=phi(i,j)

					phinew=coe2*(phi(i+2,j)+phi(i-2,j))+  &
									coe5*(phi(i+1,j)-phi(i-1,j))+  &
									coe3*(phi(i,j+2)+phi(i,j-2))+coe4*divb(i,j)

					phi(i,j)=relax*phinew/coe1+(1.-relax)*phiold

					err=dabs((phi(i,j)-phiold)/phiold)
					iter=ite

					if(err.ge.ermax)ermax=err

				enddo

				if(istart==1) then
					istart = 2
				elseif(istart==2) then
					istart = 1
				elseif(istart==nx) then
					istart = nx-1
				elseif(istart==nx-1) then
					istart = nx
				endif

			enddo

		enddo

		!!$ calculate the maximum of Phi to determine the relative error 
		phimax=0


		phimax = maxval(abs(phi))

		!!$ maximum relative error allowed

		!!$ apply the boundary conditions of zero normal field at walls (buondary points are on the wall)

		!$omp parallel default(shared)
		!$omp do private(i)

		do i=1,nx

			phi(i,ny+1) = (4.d0*phi(i,ny)-phi(i,ny-1))/3.d0	! on the wall
			phi(i,ny+2) = phi(i,ny)

			phi(i,0) = (4.d0*phi(i,1)-phi(i,2))/3.d0	! on the wall
			phi(i,-1) = phi(i,1)

		enddo
		!$omp enddo

		!$omp do private(j)
		do j=1,ny

			phi(0,j) = (4.d0*phi(1,j)-phi(2,j))/3.d0	! on the wall
			phi(-1,j) = phi(1,j)

			phi(nx+1,j) = (4.d0*phi(nx,j)-phi(nx-1,j))/3.d0	! on the wall
			phi(nx+2,j) = phi(nx,j)

		enddo
		!$omp enddo
		!$omp end parallel

		if((ermax.le.err_tol).and.(ite>1)) exit

	enddo

	if(ite>itemax) print*,"reached itemax in div b"

	!!$     update the field

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=1,ny
		do i=1,nx

				bx(i,j) = bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
				by(i,j) = by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)

		 enddo
	 enddo
	!$omp enddo
	!$omp end parallel

	!!$ calculate div*B and its maximum with its location
	divbmax=0

	do j=1,ny
	do i=1,nx

		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
		0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

	enddo
	enddo

	divbmax = maxval(abs(divb))
	max_ind = maxloc(abs(divb))
	id = max_ind(1); jd = max_ind(2)


	!!$ dimensionless div*B
	divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b after ",divbmax
	!!$      pause
	if(divB_flag==1) then

		if(divbmax.ge.0.01) print*," divb_max = ",divbmax, id, jd

		if(iter.ge.150) print*," iterations in divb =",iter

		if(divbmax>1.1d-2) then
			print*, 'divB: rewinding'
			goto 1
		endif

	endif

	err_final = divbmax


	if((time_divB >= tprint).and.(time_divB > time_last))then

	!		print*, 'divB, times = ', time_divB, tprint, time_last
	!		pause

		time_last = time_divB

		open(69, file='div_B.plt')

	! open output file and print header for Tecplot
		write(69,*)'TITLE="solution at time t=',time_divB,'"'
		write(69,*)'Variables = "X", "Y","div_B"'
		write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'

		do j = 1,ny
		do i = 1,nx

			write(69,*)  xV(i), yV(j), divb(i,j)

		enddo
		enddo

		close(69)

	endif

	return

	end subroutine div4_1_b


	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	subroutine divB_phi_const
	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	!return

	!!$ calculate the divergence of B and the corrective factor
	!!$ grad_Phi in such a way that the resulting B=B+grad_Phi is 
	!!$ divergence free. 

		if(divB_flag==0) then
			err_tol = 1.d-8
			itemax = 10000
		elseif(divB_flag==1) then
			err_tol = divergence_tolerance
			itemax = 2000
		endif

	1 continue
	! for repeating in case of troubles

	!!$     A dimensionless form of div*B is div*B*rmlength/Bmax
	!!$     rmleng is the length of the system. 
		  rmleng=dmax1(dx*nx,dy*ny)

	!!$    calculate the maximum of B and major radius
		bmax=1.d-10

		do j=1,ny
		do i=1,nx

			bij=dsqrt(bx(i,j)**2+by(i,j)**2)
			if(bij.ge.bmax)bmax=bij

		enddo
		enddo

	!!$     calculate div*B and its maximum with its location
		  divbmax=0


	!$omp parallel default(shared)
	!$omp do private(i,j)
		do j=1,ny
			do i=1,nx
 
				divb(i,j)=0.5d0*(bx(i+1,j)-bx(i-1,j))/dx+  &
					  0.5d0*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

			enddo
		enddo

	!$omp enddo
	!$omp end parallel

		divbmax = maxval(divb)
		max_ind = maxloc(divb)
		id = max_ind(1); jd = max_ind(2)


	!!$ dimensionless div*B
		  divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b before ",divbmax

	!!$     calculate the corrective factor grad*Phi by solving
	!!$      nabla^2 Phi=-div B using the SOR method with relaxation
	!!$      factor "relax"

	!$omp parallel default(shared)
	!$omp do private(i,j)
		do j=-1,ny+2
			do i=-1,nx+2

				phi(i,j)=0.001*dabs(bmax*xlength)+1.d-10

			enddo
		enddo
	!$omp enddo
	!$omp end parallel

		coe1=2.d0*(dy/dx+dx/dy)
		coe2=dy/dx
		coe3=dx/dy
		coe4=4d0*dx*dy
		coe5=0.d0

		relax=1.5d0


		do ite=1,itemax

			ermax=0.d0

			do j = 1,ny
			do i = 1,nx

				coe5=2.d0*torus*dy/Rmaj(i)

				phiold=phi(i,j)

				phinew=coe2*(phi(i+2,j)+phi(i-2,j))+  &
								coe5*(phi(i+1,j)-phi(i-1,j))+  &
								coe3*(phi(i,j+2)+phi(i,j-2))+coe4*divb(i,j)

				phi(i,j)=relax*phinew/coe1+(1.-relax)*phiold

				err=dabs((phi(i,j)-phiold)/phiold)
				iter=ite

				if(err.ge.ermax)ermax=err

			enddo
			enddo

	!!$    calculate the maximum of Phi to determine the relative error 
			phimax = maxval(abs(phi))

	!!$     maximum relative error allowed

	!!$     apply the boundary conditions: phi = constant on the boundary

	!$omp parallel default(shared)
	!$omp do private(i)
			do i=1,nx

				phi(i,ny+1) = 0.d0
				phi(i,0 )= 0.d0
				phi(i,ny+2) = -phi(i,ny-1)
				phi(i,-1) = -phi(i,2)

			enddo
	!$omp enddo

	!$omp do private(j)
			do j=1,ny

				phi(0,j) = 0.d0
				phi(nx+1,j) = 0.d0
				phi(-1,j) = -phi(2,j)
				phi(nx+2,j) = -phi(nx-1,j)

			enddo
	!$omp enddo
	!$omp end parallel

			if((ermax.le.err_tol).and.(ite>1))go to 1212

		enddo

		print*,"reached itemax in div b"



	 1212  continue
 
	!!$     update the field

	!$omp parallel default(shared)
	!$omp do private(i,j)
		do j=1,ny
			do i=1,nx

				bx(i,j)=bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
				by(i,j)=by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)

			enddo
		enddo
	!$omp enddo
	!$omp end parallel

	!!$     calculate div*B and its maximum with its location
	divbmax=0

		do j=1,ny
		do i=1,nx

			divb(i,j)=0.5d0*(bx(i+1,j)-bx(i-1,j))/dx+  &
				0.5d0*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

			if(dabs(divb(i,j)).ge.divbmax)then
				id=i
				jd=j
				divbmax=dabs(divb(i,j))
			endif

		enddo
		enddo

	!!$ dimensionless div*B
		  divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b after ",divbmax
	!!$      pause
		if(divB_flag==1) then
		  if(divbmax.ge.0.01) print*," divb_max = ",divbmax, id, jd 
		  if(iter.ge.150) print*," iterations in divb =",iter
			  if(divbmax>1.1d-2) then
	      		print*, 'divB: rewinding'
	      		goto 1
			  endif
		endif

		err_final = divbmax


		if((time_divB >= tprint).and.(time_divB > time_last))then

			time_last = time_divB

			open(69, file='div_B.plt')

		! open output file and print header for Tecplot
			write(69,*)'TITLE="solution at time t=',time_divB,'"'
			write(69,*)'Variables = "X", "Y","div_B"'
			write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'

			do j = 1,ny
			do i = 1,nx

				write(69,*)  xV(i), yV(j), divb(i,j)

			enddo
			enddo

			close(69)

		endif

		return

	end subroutine divB_phi_const


	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	subroutine divB_phi_2BC
	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	!return

	!!$ calculate the divergence of B and the corrective factor
	!!$ grad_Phi in such a way that the resulting B=B+grad_Phi is 
	!!$ divergence free. 

		if(divB_flag==0) then
			err_tol = 1.d-8
			itemax = 10000
		elseif(divB_flag==1) then
			err_tol = divergence_tolerance
			itemax = 2000
		endif

	1 continue
	! for repeating in case of troubles

	!!$     A dimensionless form of div*B is div*B*rmlength/Bmax
	!!$     rmleng is the length of the system. 
		  rmleng=dmax1(dx*nx,dy*ny)

	!!$    calculate the maximum of B and major radius
		bmax=1.d-10

		do j=1,ny
		do i=1,nx

			bij=dsqrt(bx(i,j)**2+by(i,j)**2)
			if(bij.ge.bmax)bmax=bij

		enddo
		enddo

	!!$     calculate div*B and its maximum with its location
		  divbmax=0


	!$omp parallel default(shared)
	!$omp do private(i,j)
		do j=1,ny
			do i=1,nx
 
				divb(i,j)=0.5d0*(bx(i+1,j)-bx(i-1,j))/dx+  &
					  0.5d0*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

                                if(dabs(divb(i,j)).ge.divbmax)then
                                     id=i
                                    jd=j
                                    divbmax=dabs(divb(i,j))
                                endif

			enddo
		enddo

	!$omp enddo
	!$omp end parallel

		divbmax = maxval(divb)
		max_ind = maxloc(divb)
		id = max_ind(1); jd = max_ind(2)


	!!$ dimensionless div*B
		  divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b before ",divbmax

	!!$     calculate the corrective factor grad*Phi by solving
	!!$      nabla^2 Phi=-div B using the SOR method with relaxation
	!!$      factor "relax"

	!$omp parallel default(shared)
	!$omp do private(i,j)
		do j=-1,ny+2
			do i=-1,nx+2

				phi(i,j)=0.001*dabs(bmax*xlength)+1.d-10

			enddo
		enddo
	!$omp enddo
	!$omp end parallel

		coe1=2.d0*(dy/dx+dx/dy)
		coe2=dy/dx
		coe3=dx/dy
		coe4=4d0*dx*dy
		coe5=0.d0

		relax=1.5d0


		do ite=1,itemax

			ermax=0.d0

			do j = 1,ny
			do i = 1,nx

				coe5=2.d0*torus*dy/Rmaj(i)

				phiold=phi(i,j)

				phinew=coe2*(phi(i+2,j)+phi(i-2,j))+  &
								coe5*(phi(i+1,j)-phi(i-1,j))+  &
								coe3*(phi(i,j+2)+phi(i,j-2))+coe4*divb(i,j)

				phi(i,j)=relax*phinew/coe1+(1.-relax)*phiold

				err=dabs((phi(i,j)-phiold)/phiold)
				iter=ite

				if(err.ge.ermax)ermax=err

			enddo
			enddo

	!!$    calculate the maximum of Phi to determine the relative error 
			phimax = maxval(abs(phi))

	!!$     maximum relative error allowed

	!!$     apply the boundary conditions: phi = constant on the boundary

	!$omp parallel default(shared)
	!$omp do private(i)
			do i=1,nx

				phi(i,ny+1) = phi(i,ny)
				phi(i,0 )= phi(i,1)
				phi(i,ny+2) = 0.d0
				phi(i,-1) = 0.d0

			enddo
	!$omp enddo

	!$omp do private(j)
			do j=1,ny

				phi(0,j) = phi(1,j)
				phi(nx+1,j) = phi(nx,j)
				phi(-1,j) = 0.d0
				phi(nx+2,j) = 0.d0

			enddo
	!$omp enddo
	!$omp end parallel

			if((ermax.le.err_tol).and.(ite>1))go to 1212

		enddo

		print*,"reached itemax in div b"



	 1212  continue
 
	!!$     update the field

	!$omp parallel default(shared)
	!$omp do private(i,j)
		do j=1,ny
			do i=1,nx

				bx(i,j)=bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
				by(i,j)=by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)

			enddo
		enddo
	!$omp enddo
	!$omp end parallel

	!!$     calculate div*B and its maximum with its location
	divbmax=0

		do j=1,ny
		do i=1,nx

			divb(i,j)=0.5d0*(bx(i+1,j)-bx(i-1,j))/dx+  &
				0.5d0*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

			if(dabs(divb(i,j)).ge.divbmax)then
				id=i
				jd=j
				divbmax=dabs(divb(i,j))
			endif

		enddo
		enddo

	!!$ dimensionless div*B
		  divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b after ",divbmax
	!!$      pause
		if(divB_flag==1) then
		  if(divbmax.ge.0.01) print*," divb_max = ",divbmax, id, jd 
		  if(iter.ge.150) print*," iterations in divb =",iter
			  if(divbmax>1.1d-2) then
	      		print*, 'divB: rewinding'
	      		goto 1
			  endif
		endif

		err_final = divbmax


		if((time_divB >= tprint).and.(time_divB > time_last))then

			time_last = time_divB

			open(69, file='div_B.plt')

		! open output file and print header for Tecplot
			write(69,*)'TITLE="solution at time t=',time_divB,'"'
			write(69,*)'Variables = "X", "Y","div_B"'
			write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'

			do j = 1,ny
			do i = 1,nx

				write(69,*)  xV(i), yV(j), divb(i,j)

			enddo
			enddo

			close(69)

		endif

		return

	end subroutine divB_phi_2BC

	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	subroutine div4_1_b_old
	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@


		!return

		!!$ calculate the divergence of B and the corrective factor
		!!$ grad_Phi in such a way that the resulting B=B+grad_Phi is 
		!!$ divergence free. 

		print*, 'divB out'

		if(divB_flag==0) then
			err_tol = 1.d-8
			itemax = 10000
		elseif(divB_flag==1) then
			err_tol = divergence_tolerance
			itemax = 2000
		endif

	1 continue
	! for repeating in case of troubles

	!!$     A dimensionless form of div*B is div*B*rmlength/Bmax
	!!$     rmleng is the length of the system. 
		  rmleng=dmax1(dx*nx,dy*ny)

	!!$    calculate the maximum of B and major radius
		bmax=1.d-10

		do 10 i=1,nx
		do 10 j=1,ny

			bij=dsqrt(bx(i,j)**2+by(i,j)**2)
			if(bij.ge.bmax)bmax=bij

		10   continue

	!!$     calculate div*B and its maximum with its location
		  divbmax=0


	!$omp parallel default(shared)
	!$omp do private(i,j)
		do 20 j=1,ny
		  do 20 i=1,nx
 
			divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
				  0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)


	 20   continue
	!$omp enddo
	!$omp end parallel

		divbmax = maxval(divb)
		max_ind = maxloc(divb)
		id = max_ind(1); jd = max_ind(2)

	!!$ 	if(dabs(divb(i,j)).ge.divbmax)then
	!!$ 	  id=i
	!!$ 	  jd=j
	!!$ 	  divbmax=dabs(divb(i,j))
	!!$ 	endif


	!!$ dimensionless div*B
		  divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b before ",divbmax

	!!$     calculate the corrective factor grad*Phi by solving
	!!$      nabla^2 Phi=-div B using the SOR method with relaxation
	!!$      factor "relax"

	!$omp parallel default(shared)
	!$omp do private(i,j)
		 do 30 j=-1,ny+2
		  do 30 i=-1,nx+2

			phi(i,j)=0.001*dabs(bmax*xlength)+1.d-10

	 30   continue
	!$omp enddo
	!$omp end parallel

			coe1=2.*(dy/dx+dx/dy)
			coe2=dy/dx
			coe3=dx/dy
			coe4=4*dx*dy
			coe5=0.d0

			relax=1.5


		do 50 ite=1,itemax

			ermax=0.d0

			do 40 j=1,ny
			do 40 i=1,nx

				coe5=2.d0*torus*dy/Rmaj(i)

				phiold=phi(i,j)

				phinew=coe2*(phi(i+2,j)+phi(i-2,j))+  &
								coe5*(phi(i+1,j)-phi(i-1,j))+  &
								coe3*(phi(i,j+2)+phi(i,j-2))+coe4*divb(i,j)

				phi(i,j)=relax*phinew/coe1+(1.-relax)*phiold

				err=dabs((phi(i,j)-phiold)/phiold)
				iter=ite

				if(err.ge.ermax)ermax=err

		40   continue

	!!$    calculate the maximum of Phi to determine the relative error 
		phimax=0

	!!$ 	do 90 j=1,ny
	!!$ 	do 90 i=1,nx
	!!$  90	if(dabs(phi(i,j)).ge.phimax)phimax=dabs(phi(i,j))

		phimax = maxval(abs(phi))

	!!$     maximum relative error allowed

	!!$     apply the boundary conditions of zero normal field at walls

	!$omp parallel default(shared)
	!$omp do private(i)

		do 70 i=1,nx

			phi(i,ny+1)=phi(i,ny)
			phi(i,ny+2)=phi(i,ny-1)
			phi(i,-1)=phi(i,2)

	 70   phi(i,0)=phi(i,1)
	!$omp enddo

	!$omp do private(j)
		do 80 j=1,ny

			phi(0,j)=phi(1,j)
			phi(-1,j)=phi(2,j)
			phi(nx+2,j)=phi(nx-1,j)

	 80	phi(nx+1,j)=phi(nx,j)
	!$omp enddo
	!$omp end parallel

		  if((ermax.le.err_tol).and.(ite>1))go to 1212

	 50   continue

		  print*,"reached itemax in div b"



	 1212  continue
 
	!!$     update the field

	!$omp parallel default(shared)
	!$omp do private(i,j)
		do 100 j=1,ny
			do 100 i=1,nx

				bx(i,j)=bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
				by(i,j)=by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)

	 100  continue  
	!$omp enddo
	!$omp end parallel

	!!$     calculate div*B and its maximum with its location
	divbmax=0

		do 120 j=1,ny
		do 120 i=1,nx

			divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
			0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

			if(dabs(divb(i,j)).ge.divbmax)then
				id=i
				jd=j
				divbmax=dabs(divb(i,j))
			endif

	120   continue

	!!$ dimensionless div*B
		  divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b after ",divbmax
	!!$      pause
		if(divB_flag==1) then
		  if(divbmax.ge.0.01) print*," divb_max = ",divbmax, id, jd 
		  if(iter.ge.150) print*," iterations in divb =",iter
			  if(divbmax>1.1d-2) then
	      		print*, 'divB: rewinding'
	      		goto 1
			  endif
		endif

		err_final = divbmax


		if((time_divB >= tprint).and.(time_divB > time_last))then

	!		print*, 'divB, times = ', time_divB, tprint, time_last
	!		pause

			time_last = time_divB

			open(69, file='div_B.plt')

		! open output file and print header for Tecplot
			write(69,*)'TITLE="solution at time t=',time_divB,'"'
			write(69,*)'Variables = "X", "Y","div_B"'
			write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'

			do j = 1,ny
			do i = 1,nx

				write(69,*)  xV(i), yV(j), divb(i,j)

			enddo
			enddo

			close(69)

		endif

		return

	end subroutine div4_1_b_old

	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	subroutine div4_1_b_take2
	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

		integer :: istart, ipass

!!$		print*, 'divB in'

		!return

		!!$ calculate the divergence of B and the corrective factor
		!!$ grad_Phi in such a way that the resulting B=B+grad_Phi is 
		!!$ divergence free. 

		if(divB_flag==0) then
			err_tol = divergence_tolerance * 1.d-3
			itemax = 10000
		elseif(divB_flag==1) then
			err_tol = divergence_tolerance
			itemax = 2000
		endif

		1 continue
		! for repeating in case of troubles

		!!$     A dimensionless form of div*B is div*B*rmlength/Bmax
		!!$     rmleng is the length of the system. 
		rmleng=dmax1(dx*nx,dy*ny)

		!!$    calculate the maximum of B and major radius
		bmax=1.d-10

		do i=1,nx
		do j=1,ny

			bij=dsqrt(bx(i,j)**2+by(i,j)**2)
			if(bij.ge.bmax)bmax=bij

		enddo
		enddo !10

		!!$     calculate div*B and its maximum with its location
		divbmax=0

		!$omp parallel default(shared)
		!$omp do private(i,j)
		do j=1,ny
		do i=1,nx

			divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
				0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

			if(dabs(divb(i,j)).ge.divbmax)then
				id=i
				jd=j
				divbmax=dabs(divb(i,j))
			endif

		enddo !20
		enddo
		!$omp enddo
		!$omp end parallel

		!!$ dimensionless div*B
		divbmax=divbmax*rmleng/bmax
		!!$      print*,"div b before ",divbmax
!!$		print*, divbmax, 'divbmax before'

		!!$     calculate the corrective factor grad*Phi by solving
		!!$      nabla^2 Phi=-div B using the SOR method with relaxation
		!!$      factor "relax"

		!$omp parallel default(shared)
		!$omp do private(i,j)
		do j=-1,ny+2
		do i=-1,nx+2

			phi(i,j)=0.001*dabs(bmax*xlength)+1.d-10

		enddo !30
		enddo
		!$omp enddo
		!$omp end parallel

		coe1=2.*(dy/dx+dx/dy)
		coe2=dy/dx
		coe3=dx/dy
		coe4=4*dx*dy
		coe5=0.d0

		relax=1.5

		iteloop: do ite=1,itemax

			ermax=0.d0

			do ipass = 1,2

				istart = ipass

				do j=1,ny
					do i=istart,nx,2

						coe5=2.d0*torus*dy/Rmaj(i)

						phiold=phi(i,j)

						phinew=coe2*(phi(i+2,j)+phi(i-2,j))+  &
										coe5*(phi(i+1,j)-phi(i-1,j))+  &
										coe3*(phi(i,j+2)+phi(i,j-2))+coe4*divb(i,j)

						phi(i,j)=relax*phinew/coe1+(1.-relax)*phiold

						err=dabs((phi(i,j)-phiold)/phiold)
						iter=ite

						if(err.ge.ermax) ermax=err

					enddo
					istart = istart + (-1)**(istart+1)
				enddo

			enddo

			!!$    calculate the maximum of Phi to determine the relative error 
			phimax = maxval(abs(phi))

			!!$     maximum relative error allowed

			!!$ apply the boundary conditions of zero normal field at walls (buondary points are on the wall)

			!$omp parallel default(shared)

			!$omp do private(i)
			do i=1,nx

				phi(i,ny+1) = (4.d0*phi(i,ny)-phi(i,ny-1))/3.d0	! on the wall
				phi(i,ny+2) = phi(i,ny)

				phi(i,0) = (4.d0*phi(i,1)-phi(i,2))/3.d0	! on the wall
				phi(i,-1) = phi(i,1)

			enddo
			!$omp enddo

			!$omp do private(j)
			do j=1,ny

				phi(0,j) = (4.d0*phi(1,j)-phi(2,j))/3.d0	! on the wall
				phi(-1,j) = phi(1,j)

				phi(nx+1,j) = (4.d0*phi(nx,j)-phi(nx-1,j))/3.d0	! on the wall
				phi(nx+2,j) = phi(nx,j)

			enddo
			!$omp enddo

			!$omp end parallel

		  if((ermax.le.err_tol).and.(ite>1)) exit iteloop
!		  if((ermax.le.err_tol).and.(ite>1))go to 1212

		enddo iteloop

		if(ite>itemax) print*,"reached itemax in div b"

		!!$     update the field

		!$omp parallel default(shared)
		!$omp do private(i,j)
		do j=1,ny
		do i=1,nx

			bx(i,j)=bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
			by(i,j)=by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)

		enddo
		enddo !100
		!$omp enddo
		!$omp end parallel

		!!$     calculate div*B and its maximum with its location
		divbmax=0

		do j=1,ny
		do i=1,nx

			divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
				0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

			if(abs(divb(i,j))>divbmax) then
				divbmax = abs(divb(i,j))
				id = i; jd = j
			endif

		enddo
		enddo !120

	!!$ dimensionless div*B
		  divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b after ",divbmax
	!!$      pause
		if(divB_flag==1) then
		  if(divbmax.ge.0.01) print*," divb_max = ",divbmax, id, jd 
		  if(iter.ge.150) print*," iterations in divb =",iter
			  if(divbmax>1.1d-2) then
	      		print*, 'divB: rewinding'
	      		goto 1
			  endif
		endif

		err_final = divbmax


		if((time_divB >= tprint).and.(time_divB > time_last))then

	!		print*, 'divB, times = ', time_divB, tprint, time_last
	!		pause

			time_last = time_divB

			open(69, file='div_B.plt')

		! open output file and print header for Tecplot
			write(69,*)'TITLE="solution at time t=',time_divB,'"'
			write(69,*)'Variables = "X", "Y","div_B"'
			write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'

			do j = 1,ny
			do i = 1,nx

				write(69,*)  xV(i), yV(j), divb(i,j)

			enddo
			enddo

			close(69)

		endif

!!$		print*, 'divB out'

		return

	end subroutine div4_1_b_take2

!!$	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
!!$	subroutine div4_1_b_take2_0_edge
!!$	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
!!$
!!$		integer :: istart, ipass
!!$
!!$!!$		print*, 'divB in'
!!$
!!$		!return
!!$
!!$		!!$ calculate the divergence of B and the corrective factor
!!$		!!$ grad_Phi in such a way that the resulting B=B+grad_Phi is 
!!$		!!$ divergence free. 
!!$
!!$		if(divB_flag==0) then
!!$			err_tol = 1.d-8
!!$			itemax = 10000
!!$		elseif(divB_flag==1) then
!!$			err_tol = divergence_tolerance
!!$			itemax = 2000
!!$		endif
!!$
!!$		1 continue
!!$		! for repeating in case of troubles
!!$
!!$		!!$     A dimensionless form of div*B is div*B*rmlength/Bmax
!!$		!!$     rmleng is the length of the system. 
!!$		rmleng=dmax1(dx*nx,dy*ny)
!!$
!!$		!!$    calculate the maximum of B and major radius
!!$		bmax=1.d-10
!!$
!!$		do i=1,nx
!!$		do j=1,ny
!!$
!!$			bij=dsqrt(bx(i,j)**2+by(i,j)**2)
!!$			if(bij.ge.bmax)bmax=bij
!!$
!!$		enddo
!!$		enddo !10
!!$
!!$		!!$     calculate div*B and its maximum with its location
!!$		divbmax=0
!!$
!!$		!$omp parallel default(shared)
!!$		!$omp do private(i,j)
!!$		do j=1,ny
!!$		do i=1,nx
!!$
!!$			divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
!!$				0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)
!!$
!!$		enddo !20
!!$		enddo
!!$		!$omp enddo
!!$		!$omp end parallel
!!$
!!$		divbmax = maxval(abs(divb))
!!$		max_ind = maxloc(abs(divb))
!!$		id = max_ind(1); jd = max_ind(2)
!!$
!!$		!!$ dimensionless div*B
!!$		divbmax=divbmax*rmleng/bmax
!!$		!!$      print*,"div b before ",divbmax
!!$!!$		print*, divbmax, 'divbmax before'
!!$
!!$		!!$     calculate the corrective factor grad*Phi by solving
!!$		!!$      nabla^2 Phi=-div B using the SOR method with relaxation
!!$		!!$      factor "relax"
!!$
!!$		!$omp parallel default(shared)
!!$		!$omp do private(i,j)
!!$		do j=-1,ny+2
!!$		do i=-1,nx+2
!!$
!!$			phi(i,j)=0.001*dabs(bmax*xlength)+1.d-10
!!$
!!$		enddo !30
!!$		enddo
!!$		!$omp enddo
!!$		!$omp end parallel
!!$
!!$		coe1=2.*(dy/dx+dx/dy)
!!$		coe2=dy/dx
!!$		coe3=dx/dy
!!$		coe4=4*dx*dy
!!$		coe5=0.d0
!!$
!!$		relax=1.5
!!$
!!$		iteloop: do ite=1,itemax
!!$
!!$			ermax=0.d0
!!$
!!$			do ipass = 1,2
!!$
!!$				istart = ipass
!!$
!!$				do j=1,ny
!!$					do i=istart,nx,2
!!$
!!$						coe5=2.d0*torus*dy/Rmaj(i)
!!$
!!$						phiold=phi(i,j)
!!$
!!$						phinew=coe2*(phi(i+2,j)+phi(i-2,j))+  &
!!$										coe5*(phi(i+1,j)-phi(i-1,j))+  &
!!$										coe3*(phi(i,j+2)+phi(i,j-2))+coe4*divb(i,j)
!!$
!!$						phi(i,j)=relax*phinew/coe1+(1.-relax)*phiold
!!$
!!$						err=dabs((phi(i,j)-phiold)/phiold)
!!$						iter=ite
!!$
!!$						if(err.ge.ermax) ermax=err
!!$
!!$					enddo
!!$					istart = istart + (-1)**(istart+1)
!!$				enddo
!!$
!!$			enddo
!!$
!!$			!!$    calculate the maximum of Phi to determine the relative error 
!!$			phimax = maxval(abs(phi))
!!$
!!$			!!$     maximum relative error allowed
!!$
!!$			!!$ apply the boundary conditions of zero normal field at walls (buondary points are on the wall)
!!$
!!$			!$omp parallel default(shared)
!!$
!!$			!$omp do private(i)
!!$			do i=1,nx
!!$
!!$				phi(i,ny+1) = 0.d0	! on the wall
!!$				phi(i,ny+2) = -phi(i,ny)
!!$
!!$				phi(i,0) = 0.d0	! on the wall
!!$				phi(i,-1) = -phi(i,1)
!!$
!!$			enddo
!!$			!$omp enddo
!!$
!!$			!$omp do private(j)
!!$			do j=1,ny
!!$
!!$				phi(0,j) = 0.d0	! on the wall
!!$				phi(-1,j) = -phi(1,j)
!!$
!!$				phi(nx+1,j) = 0.d0	! on the wall
!!$				phi(nx+2,j) = -phi(nx,j)
!!$
!!$			enddo
!!$			!$omp enddo
!!$
!!$			!$omp end parallel
!!$
!!$		  if((ermax.le.err_tol).and.(ite>1)) exit iteloop
!!$!		  if((ermax.le.err_tol).and.(ite>1))go to 1212
!!$
!!$		enddo iteloop
!!$
!!$		if(ite>itemax) print*,"reached itemax in div b"
!!$
!!$		!!$     update the field
!!$
!!$		!$omp parallel default(shared)
!!$		!$omp do private(i,j)
!!$		do j=0,ny+1
!!$		do i=0,nx+1
!!$
!!$			bx(i,j)=bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
!!$			by(i,j)=by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)
!!$
!!$		enddo
!!$		enddo !100
!!$		!$omp enddo
!!$		!$omp end parallel
!!$
!!$		!!$     calculate div*B and its maximum with its location
!!$		divbmax=0
!!$
!!$		do j=1,ny
!!$		do i=1,nx
!!$
!!$			divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
!!$				0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)
!!$
!!$			if(abs(divb(i,j))>divbmax) then
!!$				divbmax = abs(divb(i,j))
!!$				id = i; jd = j
!!$			endif
!!$
!!$		enddo
!!$		enddo !120
!!$
!!$	!!$ dimensionless div*B
!!$		  divbmax=divbmax*rmleng/bmax
!!$	!!$      print*,"div b after ",divbmax
!!$	!!$      pause
!!$		if(divB_flag==1) then
!!$		  if(divbmax.ge.0.01) print*," divb_max = ",divbmax, id, jd 
!!$		  if(iter.ge.150) print*," iterations in divb =",iter
!!$			  if(divbmax>1.1d-2) then
!!$	      		print*, 'divB: rewinding'
!!$	      		goto 1
!!$			  endif
!!$		endif
!!$
!!$		err_final = divbmax
!!$
!!$
!!$		if((time_divB >= tprint).and.(time_divB > time_last))then
!!$
!!$	!		print*, 'divB, times = ', time_divB, tprint, time_last
!!$	!		pause
!!$
!!$			time_last = time_divB
!!$
!!$			open(69, file='div_B.plt')
!!$
!!$		! open output file and print header for Tecplot
!!$			write(69,*)'TITLE="solution at time t=',time_divB,'"'
!!$			write(69,*)'Variables = "X", "Y","div_B"'
!!$			write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'
!!$
!!$			do j = 1,ny
!!$			do i = 1,nx
!!$
!!$				write(69,*)  xV(i), yV(j), divb(i,j)
!!$
!!$			enddo
!!$			enddo
!!$
!!$			close(69)
!!$
!!$		endif
!!$
!!$!!$		print*, 'divB out'
!!$
!!$		return
!!$
!!$	end subroutine div4_1_b_take2_0_edge
!!$

	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	subroutine div4_1_b_safe
	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@


	!return

	!!$ calculate the divergence of B and the corrective factor
	!!$ grad_Phi in such a way that the resulting B=B+grad_Phi is 
	!!$ divergence free. 

	integer :: ipass, istart, iend, istep, jstart, jend, jstep

		if(divB_flag==0) then
			err_tol = 1.d-8
			itemax = 10000
		elseif(divB_flag==1) then
			err_tol = divergence_tolerance
			itemax = 2000
		endif

	1 continue
	! for repeating in case of troubles

	!!$     A dimensionless form of div*B is div*B*rmlength/Bmax
	!!$     rmleng is the length of the system. 
		  rmleng=dmax1(dx*nx,dy*ny)

	!!$    calculate the maximum of B and major radius
		bmax=1.d-10

		do i=1,nx
		do j=1,ny

			bij=dsqrt(bx(i,j)**2+by(i,j)**2)
			if(bij.ge.bmax)bmax=bij

		enddo
		enddo

	!!$     calculate div*B and its maximum with its location
		  divbmax=0


	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j = 1,ny
	do i = 1,nx

		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
			  0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)


			if(dabs(divb(i,j)).ge.divbmax)then
				id=i
				jd=j
				divbmax=dabs(divb(i,j))
			endif
	 enddo
	 enddo
	!$omp enddo
	!$omp end parallel

	!!$ dimensionless div*B
		  divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b before ",divbmax

	!!$     calculate the corrective factor grad*Phi by solving
	!!$      nabla^2 Phi=-div B using the SOR method with relaxation
	!!$      factor "relax"

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=-1,ny+2
	do i=-1,nx+2

		phi(i,j)=0.001*dabs(bmax*xlength)+1.d-10

	enddo
	enddo
	!$omp enddo
	!$omp end parallel

	coe1=2.*(dy/dx+dx/dy)
	coe2=dy/dx
	coe3=dx/dy
	coe4=4*dx*dy
	coe5=0.d0

	relax=1.5


	do ite=1,itemax

		ermax=0.d0

		do ipass = 1, 2

			istart = ipass

			do j=1, ny

				do i = istart, nx, 2

					coe5=2.d0*torus*dy/Rmaj(i)

					phiold=phi(i,j)

					phinew=coe2*(phi(i+2,j)+phi(i-2,j))+  &
									coe5*(phi(i+1,j)-phi(i-1,j))+  &
									coe3*(phi(i,j+2)+phi(i,j-2))+coe4*divb(i,j)

					phi(i,j)=relax*phinew/coe1+(1.-relax)*phiold

					err=dabs((phi(i,j)-phiold)/phiold)
					iter=ite

					if(err.ge.ermax)ermax=err

				enddo

				istart = istart + (-1)**(istart+1)	! changes 1 to 2 and 2 to 1

			enddo

		enddo

		!!$ calculate the maximum of Phi to determine the relative error 
		phimax=0


		phimax = maxval(abs(phi))

		!!$ maximum relative error allowed

		!!$ apply the boundary conditions of zero normal field at walls (buondary points are on the wall)

		!$omp parallel default(shared)
		!$omp do private(i)

		do i=1,nx

			phi(i,ny+1) = (4.d0*phi(i,ny)-phi(i,ny-1))/3.d0	! on the wall
			phi(i,ny+2) = phi(i,ny)

			phi(i,0) = (4.d0*phi(i,1)-phi(i,2))/3.d0	! on the wall
			phi(i,-1) = phi(i,1)

		enddo
		!$omp enddo

		!$omp do private(j)
		do j=1,ny

			phi(0,j) = (4.d0*phi(1,j)-phi(2,j))/3.d0	! on the wall
			phi(-1,j) = phi(1,j)

			phi(nx+1,j) = (4.d0*phi(nx,j)-phi(nx-1,j))/3.d0	! on the wall
			phi(nx+2,j) = phi(nx,j)

		enddo
		!$omp enddo
		!$omp end parallel

		if((ermax.le.err_tol).and.(ite>1)) exit

	enddo

	if(ite>itemax) print*,"reached itemax in div b"

	!!$     update the field

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=1,ny
		do i=1,nx

				bx(i,j) = bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
				by(i,j) = by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)

		 enddo
	 enddo
	!$omp enddo
	!$omp end parallel

	!!$ calculate div*B and its maximum with its location
	divbmax=0

	do j=1,ny
	do i=1,nx

		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
		0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

			if(dabs(divb(i,j)).ge.divbmax)then
				id=i
				jd=j
				divbmax=dabs(divb(i,j))
			endif
	enddo
	enddo

	!!$ dimensionless div*B
	divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b after ",divbmax
	!!$      pause
	if(divB_flag==1) then

		if(divbmax.ge.0.01) print*," divb_max = ",divbmax, id, jd

		if(iter.ge.150) print*," iterations in divb =",iter

		if(divbmax>1.1d-2) then
			print*, 'divB: rewinding'
			goto 1
		endif

	endif

	err_final = divbmax


	if((time_divB >= tprint).and.(time_divB > time_last))then

	!		print*, 'divB, times = ', time_divB, tprint, time_last
	!		pause

		time_last = time_divB

		open(69, file='div_B.plt')

	! open output file and print header for Tecplot
		write(69,*)'TITLE="solution at time t=',time_divB,'"'
		write(69,*)'Variables = "X", "Y","div_B"'
		write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'

		do j = 1,ny
		do i = 1,nx

			write(69,*)  xV(i), yV(j), divb(i,j)

		enddo
		enddo

		close(69)

	endif

	return

	end subroutine div4_1_b_safe

!!$	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
!!$	subroutine divb_FFT
!!$	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
!!$	! uses an FFT transform in Z (y) and a direct tridiagonal solver in R (x) to correct the divergence of B
!!$	! of course this will only work if ny+2 is a power of 2
!!$	! also notice that boundary points are on the wall
!!$	! the index k is used for the harmonics
!!$
!!$	use mod_parameters, only : pi, ylength
!!$
!!$	logical, save :: do_check = .true.
!!$	integer, save :: ipow = 0
!!$	real(kind=dkind), dimension(:), allocatable, save :: cos_array
!!$	real(kind=dkind), dimension(0:nmx+1, 0:nmy+1) :: divB_k	! first index is R/x, second is k
!!$	real(kind=dkind), dimension(0:nmy+1) :: temp_FT
!!$	real(kind=dkind), dimension(0:nx+1) :: a, b, c, r
!!$	real(kind=dkind) :: Lzdx, dxm1, diag
!!$	integer :: ierror
!!$	integer :: p, ntemp
!!$	integer :: k
!!$
!!$	! check if ny is a power of two (to be done only once)
!!$	if(do_check) then
!!$
!!$		ntemp = ny+2
!!$
!!$		do while(ntemp>2)
!!$
!!$			p = modulo(ntemp,2)
!!$
!!$			if(p/=0) then
!!$
!!$				! ntemp is not divisible by 2 -> ny is not a power of 2!
!!$				print*, 'error in divB_FFT: ny+2 is not a power of 2'
!!$				print*, 'divb_option changed to 0'
!!$
!!$				divb_option = 0
!!$				return
!!$
!!$			endif
!!$
!!$			ntemp = ntemp/2
!!$
!!$		enddo
!!$
!!$		do_check = .false.
!!$
!!$	endif
!!$
!!$	if(ipow==0) then
!!$
!!$		do while(2**ipow/=ny+2)
!!$			ipow = ipow+1
!!$		enddo
!!$
!!$		allocate(cos_array(0:ny+1))
!!$		call inifct(cos_array,ny+2,ierror,ipow)
!!$		if(ierror/=0) then
!!$			write(*,*) 'error in FFT divergence setup'
!!$			read(*, '() ')
!!$		endif
!!$
!!$	endif
!!$
!!$	! first, get the R-part of the divergence (due to the method, the Z-part is transformed integrating by parts)
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j)
!!$	do j = 0,ny+1
!!$	do i = 1,nx
!!$
!!$		divb(i,j) = 0.5d0*(bx(i+1,j)-bx(i-1,j))/dx + torus*bx(i,j)/Rmaj(i)
!!$
!!$	 enddo
!!$	 enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j,temp_FT)
!!$	do i = 1,nx
!!$
!!$		! sine transform of by term
!!$		temp_FT = by(i,:)
!!$		call sinft(temp_Ft,ny+2)
!!$
!!$		! cosine transform of bx term
!!$		call fct(divb(i,:),cos_array,ny+2,ipow)
!!$
!!$		do j = 0, ny+1
!!$			divB_k(i,j) = divb(i,j) + by(i,ny+1) - by(i,0) + temp_Ft(j)
!!$		enddo
!!$
!!$	 enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	! now solve the tridiagonal system for phi_k for all k's
!!$
!!$	Lzdx = ylength / dx
!!$	dxm1 = 1.d0/dx
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j,k,a,b,c,r,diag)
!!$	do k = 0, ny+1
!!$
!!$		diag = 2.d0*ylength / dx**2 + pi**2*k**2/ylength
!!$
!!$		! build the matrix
!!$		do i = 1, nx
!!$
!!$			a(i) = -Lzdx * (dxm1 - 0.5d0/Rmaj(i))
!!$			b(i) = diag
!!$			c(i) = -Lzdx * (dxm1 + 0.5d0/Rmaj(i))
!!$
!!$			r(i) = divB_k(i,k)
!!$
!!$		enddo
!!$
!!$		 ! zero normal field BC:
!!$
!!$		 a(0) = 0d0
!!$		 b(0) = 1.d0
!!$		 c(0) = -1.d0
!!$		 r(0) = 0.d0
!!$
!!$		 a(nx+1) = -1.d0
!!$		 b(nx+1) = 1.d0
!!$		 c(nx+1) = 0.d0
!!$		 r(nx+1) = 0.d0
!!$
!!$		 ! tridiagonal system solution
!!$
!!$		call tridiag(a,b,c,r,phi(:,k),nx+2,nx+2)
!!$
!!$	enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	! apply inverse transform to phi
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j)
!!$	do i = 1, nx
!!$
!!$		call ifct(phi(i,:),cos_array,ny+2,ipow)
!!$
!!$		do j = 0, ny+1
!!$			phi(i,j) = phi(i,j) * 2.d0/(ny+2.d0)
!!$		enddo
!!$
!!$	enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	! apply BC
!!$
!!$	if(divB_option==-1) then
!!$
!!$		do j = 1, ny
!!$
!!$			phi(0,j) = phi(1,j)
!!$			phi(nx+1,j) = phi(nx,j)
!!$
!!$		enddo
!!$
!!$	elseif(divB_option==-2) then
!!$
!!$		do j = 1, ny
!!$
!!$			phi(0,j) = (4.d0*phi(1,j)-phi(2,j))/3.d0	! on the wall
!!$			phi(nx+1,j) = (4.d0*phi(nx,j)-phi(nx-1,j))/3.d0	! on the wall
!!$
!!$		enddo
!!$
!!$	endif
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j)
!!$	do j=1,ny
!!$		do i=1,nx
!!$
!!$				bx(i,j) = bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
!!$				by(i,j) = by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)
!!$
!!$		 enddo
!!$	 enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	!!$ calculate div*B and its maximum with its location
!!$	divbmax=0
!!$
!!$	do j=1,ny
!!$	do i=1,nx
!!$
!!$		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
!!$		0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)
!!$
!!$	enddo
!!$	enddo
!!$
!!$	open(69, file='div_B.plt')
!!$
!!$! open output file and print header for Tecplot
!!$	write(69,*)'TITLE="solution at time t=',time_divB,'"'
!!$	write(69,*)'Variables = "X", "Y","div_B"'
!!$	write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'
!!$
!!$	do j = 1,ny
!!$	do i = 1,nx
!!$
!!$		write(69,*)  xV(i), yV(j), divb(i,j)
!!$
!!$	enddo
!!$	enddo
!!$
!!$	close(69)
!!$
!!$	end subroutine divb_FFT

end subroutine divergence_B


!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine div4_1_b_take2_0_edge(bx,by,time_divB,divB_flag,err_final)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	use mod_parameters, ONLY: dkind, dx,dy,dt,xlength,nx,ny, torus,  &
				tprint, divergence_tolerance, divB_option
	use mod_arrays, only : nmx,nmy, Rmaj, xV, yV

	implicit none

	real(kind=dkind), dimension(0:nmx+1,0:nmy+1) :: bx, by, divb
	real(kind=dkind) :: phi(-1:nmx+2,-1:nmy+2)

	integer :: divB_flag
	real(kind=dkind) :: err_final

	real(kind=dkind) :: err_tol
	integer :: itemax
	integer :: max_ind(1:2)
	real(kind=dkind) :: rmleng, bmax, bij, divbmax, ermax, phiold, phinew, phimax
	real(kind=dkind) :: coe1, coe2, coe3, coe4, coe5, relax, err
	real(kind=dkind), save :: time_last = 0.d0
	real(kind=dkind) :: time_divB
	integer :: id, jd, ite, iter
	integer :: i, j
	integer :: istart, ipass

!!$		print*, 'divB in'

	!return

	!!$ calculate the divergence of B and the corrective factor
	!!$ grad_Phi in such a way that the resulting B=B+grad_Phi is 
	!!$ divergence free. 

	if(divB_flag==0) then
		err_tol = divergence_tolerance * 1.d-3
		itemax = 10000
	elseif(divB_flag==1) then
		err_tol = divergence_tolerance
		itemax = 2000
	endif

	1 continue
	! for repeating in case of troubles

	!!$     A dimensionless form of div*B is div*B*rmlength/Bmax
	!!$     rmleng is the length of the system. 
	rmleng=dmax1(dx*nx,dy*ny)

	!!$    calculate the maximum of B and major radius
	bmax=1.d-10

	do i=1,nx
	do j=1,ny

		bij=dsqrt(bx(i,j)**2+by(i,j)**2)
		if(bij.ge.bmax)bmax=bij

	enddo
	enddo !10

	!!$     calculate div*B and its maximum with its location
	divbmax=0

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=1,ny
	do i=1,nx

		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
			0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)


			if(dabs(divb(i,j)).ge.divbmax)then
				id=i
				jd=j
				divbmax=dabs(divb(i,j))
			endif

	enddo !20
	enddo
	!$omp enddo
	!$omp end parallel

	!!$ dimensionless div*B
	divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b before ",divbmax
!!$		print*, divbmax, 'divbmax before'

	!!$     calculate the corrective factor grad*Phi by solving
	!!$      nabla^2 Phi=-div B using the SOR method with relaxation
	!!$      factor "relax"

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=-1,ny+2
	do i=-1,nx+2

		phi(i,j) = 1.d0

	enddo !30
	enddo
	!$omp enddo
	!$omp end parallel

	coe1=2.*(dy/dx+dx/dy)
	coe2=dy/dx
	coe3=dx/dy
	coe4=4*dx*dy
	coe5=0.d0

	relax=.25

	iteloop: do ite=1,itemax

		ermax=0.d0
		iter=ite

		do ipass = 1,2

			istart = ipass

			do j=1,ny
				do i=istart,nx,2

					coe5=2.d0*torus*dy/Rmaj(i)

					phiold=phi(i,j)

					phinew=coe2*(phi(i+2,j)+phi(i-2,j))+  &
									coe5*(phi(i+1,j)-phi(i-1,j))+  &
									coe3*(phi(i,j+2)+phi(i,j-2))+coe4*divb(i,j)

					phi(i,j)=relax*phinew/coe1+(1.-relax)*phiold

					err=dabs((phi(i,j)-phiold)/phiold)

					if(err.ge.ermax) then
						ermax=err
						id=i; jd=j
					endif

				enddo
				istart = istart + (-1)**(istart+1)
			enddo

		enddo

		!!$    calculate the maximum of Phi to determine the relative error 
		phimax = maxval(abs(phi))

		!!$     maximum relative error allowed

		!!$ apply the boundary conditions of zero normal field at walls (buondary points are on the wall)

		!$omp parallel default(shared)

		!$omp do private(i)
		do i=1,nx

			phi(i,ny+1) = 1.d0	! on the wall
			phi(i,ny+2) = 2.d0 -phi(i,ny)

			phi(i,0) = 1.d0	! on the wall
			phi(i,-1) = 2.d0 -phi(i,1)

		enddo
		!$omp enddo

		!$omp do private(j)
		do j=1,ny

			phi(0,j) = 1.d0	! on the wall
			phi(-1,j) = 2.d0 -phi(1,j)

			phi(nx+1,j) = 1.d0	! on the wall
			phi(nx+2,j) = 2.d0 -phi(nx,j)

		enddo
		!$omp enddo

		!$omp end parallel

	  if((ermax.le.err_tol).and.(ite>1)) exit iteloop
!		  if((ermax.le.err_tol).and.(ite>1))go to 1212

	enddo iteloop

	if(ite>itemax) print*,"reached itemax in div b"

	!!$     update the field

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=0,ny+1
	do i=0,nx+1

		bx(i,j)=bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
		by(i,j)=by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)

	enddo
	enddo !100
	!$omp enddo
	!$omp end parallel

	!!$     calculate div*B and its maximum with its location
	divbmax=0

	do j=1,ny
	do i=1,nx

		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
			0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

		if(abs(divb(i,j))>divbmax) then
			divbmax = abs(divb(i,j))
			id = i; jd = j
		endif

	enddo
	enddo !120

!!$ dimensionless div*B
	  divbmax=divbmax*rmleng/bmax
!!$      print*,"div b after ",divbmax
!!$      pause
	if(divB_flag==1) then
	  if(divbmax.ge.0.01) print*," divb_max = ",divbmax, id, jd 
	  if(iter.ge.150) print*," iterations in divb =",iter
		  if(divbmax>1.1d-2) then
	      	print*, 'divB: rewinding'
	      	goto 1
		  endif
	endif

	err_final = divbmax


	if((time_divB >= tprint).and.(time_divB > time_last))then

!		print*, 'divB, times = ', time_divB, tprint, time_last
!		pause

		time_last = time_divB

		open(69, file='div_B.plt')

	! open output file and print header for Tecplot
		write(69,*)'TITLE="solution at time t=',time_divB,'"'
		write(69,*)'Variables = "X", "Y","div_B"'
		write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'

		do j = 1,ny
		do i = 1,nx

			write(69,*)  xV(i), yV(j), divb(i,j)

		enddo
		enddo

		close(69)

	endif

!!$		print*, 'divB out'

	return

end subroutine div4_1_b_take2_0_edge

!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine div4_1_b_take2_0_edge_bis(bx,by,time_divB,divB_flag,err_final)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
! this version has no normal field at the left and right boundary,
! but allows for normal field variation on top and bottom

	use mod_parameters, ONLY: dkind, dx,dy,dt,xlength,nx,ny, torus,  &
				tprint, divergence_tolerance, divB_option
	use mod_arrays, only : nmx,nmy, Rmaj, xV, yV

	implicit none

	real(kind=dkind), dimension(0:nmx+1,0:nmy+1) :: bx, by, divb
	real(kind=dkind) :: phi(-1:nmx+2,-1:nmy+2)

	integer :: divB_flag
	real(kind=dkind) :: err_final

	real(kind=dkind) :: err_tol
	integer :: itemax
	integer :: max_ind(1:2)
	real(kind=dkind) :: rmleng, bmax, bij, divbmax, ermax, phiold, phinew, phimax
	real(kind=dkind) :: coe1, coe2, coe3, coe4, coe5, relax, err
	real(kind=dkind), save :: time_last = 0.d0
	real(kind=dkind) :: time_divB
	integer :: id, jd, ite, iter
	integer :: i, j
	integer :: istart, ipass

!!$		print*, 'divB in'

	!return

	!!$ calculate the divergence of B and the corrective factor
	!!$ grad_Phi in such a way that the resulting B=B+grad_Phi is 
	!!$ divergence free. 

	if(divB_flag==0) then
		err_tol = divergence_tolerance * 1.d-3
		itemax = 10000
	elseif(divB_flag==1) then
		err_tol = divergence_tolerance
		itemax = 2000
	endif

	1 continue
	! for repeating in case of troubles

	!!$     A dimensionless form of div*B is div*B*rmlength/Bmax
	!!$     rmleng is the length of the system. 
	rmleng=dmax1(dx*nx,dy*ny)

	!!$    calculate the maximum of B and major radius
	bmax=1.d-10

	do i=1,nx
	do j=1,ny

		bij=dsqrt(bx(i,j)**2+by(i,j)**2)
		if(bij.ge.bmax)bmax=bij

	enddo
	enddo !10

	!!$     calculate div*B and its maximum with its location
	divbmax=0

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=1,ny
	do i=1,nx

		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
			0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

			if(dabs(divb(i,j)).ge.divbmax)then
				id=i
				jd=j
				divbmax=dabs(divb(i,j))
			endif

	enddo !20
	enddo
	!$omp enddo
	!$omp end parallel

	!!$ dimensionless div*B
	divbmax=divbmax*rmleng/bmax
	!!$      print*,"div b before ",divbmax
!!$		print*, divbmax, 'divbmax before'

	!!$     calculate the corrective factor grad*Phi by solving
	!!$      nabla^2 Phi=-div B using the SOR method with relaxation
	!!$      factor "relax"

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=-1,ny+2
	do i=-1,nx+2

		phi(i,j) = 1.d0

	enddo !30
	enddo
	!$omp enddo
	!$omp end parallel

	coe1=2.*(dy/dx+dx/dy)
	coe2=dy/dx
	coe3=dx/dy
	coe4=4*dx*dy
	coe5=0.d0

	relax=.25

	iteloop: do ite=1,itemax

		ermax=0.d0
		iter=ite

		do ipass = 1,2

			istart = ipass

			do j=1,ny
				do i=istart,nx,2

					coe5=2.d0*torus*dy/Rmaj(i)

					phiold=phi(i,j)

					phinew=coe2*(phi(i+2,j)+phi(i-2,j))+  &
									coe5*(phi(i+1,j)-phi(i-1,j))+  &
									coe3*(phi(i,j+2)+phi(i,j-2))+coe4*divb(i,j)

					phi(i,j)=relax*phinew/coe1+(1.-relax)*phiold

					err=dabs((phi(i,j)-phiold)/phiold)

					if(err.ge.ermax) then
						ermax=err
						id=i; jd=j
					endif

				enddo
				istart = istart + (-1)**(istart+1)
			enddo

		enddo

		!!$    calculate the maximum of Phi to determine the relative error 
		phimax = maxval(abs(phi))

		!!$     maximum relative error allowed

		!!$ apply the boundary conditions of zero normal field at walls (buondary points are on the wall)

		!$omp parallel default(shared)

		!$omp do private(i)
		do i=1,nx

			phi(i,ny+1) = (4.d0*phi(i,ny)-phi(i,ny-1))/3.d0	! on the wall
			phi(i,ny+2) = phi(i,ny)

			phi(i,0) = (4.d0*phi(i,1)-phi(i,2))/3.d0	! on the wall
			phi(i,-1) = phi(i,1)

		enddo
		!$omp enddo

		!$omp do private(j)
		do j=1,ny

			phi(0,j) = 1.d0	! on the wall
			phi(-1,j) = 2.d0 -phi(1,j)

			phi(nx+1,j) = 1.d0	! on the wall
			phi(nx+2,j) = 2.d0 -phi(nx,j)

		enddo
		!$omp enddo

		!$omp end parallel

	  if((ermax.le.err_tol).and.(ite>1)) exit iteloop
!		  if((ermax.le.err_tol).and.(ite>1))go to 1212

	enddo iteloop

	if(ite>itemax) print*,"reached itemax in div b"

	!!$     update the field

	!$omp parallel default(shared)
	!$omp do private(i,j)
	do j=0,ny+1
	do i=0,nx+1

		bx(i,j)=bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
		by(i,j)=by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)

	enddo
	enddo !100
	!$omp enddo
	!$omp end parallel

	!!$     calculate div*B and its maximum with its location
	divbmax=0

	do j=1,ny
	do i=1,nx

		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
			0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)

		if(abs(divb(i,j))>divbmax) then
			divbmax = abs(divb(i,j))
			id = i; jd = j
		endif

	enddo
	enddo !120

!!$ dimensionless div*B
	  divbmax=divbmax*rmleng/bmax
!!$      print*,"div b after ",divbmax
!!$      pause
	if(divB_flag==1) then
	  if(divbmax.ge.0.01) print*," divb_max = ",divbmax, id, jd 
	  if(iter.ge.150) print*," iterations in divb =",iter
		  if(divbmax>1.1d-2) then
	      	print*, 'divB: rewinding'
	      	goto 1
		  endif
	endif

	err_final = divbmax


	if((time_divB >= tprint).and.(time_divB > time_last))then

!		print*, 'divB, times = ', time_divB, tprint, time_last
!		pause

		time_last = time_divB

		open(69, file='div_B.plt')

	! open output file and print header for Tecplot
		write(69,*)'TITLE="solution at time t=',time_divB,'"'
		write(69,*)'Variables = "X", "Y","div_B"'
		write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'

		do j = 1,ny
		do i = 1,nx

			write(69,*)  xV(i), yV(j), divb(i,j)

		enddo
		enddo

		close(69)

	endif

!!$		print*, 'divB out'

	return

end subroutine div4_1_b_take2_0_edge_bis



!!$	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
!!$	subroutine divb_FFT(bx,by,time_divB)
!!$	!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
!!$	! uses an FFT transform in Z (y) and a direct tridiagonal solver in R (x) to correct the divergence of B
!!$	! of course this will only work if ny+2 is a power of 2
!!$	! also notice that boundary points are on the wall
!!$	! the index k is used for the harmonics
!!$
!!$	use mod_parameters, ONLY: dkind, dx,dy,dt,xlength,nx,ny, torus,  &
!!$				tprint, divergence_tolerance, divB_option, pi, ylength
!!$	use mod_arrays, only : nmx,nmy, Rmaj, xV, yV
!!$
!!$	implicit none
!!$
!!$	real(kind=dkind) :: divBmax, time_divB
!!$
!!$	real(kind=dkind), dimension(0:nmx+1,0:nmy+1) :: bx, by, divb
!!$	real(kind=dkind) :: phi(-1:nmx+2,-1:nmy+2)
!!$
!!$	logical, save :: do_check = .true.
!!$	integer, save :: ipow = 0
!!$	real(kind=dkind), dimension(:), allocatable, save :: cos_array
!!$	real(kind=dkind), dimension(0:nmx+1, 0:nmy+1) :: divB_k	! first index is R/x, second is k
!!$	real(kind=dkind), dimension(0:nmy+1) :: temp_FT
!!$	real(kind=dkind), dimension(0:nx+1) :: a, b, c, r
!!$	real(kind=dkind) :: Lzdx, dxm1, diag
!!$	integer :: ierror
!!$	integer :: p, ntemp
!!$	integer :: i, j, k
!!$
!!$	! check if ny is a power of two (to be done only once)
!!$	if(do_check) then
!!$
!!$		ntemp = ny+2
!!$
!!$		do while(ntemp>2)
!!$
!!$			p = modulo(ntemp,2)
!!$
!!$			if(p/=0) then
!!$
!!$				! ntemp is not divisible by 2 -> ny is not a power of 2!
!!$				print*, 'error in divB_FFT: ny+2 is not a power of 2'
!!$				print*, 'divb_option changed to 0'
!!$
!!$				divb_option = 0
!!$				return
!!$
!!$			endif
!!$
!!$			ntemp = ntemp/2
!!$
!!$		enddo
!!$
!!$		do_check = .false.
!!$
!!$	endif
!!$
!!$	if(ipow==0) then
!!$
!!$		do while(2**ipow/=ny+2)
!!$			ipow = ipow+1
!!$		enddo
!!$
!!$		allocate(cos_array(0:ny+1))
!!$		call inifct(cos_array,ny+2,ierror,ipow)
!!$		if(ierror/=0) then
!!$			write(*,*) 'error in FFT divergence setup'
!!$			read(*, '() ')
!!$		endif
!!$
!!$	endif
!!$
!!$!---------------------------------------------------------
!!$
!!$!	do j=1,ny
!!$!	do i=1,nx
!!$!
!!$!		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
!!$!		0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)
!!$!
!!$!	enddo
!!$!	enddo
!!$!
!!$!	open(69, file='div_B_0.plt')
!!$!
!!$!! open output file and print header for Tecplot
!!$!	write(69,*)'TITLE="solution at time t=',time_divB,'"'
!!$!	write(69,*)'Variables = "X", "Y","div_B"'
!!$!	write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'
!!$!
!!$!	do j = 1,ny
!!$!	do i = 1,nx
!!$!
!!$!		write(69,*)  xV(i), yV(j), divb(i,j)
!!$!
!!$!	enddo
!!$!	enddo
!!$!
!!$!	close(69)
!!$
!!$!-------------------------------------------------------------
!!$
!!$	! first, get the R-part of the divergence (due to the method, the Z-part is transformed integrating by parts)
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j)
!!$	do j = 0,ny+1
!!$	do i = 1,nx
!!$
!!$		divb(i,j) = 0.5d0*(bx(i+1,j)-bx(i-1,j))/dx + torus*bx(i,j)/Rmaj(i)
!!$
!!$	 enddo
!!$	 enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j,temp_FT)
!!$	do i = 1,nx
!!$
!!$		! sine transform of by term
!!$		temp_FT = by(i,:)
!!$		call sinft(temp_Ft,ny+2)
!!$
!!$		! cosine transform of bx term
!!$		call fct(divb(i,:),cos_array,ny+2,ipow)
!!$
!!$		do j = 0, ny+1
!!$!			divB_k(i,j) = divb(i,j) + by(i,ny+1) - by(i,0) + temp_Ft(j)
!!$			divB_k(i,j) = divb(i,j) + (by(i,ny+1) - by(i,0) + temp_Ft(j))  * 2.d0/(ny+2.d0) !/(ny+2)
!!$		enddo
!!$
!!$	 enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	! now solve the tridiagonal system for phi_k for all k's
!!$
!!$	Lzdx = ylength / dx
!!$	dxm1 = 1.d0/dx
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j,k,a,b,c,r,diag)
!!$	do k = 0, ny+1
!!$
!!$		diag = 2.d0*ylength / dx**2 + pi**2*k**2/ylength
!!$		diag = 2.d0 + (pi*k*dx/ylength)**2
!!$!		diag = 2.d0 + (pi*(k+1)*dx/ylength)**2
!!$
!!$		! build the matrix
!!$		do i = 1, nx
!!$
!!$!			a(i) = -Lzdx * (dxm1 - 0.5d0/Rmaj(i))
!!$!			b(i) = diag
!!$!			c(i) = -Lzdx * (dxm1 + 0.5d0/Rmaj(i))
!!$
!!$!			r(i) = divB_k(i,k)
!!$
!!$			a(i) = -(1.d0 - 0.5d0*dx/Rmaj(i))
!!$			b(i) = diag
!!$			c(i) = -(1.d0 + 0.5d0*dx/Rmaj(i))
!!$
!!$			r(i) = - dx**2 * divB_k(i,k) !2.d0*dx**2/ylength * divB_k(i,k)
!!$
!!$		enddo
!!$
!!$!		 ! zero normal field BC:
!!$!
!!$!		 a(0) = 0d0
!!$!		 b(0) = 1.d0
!!$!		 c(0) = -1.d0
!!$!		 r(0) = 0.d0
!!$!
!!$!		 a(nx+1) = -1.d0
!!$!		 b(nx+1) = 1.d0
!!$!		 c(nx+1) = 0.d0
!!$!		 r(nx+1) = 0.d0
!!$
!!$		 ! constant phi BC:
!!$
!!$		 a(0) = 0d0
!!$		 b(0) = 1.d0
!!$		 c(0) = 0.d0
!!$		 r(0) = 0.d0
!!$
!!$		 a(nx+1) = 0.d0
!!$		 b(nx+1) = 1.d0
!!$		 c(nx+1) = 0.d0
!!$		 r(nx+1) = 0.d0
!!$
!!$		 ! tridiagonal system solution
!!$
!!$!		if(k==0) then
!!$
!!$			call sgtsv( ny+2, 1, a(1:ny+1), b, c(0:ny), r, ny+2, ierror )
!!$
!!$			do i = 0, ny+1
!!$				phi(i,k) = r(i)
!!$			enddo
!!$
!!$!		else
!!$
!!$!			call tridiag(a,b,c,r,phi(0:ny+1,k),nx+2,nx+2)
!!$
!!$!		endif
!!$
!!$	enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	! apply inverse transform to phi
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j)
!!$	do i = 0, nx+1
!!$
!!$		call ifct(phi(i,:),cos_array,ny+2,ipow)
!!$
!!$		do j = 0, ny+1
!!$			phi(i,j) = phi(i,j) * 2.d0/(ny+2.d0)
!!$		enddo
!!$
!!$	enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	! apply BC
!!$
!!$	if(divB_option==-1) then
!!$
!!$		do j = 1, ny
!!$
!!$!			phi(0,j) = phi(1,j)
!!$!			phi(nx+1,j) = phi(nx,j)
!!$
!!$		enddo
!!$
!!$	elseif(divB_option==-2) then
!!$
!!$		do j = 1, ny
!!$
!!$			phi(0,j) = (4.d0*phi(1,j)-phi(2,j))/3.d0	! on the wall
!!$			phi(nx+1,j) = (4.d0*phi(nx,j)-phi(nx-1,j))/3.d0	! on the wall
!!$
!!$		enddo
!!$
!!$	endif
!!$
!!$	!$omp parallel default(shared)
!!$	!$omp do private(i,j)
!!$	do j=1,ny
!!$		do i=1,nx
!!$
!!$				bx(i,j) = bx(i,j)+(phi(i+1,j)-phi(i-1,j))/(2.*dx)
!!$				by(i,j) = by(i,j)+(phi(i,j+1)-phi(i,j-1))/(2.*dy)
!!$
!!$		 enddo
!!$	 enddo
!!$	!$omp enddo
!!$	!$omp end parallel
!!$
!!$	!!$ calculate div*B and its maximum with its location
!!$	divbmax=0
!!$
!!$	do j=1,ny
!!$	do i=1,nx
!!$
!!$		divb(i,j)=0.5*(bx(i+1,j)-bx(i-1,j))/dx+  &
!!$		0.5*(by(i,j+1)-by(i,j-1))/dy +torus*bx(i,j)/Rmaj(i)
!!$
!!$	enddo
!!$	enddo
!!$
!!$	open(69, file='div_B.plt')
!!$
!!$! open output file and print header for Tecplot
!!$	write(69,*)'TITLE="solution at time t=',time_divB,'"'
!!$	write(69,*)'Variables = "X", "Y","div_B"'
!!$	write(69,*)'ZONE I=',nx,',J=',ny,',F=Point'
!!$
!!$	do j = 1,ny
!!$	do i = 1,nx
!!$
!!$		write(69,*)  xV(i), yV(j), divb(i,j)
!!$
!!$	enddo
!!$	enddo
!!$
!!$	close(69)
!!$
!!$	end subroutine divb_FFT
!!$

!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine sinft(y,n)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
! sine discrete FFT
	use mod_parameters, only : dkind

	implicit none

	integer :: n
	real(kind=dkind) :: y(n)
	real(kind=dkind) :: wr, wi, wpr, wpi, wtemp, theta, y1, y2, sum
	integer :: j, m

	theta=3.14159265358979d0/dble(n)
	wr=1.0d0
	wi=0.0d0
	wpr=-2.0d0*dsin(0.5d0*theta)**2
	wpi=dsin(theta)
	y(1)=0.0
	m=n/2

	do j = 1,m

		wtemp=wr
		wr=wr*wpr-wi*wpi+wr
		wi=wi*wpr+wtemp*wpi+wi
		y1=wi*(y(j+1)+y(n-j+1))
		y2=0.5*(y(j+1)-y(n-j+1))
		y(j+1)=y1+y2
		y(n-j+1)=y1-y2

	enddo	

!	call realft(y,m,+1)
	call realft_boh(y,m,+1)

	sum=0.0
	y(1)=0.5*y(1)
	y(2)=0.0

	do j=1,n-1,2

		sum=sum+y(j)
		y(j)=y(j+1)
		y(j+1)=sum

	enddo

	return

end subroutine sinft


!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine cosft(y,n)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
! cosine discrete FFT

	use mod_parameters, only : dkind, pi

	implicit none

	integer n
	real(kind=dkind) :: wr,wi,wpr,wpi,wtemp,theta
	real(kind=dkind) :: y(n)
	real(kind=dkind) :: sum, y1, y2, even, odd
	real(kind=dkind) :: enf0, sumo, sume
	integer :: i, j, m

	theta = pi/(n*1.d0)

	wr=1.0d0
	wi=0.0d0
	wpr=-2.0d0*dsin(0.5d0*theta)**2
	wpi=dsin(theta)
	sum = 0.5d0*(y(1)-y(n+1))
	y(1)= 0.5d0*(y(1)+y(n+1))

	do j=1,n/2-1
		wtemp=wr
		wr=wr*wpr-wi*wpi+wr
		wi=wi*wpr+wtemp*wpi+wi
		y1=0.5*(y(j+1)+y(n-j+1))
		y2=(y(j+1)-y(n-j+1))
		y(j+1)=y1-wi*y2
		y(n-j+1)=y1+wi*y2
		sum=sum+wr*y2
	enddo

	call realft(y,n,+1)

	y(n+1) = y(2)
	y(2)=sum

	do j=4,n,2
		sum=sum+y(j)
		y(j)=sum
	enddo

	return

end subroutine cosft


!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine cosft_stag(y,n,isign)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
! cosine discrete FFT, staggered version

	use mod_parameters, only : dkind, pi

	implicit none

	integer n, isign
	real(kind=dkind) :: wr,wi,wpr,wpi,wtemp,theta
	real(kind=dkind) :: y(n)
	real(kind=dkind) :: sum, y1, y2, even, odd
	real(kind=dkind) :: enf0, sumo, sume
	integer :: i, j, m

	theta = pi/(n*1.d0)

	wr=1.0d0
	wi=0.0d0
	wpr=-2.0d0*dsin(0.5d0*theta)**2
	wpi=dsin(theta)
	sum=y(1)

	m=n/2

	do j=1,m
		wtemp=wr
		wr=wr*wpr-wi*wpi+wr
		wi=wi*wpr+wtemp*wpi+wi
		y1=0.5*(y(j+1)+y(n-j+1))
		y2=(y(j+1)-y(n-j+1))
		y(j+1)=y1-wi*y2
		y(n-j+1)=y1+wi*y2
		sum=sum+wr*y2
	enddo

	call realft(y,m,+1)
	y(2)=sum

	do j=4,n,2
		sum=sum+y(j)
		y(j)=sum
	enddo

	if (isign==-1) then

		even=y(1)
		odd=y(2)

		do i=3,n-1,2
			even=even+y(i)
			odd=odd+y(i+1)
		enddo

		enf0=2.0*(even-odd)
		sumo=y(1)-enf0
		sume=(2.0*odd/float(n))-sumo
		y(1)=0.5*enf0
		y(2)=y(2)-sume

		do i=3,n-1,2
			y(i)=y(i)-sumo
			y(i+1)=y(i+1)-sume
		enddo

	endif

	return

end subroutine cosft_stag

!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine realft(data_in,n,isign)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	use mod_parameters, only : dkind, pi

	implicit none

	integer :: n, isign
	real(kind=dkind) :: wr,wi,wpr,wpi,wtemp,theta
	real(kind=dkind) :: c1, c2
!	real(kind=dkind), dimension(*) :: data_in
	real(kind=dkind), dimension(2*n) :: data_in !check!!
	real(kind=dkind) :: h1r, h1i, h2r, h2i
	integer :: n2p3, i1, i2, i3, i4
	integer :: i


	theta=pi/(n*0.5d0)

	c1=0.5d0

	if (isign==1) then
		c2=-0.5d0
		call four1(data_in,n/2,+1)
	else
		c2=0.5d0
		theta=-theta
	endif

	wpr=-2.0d0*dsin(0.5d0*theta)**2
	wpi=dsin(theta)
	wr = 1.d0+wpr
	wi = wpi

	n2p3=2*n+3

	do i = 2,n/4

		i1=2*i-1
		i2=i1+1
		i3=n2p3-i2
		i4=i3+1

		h1r=c1*(data_in(i1)+data_in(i3))
		h1i=c1*(data_in(i2)-data_in(i4))
		h2r=-c2*(data_in(i2)+data_in(i4))
		h2i=c2*(data_in(i1)-data_in(i3))

		data_in(i1)=h1r+wr*h2r-wi*h2i
		data_in(i2)=h1i+wr*h2i+wi*h2r
		data_in(i3)=h1r-wr*h2r+wi*h2i
		data_in(i4)=-h1i+wr*h2i+wi*h2r

		wtemp=wr
		wr=wr*wpr-wi*wpi+wr
		wi=wi*wpr+wtemp*wpi+wi

	enddo

	if (isign==1) then
		h1r = data_in(1)
		data_in(1) = h1r + data_in(2)
		data_in(2) = h1r-data_in(2)
	else
		h1r = data_in(1)
		data_in(1) = c1*(h1r+data_in(2))
		data_in(2) = c1*(h1r-data_in(2))
		call four1(data_in,n/2,-1)
	endif

	return

end subroutine realft

!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine realft_boh(data_in,n,isign)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	use mod_parameters, only : dkind, pi

	implicit none

	integer :: n, isign
	real(kind=dkind) :: wr,wi,wpr,wpi,wtemp,theta
	real(kind=dkind) :: c1, c2
!	real(kind=dkind), dimension(*) :: data_in
	real(kind=dkind), dimension(*) :: data_in !check!!
	real(kind=dkind) :: h1r, h1i, h2r, h2i
	integer :: n2p3, i1, i2, i3, i4
	integer :: i


	theta=pi/(n*1.d0)

	wr=1.0d0
	wi=0.0d0
	c1=0.5d0

	if (isign==1) then
		c2=-0.5d0
		call four1(data_in,n,+1)
		data_in(2*n+1)=data_in(1)
		data_in(2*n+2)=data_in(2)
	else
		c2=0.5d0
		theta=-theta
		data_in(2*n+1)=data_in(2)
		data_in(2*n+2)=0.0
		data_in(2)=0.0
	endif

	wpr=-2.0d0*dsin(0.5d0*theta)**2
	wpi=dsin(theta)
	n2p3=2*n+3

	do i=1,n/2+1

		i1=2*i-1
		i2=i1+1
		i3=n2p3-i2
		i4=i3+1

		h1r=c1*(data_in(i1)+data_in(i3))
		h1i=c1*(data_in(i2)-data_in(i4))
		h2r=-c2*(data_in(i2)+data_in(i4))
		h2i=c2*(data_in(i1)-data_in(i3))

		data_in(i1)=h1r+wr*h2r-wi*h2i
		data_in(i2)=h1i+wr*h2i+wi*h2r
		data_in(i3)=h1r-wr*h2r+wi*h2i
		data_in(i4)=-h1i+wr*h2i+wi*h2r

		wtemp=wr
		wr=wr*wpr-wi*wpi+wr
		wi=wi*wpr+wtemp*wpi+wi

	enddo

	if (isign==1) then
		data_in(2)=data_in(2*n+1)
	else
		call four1(data_in,n,-1)
	endif

	return

end subroutine realft_boh

!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine four1(data_in,nn,isign)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	use mod_parameters, only : dkind

	implicit none

	integer :: nn, isign
	real(kind=dkind) :: wr,wi,wpr,wpi,wtemp,theta
!	real(kind=dkind), dimension(*) :: data_in
	real(kind=dkind), dimension(2*nn) :: data_in
	real(kind=dkind) :: tempr, tempi
	integer :: mmax, m, n, istep, i, j

	n=2*nn
	j=1

	do i=1,n,2

		if(j>i)then

			tempr=data_in(j)
			tempi=data_in(j+1)
			data_in(j)=data_in(i)
			data_in(j+1)=data_in(i+1)
			data_in(i)=tempr
			data_in(i+1)=tempi

		endif

		m=n/2

		1       if ((m.ge.2).and.(j.gt.m)) then
			j=j-m
			m=m/2
			go to 1
		endif

		j=j+m

	enddo

	mmax=2

	2     if (n.gt.mmax) then

		istep=2*mmax
		theta=6.28318530717959d0/(isign*mmax)
		wpr=-2.d0*dsin(0.5d0*theta)**2
		wpi=dsin(theta)
		wr=1.d0
		wi=0.d0

		do m=1,mmax,2

			do i=m,n,istep

				j=i+mmax
				tempr = wr*data_in(j)-wi*data_in(j+1)
				tempi = wr*data_in(j+1)+wi*data_in(j)
				data_in(j)=data_in(i)-tempr
				data_in(j+1)=data_in(i+1)-tempi
				data_in(i)=data_in(i)+tempr
				data_in(i+1)=data_in(i+1)+tempi

			enddo

		wtemp=wr
		wr=wr*wpr-wi*wpi+wr
		wi=wi*wpr+wtemp*wpi+wi

		enddo

		mmax=istep

		go to 2

	endif

	return

end subroutine four1

!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine inifct(c,n,ifault,ipow)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	use mod_parameters, only : dkind, pi

	implicit none

	integer :: n
	integer :: ifault
	integer :: ipow
	real(kind=dkind) :: c(n)
	real(kind=dkind) :: one, two
	parameter (one=1.0d0, two=2.0d0)
	integer, parameter :: maxpow=20
	real(kind=dkind) :: zcos, a
!	common /fctlen/ length,ipow
!	save /fctlen/
	zcos(a) = dcos(a)

	integer :: nv2, nitems, ifac, igroup, item
	integer :: i, ii, k


! check for valid transform size

!	print *, 'inifct -- n=',n

	ifault = 0

	if(n.le.1) then
		ifault = 1
		return
	endif

	ii = 1

	do 180 k = 1, maxpow

		ii = ii + ii
		if (ii .eq. n) goto 190
		if (ii .gt. n) then
		ifault = 3
		return
		endif

	180   continue

	ifault = 2

	return

	! if we reach this point, transform length is valid.

	190   ipow = k
	nv2 = n/2

	! put values into top half of c array

	do 200 i = 1, nv2
	200     c(nv2 + i) = dble(4*i-3)

	! copy scaled values from top half of c array to bottom half

	nitems = 1
	ifac = nv2

	do igroup = 1, ipow - 1

		do item = 1, nitems        
			c(nitems + item) = dble(ifac) * c(nv2 + item)
		enddo

		nitems = nitems + nitems
		ifac = ifac / 2

	enddo

	! take cosine of each element of c array

	do i = 2, n
		c(i) = one / (two * zcos ( c(i) * pi / dble(n+n) ))
	enddo

	return

end subroutine inifct

!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine fct(f,c,n,ipow)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
! fast discrete cosine transform

	use mod_parameters, only : dkind

	implicit none

	integer :: n
	integer :: ipow
	real(kind=dkind) :: f(n), c(n)

	real(kind=dkind), parameter :: two=2.0, rt2inv = 0.707106781186547524400844362105d0
	real(kind=dkind) :: temp,cfac,twovn

	integer :: nv2, ngrps, iwngsp, incr, istage, ibutfy, ibase, igroup, nthrds, istpsz, nsteps, thread, index
	integer :: i, ii1, ii2, istep

	! no need to check for n being a power of two, it is done once and for all elsewhere

!!$	ifault = 0
!!$ if((n.ne.length).or.(n.le.1)) call inifct(c,n,ifault)
!!$ if(ifault.ne.0) return

	nv2 = n/2

!!$     scramble the data in f into odd-even ordering
	call scramb(f,n)

!!$     do the butterflies

	ngrps = 1
	iwngsp = nv2
	incr = n

	do istage = ipow, 1, -1

		do ibutfy = 1, iwngsp

			cfac = c(iwngsp + ibutfy)
			ibase = 0

			do igroup = 1, ngrps

				ii1 = ibase + ibutfy
				ii2 = ii1 + iwngsp
				temp = f(ii2)
				f(ii2) = cfac * (f(ii1) - temp)
				f(ii1) = f(ii1) + temp
				ibase = ibase + incr

			enddo

		enddo

		incr = incr / 2
		iwngsp = iwngsp / 2
		ngrps = ngrps + ngrps

	enddo


!!$     bit-reverse order array f
	call bitrev(f,n)

!!$     do the sums

	nthrds = n/4
	istpsz = nv2
	nsteps   = 2

	do istage = ipow-1, 1, -1

		do thread = 1, nthrds

			index = nthrds + thread

			do istep = 1, nsteps-1
				f(index) = f(index) + f(index + istpsz)
				index = index + istpsz
			enddo

		enddo

		nsteps = nsteps + nsteps
		nthrds = nthrds / 2
		istpsz = istpsz / 2

	enddo

	!!$     scale the result

	twovn = two / (float(n))

	do i = 1, n
		f(i) = f(i) * twovn
	enddo

	f(1) = f(1) * rt2inv

	return

end subroutine fct

!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine ifct(f,c,n,ipow)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
! fast discrete cosine transform

	use mod_parameters, only : dkind

	implicit none

	integer :: n
	integer ::ifault
	integer :: ipow
	real(kind=dkind) :: f(n), c(n)
	real(kind=dkind), parameter :: rt2inv = 0.707106781186547524400844362105d0
	real (kind=dkind) :: temp,cfac
	integer :: nv2, ngrps, iwngsp, incr, istage, ibutfy, ibase, igroup, nthrds, istpsz, nsteps, thread, index
	integer :: i, ii1, ii2, istep

	nv2 = n/2
	f(1) = f(1) * rt2inv
	
	! do the sums

	nthrds = 1
	istpsz = 2
	nsteps   = nv2

	do istage = 1, ipow-1

		do thread = 1, nthrds

			index = n - thread + 1

			do istep = 1, nsteps-1
				f(index) = f(index) + f(index - istpsz)
				index = index - istpsz
			enddo

		enddo

		nsteps = nsteps / 2
		nthrds = nthrds + nthrds
		istpsz = istpsz + istpsz

	enddo

	! bit-reverse order array f

	call bitrev(f,n)

	!do the butterflies

	ngrps = nv2
	iwngsp = 1
	incr = 2

	do istage = 1, ipow

		do ibutfy = 1, iwngsp

			cfac = c(iwngsp + ibutfy)
			ibase = 0

			do igroup = 1, ngrps

				ii1 = ibase + ibutfy
				ii2 = ii1 + iwngsp
				temp = cfac * f(ii2)
				f(ii2) = f(ii1) - temp
				f(ii1) = f(ii1) + temp
				ibase = ibase + incr

			enddo

		enddo

		incr = incr + incr
		iwngsp = iwngsp + iwngsp
		ngrps = ngrps/2

	enddo

	! unscramble from odd-even ordering to natural ordering

	call uscram(f,n)

	return

end subroutine ifct


!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine scramb(f,n)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	use mod_parameters, only : dkind

	implicit none

	integer :: n
	real(kind=dkind) :: f(n)
	real(kind=dkind) :: temp
	integer :: nv2, nv4
	integer :: i, ii1, ii2

	nv2 = n/2
	nv4 = n/4

	call bitrev(f,n)
	call bitrev(f,nv2)
	call bitrev(f(nv2+1),nv2)

	ii1 = n
	ii2 = nv2 + 1

	do i = 1,nv4

		temp = f(ii1)
		f(ii1) = f(ii2)
		f(ii2) = temp
		ii1 = ii1-1
		ii2 = ii2+1

	enddo

	return

end subroutine scramb

!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine uscram(f,n)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	use mod_parameters, only : dkind

	implicit none

	integer :: n
	real(kind=dkind) :: f(n)
	real(kind=dkind) :: temp
	integer :: nv2, nv4
	integer :: i, ii1, ii2

	nv2 = n/2
	nv4 = n/4
	ii1 = n
	ii2 = nv2+1

	do i = 1,nv4
		temp = f(ii1)
		f(ii1) = f(ii2)
		f(ii2) = temp
		ii1 = ii1-1
		ii2 = ii2+1
	enddo

	call bitrev(f,nv2)
	call bitrev(f(nv2+1),nv2)
	call bitrev(f,n)

	return

end subroutine uscram


!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
subroutine bitrev(f,n)
!@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	use mod_parameters, only : dkind

	implicit none

	integer :: n
	real(kind=dkind) :: f(n)
	real(kind=dkind) :: temp
	integer :: m, nv2
	integer :: i, j

	if(n.le.2) then
		return
	endif

	nv2 = n/2
	j = 1

	do i = 1, n

		if (i .lt. j) then
			temp = f(j)
			f(j) = f(i)
			f(i) = temp
		endif

		m = nv2
		10      if (j .le. m) goto 20
		j = j - m
		m = (m + 1) / 2
		goto 10

		20 j = j + m

	enddo

	return

end subroutine bitrev


