! $Id$
!
! CPARAM logical, parameter :: ltraining = .true.
!
! MAUX CONTRIBUTION 6
!
!***************************************************************
!
  module Training
!
!  Couples the code to the TorchFort library: the torchfort_* routines
!  imported below are used to create, load, save, train and evaluate a model.
!  The module owns six auxiliary f-array slots (see MAUX CONTRIBUTION above)
!  addressed through itau and the itau?? component indices.
!
    use Cdata
    use General, only: itoa
    use Messages
    use Cudafor, only: cudaSetDevice,CUDASUCCESS,cudaGetDeviceCount
    use Torchfort, only: torchfort_create_distributed_model, torchfort_create_model,&
                         torchfort_result_success,torchfort_load_model,torchfort_load_checkpoint,&
                         torchfort_save_model,torchfort_save_checkpoint,&
                         torchfort_inference,torchfort_train
    !use iso_c_binding

    implicit none
!
!  The header is included exactly once; including it a second time would
!  repeat whatever statements it contains.
!
    include 'training.h'
!
!  Device index handed to torchfort_create_model in the non-MPI case.
!
    integer :: model_device=0
!
!  Run parameters (members of the training_run_pars namelist below).
!
    integer :: it_train=-1, it_train_chkpt=-1, it_train_start=1
!
!  Device-resident buffers exchanged with TorchFort.
!
    !real(KIND=rkind4), dimension(:,:,:,:,:), allocatable, device :: input, label, output
    real, dimension(:,:,:,:,:), allocatable, device :: input, label, output
    real :: train_loss   !(KIND=rkind4) :: train_loss
!
!  f-array indices: itau is the first of the six registered slots, the
!  remaining names address the individual tensor components.
!
    integer :: itau, itauxx, itauxy, itauxz, itauyy, itauyz, itauzz
!
!  Model name, configuration file name and model file name
!  (model_file is set to trim(model)//'.pt' during initialization).
!
    character(LEN=fnlen) :: model='model', config_file="config_mlp_native.yaml", model_file
!
!  NOTE(review): lfortran_launched is assigned in initialize_training, but
!  luse_trained_tau has no default although it is read there -- confirm that
!  it is always provided through the namelist.
!
    logical :: lroute_via_cpu=.false., lfortran_launched, luse_trained_tau, lwrite_sample=.false., lscale=.true.
    real :: max_loss=1.e-4

    integer :: idiag_loss=0            ! DIAG_DOC: torchfort training loss
    integer :: idiag_tauerror=0        ! DIAG_DOC: $\sqrt{\left<(\sum_{i,j} u_i*u_j - tau_{ij})^2\right>}$

    namelist /training_run_pars/ config_file, model, it_train, it_train_start, it_train_chkpt, &
                                 luse_trained_tau, lscale, lwrite_sample, max_loss, lroute_via_cpu
!
!  Output locations, last library/CUDA status code and checkpoint counters.
!
    character(LEN=fnlen) :: model_output_dir, checkpoint_output_dir
    integer :: istat, train_step_ckpt, val_step_ckpt
!
!  ltrained: a stored model file was found at start-up (set in initialize_training).
!
    logical :: ltrained=.false., lckpt_written=.false.
!
!  Host-side work arrays and the bounds used by scale/descale.
!
    real, dimension (mx,my,mz,3) :: uumean
    real :: tauerror, input_min, input_max, output_min, output_max
    real, dimension(mx, my, mz, 6) :: tau_pred

    contains
!***************************************************************
    subroutine initialize_training
!
!  Select the CUDA device, set up the output paths, create the TorchFort
!  model and load either a stored model or, failing that, a checkpoint.
!  Finally allocate the device buffers when the Fortran side drives TorchFort.
!
      use File_IO, only: file_exists
      use Mpicomm, only: mpibcast, MPI_COMM_WORLD
      use Syscalls, only: system_cmd

      character(LEN=fnlen) :: modelfn
      integer :: ndevs

      lfortran_launched = .not. lgpu .or. lroute_via_cpu
      if (lreloading) return

      if (.not.lhydro) call fatal_error('initialize_training','needs HYDRO module')

      istat = cudaGetDeviceCount(ndevs)
      if (istat /= CUDASUCCESS) call fatal_error('initialize_training','cudaGetDeviceCount failed')
!
!  Guard the mod(iproc,ndevs) below against a zero device count.
!
      if (ndevs < 1) call fatal_error('initialize_training','no CUDA device available')
      istat = cudaSetDevice(mod(iproc,ndevs))
      if (istat /= CUDASUCCESS) call fatal_error('initialize_training','cudaSetDevice failed')

      model_output_dir=trim(datadir)//'/training/'
      checkpoint_output_dir=model_output_dir

      model_file = trim(model)//'.pt'
      modelfn=trim(model_output_dir)//trim(model_file)
!
!  Only root inspects the file system; the result is broadcast afterwards.
!
      if (lroot) then
        if (.not.file_exists(model_output_dir)) then
          call system_cmd('mkdir '//trim(model_output_dir))
        else
          ltrained = file_exists(trim(modelfn))
        endif
      endif
      call mpibcast(ltrained)
!
!  TorchFort create model
!
      print*, 'CONFIG FILE=', trim(model_output_dir)//trim(config_file)
      if (lmpicomm) then
        istat = torchfort_create_distributed_model(trim(model), trim(model_output_dir)//trim(config_file), &
                                                   MPI_COMM_WORLD, mod(iproc,ndevs))
      else
        istat = torchfort_create_model(trim(model), trim(model_output_dir)//trim(config_file), model_device)
      endif
      if (istat /= TORCHFORT_RESULT_SUCCESS) then
        call fatal_error("initialize_training","when creating model "//trim(model)//": istat="//trim(itoa(istat)))
      else
        call information('initialize_training','TORCHFORT LIB LOADED SUCCESFULLY')
      endif

      if (ltrained) then
        istat = torchfort_load_model(trim(model), trim(modelfn))
        if (istat /= TORCHFORT_RESULT_SUCCESS) then
          call fatal_error("initialize_training","when loading model: istat="//trim(itoa(istat)))
        else
          call information('initialize_training','TORCHFORT MODEL "'//trim(modelfn)//'" LOADED SUCCESFULLY')
        endif
      else
        !if (file_exists(trim(checkpoint_output_dir)//'/'//trim(model)//'.pt')) then
        if (file_exists(trim(checkpoint_output_dir)//'/model.pt')) then
          !istat = torchfort_load_checkpoint(trim(model), trim(checkpoint_output_dir), train_step_ckpt, val_step_ckpt)
          istat = torchfort_load_checkpoint('model', trim(checkpoint_output_dir), train_step_ckpt, val_step_ckpt)
          if (istat /= TORCHFORT_RESULT_SUCCESS) then
            call fatal_error("initialize_training","when loading checkpoint: istat="//trim(itoa(istat)))
          else
            call information('initialize_training','TORCHFORT CHECKPOINT LOADED SUCCESFULLY')
          endif
        endif
      endif
!
!  The trained tau can only be used if a trained model was actually found.
!
      luse_trained_tau = luse_trained_tau.and.ltrained

      if (lrun .and. lfortran_launched) then
        allocate(input (mx, my, mz, 3, 1))
        allocate(output(mx, my, mz, 6, 1))
        allocate(label (mx, my, mz, 6, 1))
      endif

    endsubroutine initialize_training
!***********************************************************************
    subroutine register_training
!
!  Register slots in f-array for the six independent components of the Reynolds stress tensor tau.
!
      use FArrayManager
!
!  Identify version number (generated automatically by SVN).
!
      if (lroot) call svn_id( &
           "$Id$")
!
      call farray_register_auxiliary('tau',itau,vector=6,on_gpu=lgpu)
!
!  Indices to access tau. The components are laid out contiguously as
!  xx, xy, xz, yy, yz, zz so that the range itauxx:itauzz spans all six
!  slots; this module pairs that range with six-component arrays.
!
      itauxx=itau; itauxy=itau+1; itauxz=itau+2; itauyy=itau+3; itauyz=itau+4; itauzz=itau+5

    endsubroutine register_training
!***********************************************************************
    subroutine read_training_run_pars(iostat)
!
!  Read the training_run_pars namelist from parallel_unit.
!
!  23-jan-24/MR: coded
!
      use File_io, only: parallel_unit
!
      integer, intent(out) :: iostat
!
      read(parallel_unit, NML=training_run_pars, IOSTAT=iostat)

    endsubroutine read_training_run_pars
!***************************************************************
    subroutine write_training_run_pars(unit)
!
!  Write the training_run_pars namelist to the given unit.
!
      integer, intent(in) :: unit

      write(unit, NML=training_run_pars)

    endsubroutine write_training_run_pars
!***************************************************************
    subroutine training_after_boundary(f)
!
!  With a trained model: run inference and either evaluate the deviation
!  from the tau computed by calc_tau (diagnostic/video steps) or store the
!  inferred tau in f. Without a trained model: train on first substeps.
!  Optionally writes sample output for plotting.
!
!  NOTE(review): output is allocated in initialize_training only when
!  lrun .and. lfortran_launched, yet it is read below also outside
!  lfortran_launched branches -- confirm this path is never taken then.
!
      real, dimension (mx,my,mz,mfarray) :: f

      if (ltrained) then
        call infer(f)
        if ((ldiagnos.or.lvideo).and.lfirst) then
          call calc_tau(f)
          if (lfortran_launched) then
!
!  Copy data from device to host.
!
            f(:,:,:,itauxx:itauzz) = f(:,:,:,itauxx:itauzz) - output(:,:,:,:,1)
          endif
          tauerror = sum(f(l1:l2,m1:m2,n1:n2,itauxx:itauzz)**2)/nx
        else
          f(:,:,:,itauxx:itauzz) = output(:,:,:,:,1)
        endif
      else
        if (lfirst) call train(f)
      endif
!
!  output for plotting
!
      if (lvideo .or. (lwrite_sample .and. mod(it, 50)==0)) then
        ! call calc_tau(f)
        call infer(f)
        tau_pred = output(:,:,:,:,1)      ! device to host
        if (lscale) call descale(tau_pred, output_min, output_max)
        if (lwrite_sample .and. mod(it, 50)==0) then
          call write_sample(f(:,:,:,itauxx), mx, my, mz, "target_"//trim(itoa(iproc))//".hdf5")
          call write_sample(tau_pred(:,:,:,1), mx, my, mz, "pred_"//trim(itoa(iproc))//".hdf5")
        endif
      endif

    endsubroutine training_after_boundary
!***************************************************************
    subroutine infer(f)
!
!  Run the model. When launched from Fortran, the smoothed (and optionally
!  scaled) velocity is copied to the device buffer and passed to
!  torchfort_inference; otherwise the work is delegated to infer_gpu.
!
      use Gpu, only: get_ptr_gpu_training, infer_gpu
      use Sub, only: smooth

      real, dimension (mx,my,mz,mfarray) :: f

      if (lfortran_launched) then
!
!  Smooth velocity -> "mean".
!
        uumean = f(:,:,:,iux:iuz)
        call smooth(uumean,1,3,lgauss=.true.)
        if (lscale) call scale(uumean, input_min, input_max)
!
!  Copy data from host to device.
!
        input(:,:,:,:,1) = uumean
        istat = torchfort_inference(trim(model), input, output)
!
!  istat is only set in this branch, so it is only checked here.
!
        if (istat /= TORCHFORT_RESULT_SUCCESS) &
          call fatal_error("infer","istat="//trim(itoa(istat)))
      else
        !istat = torchfort_inference(model, get_ptr_gpu_training(iux,iuz), &
        !                            get_ptr_gpu_training(itauxx,itauzz))
        call infer_gpu(0)
      endif

    endsubroutine infer
!***************************************************************
    subroutine scale(f, minvalue, maxvalue)
!
!  Map f in place from [minvalue,maxvalue] to [0,1].
!  The caller must ensure maxvalue /= minvalue.
!
      real, dimension (:,:,:,:), intent(inout) :: f
      real, intent(in) :: minvalue, maxvalue

      f = (f - minvalue)/(maxvalue - minvalue)

    endsubroutine scale
!***************************************************************
    subroutine descale(f, minvalue, maxvalue)
!
!  Inverse of scale: map f in place from [0,1] back to [minvalue,maxvalue].
!
      real, dimension (:,:,:,:), intent(inout) :: f
      real, intent(in) :: minvalue, maxvalue

      f = f*(maxvalue - minvalue) + minvalue

    endsubroutine descale
!***************************************************************
    subroutine train(f)

      use Gpu, only: get_ptr_gpu_training, train_gpu, infer_gpu

      real, dimension (mx,my,mz,mfarray) :: f

      if (it