;---------------------------------------------------------------------------------------------------
;
; name:			geoms_vntools.pro
; description: 	GEOMS data version name checker tool
;
; 2019-12-05, v2.00	Ian Boyd	initial implementation (version aligned with geoms_qa)
; 2020-02-25, v2.01 Ian Boyd  changes to align with version 2.3 of the Data Versioning Guidelines
; 2020-04-27, v2.02 Ian Boyd  Allow DATA_SOURCE_01 inclusion/exclusion settings to cover more than 
;                             one value - e.g. UVVIS.DOAS would mean the version name can be used 
;                             in any file with UVVIS.DOAS as part of the DATA_SOURCE value
; 2023-12-18, v2.03 Ian Boyd  Make keyword checks case sensitive                            
;---------------------------------------------------------------------------------------------------
FUNCTION is_a_number_vn,value

  if strtrim(value, 2) eq '' then RETURN, 0B $
  else begin
    ON_IOERROR, ConversionError
    ;separate value into individual characters and test
    vlen = strlen(value)
    for i = 0, vlen-1 do begin
      subval = strmid(value, i, 1)
      if strtrim(subval, 2) eq '' then RETURN, 0B
      n = DOUBLE(subval)
    endfor
    return,1B
    ConversionError:
    return, 0B
  endelse
END

;---------------------------------------------------------------------------------------------------
FUNCTION determine_current_file, chFileGEOMSvnspec
;determine the most up to date list file based on the version in the file name (e.g. 01R001)

result = File_Search(chFileGEOMSvnspec, count=vncnt)

if vncnt eq 0 then return, '-1' $
else if vncnt eq 1 then return, result $
else begin
  vr = intarr(2, vncnt)
  for i = 0, vncnt-1 do begin
    ftest = strmid( strupcase(result[i]),strpos(result[i],'.',/Reverse_Search)-6,6)
    ftest = ftest+'      ' ;add extra elements to avoid error
    vval = strmid(ftest,0,2)
    rval = strmid(ftest,3,3)
    rchar = strmid(ftest,2,1)
    if (is_a_number_vn(vval)) and (is_a_number_vn(rval)) and (rchar eq 'R') then begin
      vr[0,i] = fix(vval) & vr[1,i] = fix(rval)
    endif
  endfor
  if (max(vr[0,*]) eq 0) and (max(vr[1,*]) eq 0) then return, '-1' $
  else begin
    maxv = max(vr[0,*])
    gi = where( vr[0,*] eq maxv) ;determine the files that have the highest version number
    maxvr = max(vr[1,gi], mxvri) ;determine the highest revision number for the files with the highest version number
    return, result[gi[mxvri]]
  endelse
endelse

end

;---------------------------------------------------------------------------------------------------
FUNCTION return_ga_check_values, ga
;return all the global attribute values necessary to do the Version Name checks
;including VERSION_NAME, DATA_SOURCE_01, FILE_ACCESS, PI_NAME, AFFILIATION (from DATA_SOURCE)
;and DATA_LOCATION. Note: returns values in uppercase (1st index) and as written to the file
;(2nd index)

  r_vals = STRARR(6,2)
  ga=STRTRIM(ga,2)
  dsi = WHERE(STRMID(STRUPCASE(ga),0,11) eq 'DATA_SOURCE',dscnt)
  if dscnt eq 1 then begin
    dsv = strmid(ga[dsi[0]], strpos(ga[dsi[0]],'=')+1) ;everything after the '=' sign
    dsx = strsplit(strtrim(dsv,2),'_',/Extract,count=cdsx)
    if cdsx eq 3 then begin
      r_vals[0,0] = strupcase(dsx[2]) & r_vals[0,1] = dsx[2]
    endif
    if cdsx ge 1 then begin
      r_vals[1,0] = strupcase(dsx[0]) & r_vals[1,1] = dsx[0]
    endif
    if cdsx ge 2 then begin
      r_vals[4,0] = strmid( strupcase(dsx[1]), 0, strlen(dsx[1])-3)
      r_vals[4,1] = strmid( dsx[1], 0, strlen(dsx[1])-3)
    endif
  endif

  dsi = WHERE(STRMID(STRUPCASE(ga),0,11) eq 'FILE_ACCESS',dscnt)
  if dscnt eq 1 then begin
    dsv = strmid(ga[dsi[0]], strpos(ga[dsi[0]],'=')+1) ;everything after the '=' sign
    r_vals[2,0] = strtrim(strupcase(dsv),2)
    r_vals[2,1] = strtrim(dsv,2)
  endif

  dsi = WHERE(STRMID(STRUPCASE(ga),0,7) eq 'PI_NAME',dscnt)
  if dscnt eq 1 then begin
    dsv = strmid(ga[dsi[0]], strpos(ga[dsi[0]],'=')+1) ;everything after the '=' sign
    r_vals[3,0] = strcompress(strupcase(dsv),/Remove_All)
    r_vals[3,1] = strcompress(dsv,/Remove_All)
  endif

  dsi = WHERE(STRMID(STRUPCASE(ga),0,13) eq 'DATA_LOCATION',dscnt)
  if dscnt eq 1 then begin
    dsv = strmid(ga[dsi[0]], strpos(ga[dsi[0]],'=')+1) ;everything after the '=' sign
    r_vals[5,0] = strtrim(strupcase(dsv),2)
    r_vals[5,1] = strtrim(dsv,2)
  endif

  return, r_vals

END

;---------------------------------------------------------------------------------------------------
FUNCTION incl_excl_checks, chvn_chk, chvn_excl_incl, n_chks, ga_chk_vals, lu
;Check against excluded (~) or included DATA_SOURCE_01, FILE_ACCESS, PI_NAME, AFFILIATION
;(from DATA_SOURCE) and DATA_LOCATION values

  chtxtvn = ' with this version name. Restriction criteria are: '
  ico = 0 ;default value for successful check - changes to -1 if an error is found
  ei = where (chvn_chk[ 1:n_chks-1] ne '', ecnt)
  if ecnt eq 0 then begin
    chMessage = '  INFORMATION: No restrictions apply for keyword '+chvn_chk[0]
    printf, lu, chMessage
    print, chMessage
    return, ico
  endif else begin
    for i = 1, n_chks-1 do begin
      if chvn_chk[i] ne '' then begin
        ;separate out into component values (separated by '|' or '&')
        ds_vals = strsplit(strupcase(chvn_chk[i]), '|&', /EXTRACT, count=dscnt)
        ds_vals = strcompress(ds_vals,/Remove_All)
        ;separate inclusion and exclusion values
        evi = where(strmid(ds_vals,0,1) eq '~', evcnt, complement = ivi, ncomplement = ivcnt)

        if chvn_excl_incl[i] eq 'FILE_ACCESS' then begin
          ;can have multiple values in the GEOMS file
          ga_fa_vals = strsplit(ga_chk_vals[i,0],' ;',/Extract, count=fa_cnt)
          ga_fa_vals = strcompress(ga_fa_vals,/Remove_All)
          ;1. need to check that all ga_fa_vals are in the 'included' list
          ;2. need to check that none of the ga_fa_vals are in the 'excluded' list
          faok = bytarr(fa_cnt)
          for j = 0, fa_cnt-1 do begin
            gi = where(ga_fa_vals[j] eq ds_vals, gcnt)
            if gcnt ne 0 then faok[j] = 1B
            gi = where('~'+ga_fa_vals[j] eq ds_vals, gcnt)
            if gcnt ne 0 then faok[j] = 0B
          endfor
          gi = where(faok eq 0B, gcnt)
          if fa_cnt eq 1 then chtxt = 'value' else chtxt = 'values'
          if gcnt eq 0 then $
            chMessage = '  INFORMATION: '+ga_chk_vals[i,1]+' FILE_ACCESS '+chtxt+' permitted' $
          else begin
            if gcnt eq 1 then chtxt = 'value' else chtxt = 'values'
            failed_fa=ga_fa_vals[gi[0]]
            if gcnt gt 1 then $
              for j = 1, gcnt-1 do failed_fa = failed_fa+';'+ga_fa_vals[gi[j]]
            chMessage = '  ERROR: '+failed_fa+' FILE_ACCESS '+chtxt+' not permitted'
            chMessage = chMessage+chtxtvn+chvn_chk[i]
            ico= (-1)
          endelse
          printf, lu, chMessage
          print, chMessage
        endif else if chvn_excl_incl[i] eq 'DATA_SOURCE_01' then begin
          ;Allow ds_vals to include multiple DATA_SOURCE_01 options e.g. UVVIS.DOAS
          ;Won't pass if it fails an exclusion check
          passcheck = 1B ;set default to DATA_SOURCE_01 value being OK
          if evcnt ne 0 then begin
            ;check if file value is on the list of exclusions - if so then fails check
            for j = 0, evcnt-1 do begin
              if strpos( ga_chk_vals[i,0], strmid(ds_vals[evi[j]],1)) ne -1 then $
                passcheck = 0B
            endfor
          endif
          ;Note: only need to do inclusion checks if it has passed, or there are no, exclusion checks
          if (ivcnt ne 0) and (passcheck) then begin 
            ;check if file value is on the list of inclusions - if not then fails check
            passcheck = 0B ;set default to DATA_SOURCE_01 value not being OK
            for j = 0, ivcnt-1 do begin
              if strpos( ga_chk_vals[i,0], ds_vals[ivi[j]]) ne -1 then $
                passcheck = 1B
            endfor
          endif 

          if ~passcheck then begin
            chMessage = '  ERROR: '+ga_chk_vals[i,1]+' '+chvn_excl_incl[i]+ $
              ' value not permitted'
            chMessage = chMessage+chtxtvn+chvn_chk[i]
            ico = (-1)
          endif else chMessage = '  INFORMATION: '+ga_chk_vals[i,1]+' '+ $
            chvn_excl_incl[i]+' value permitted'
          printf, lu, chMessage
          print, chMessage
        endif else begin
          ;do checks on the rest of the criteria
          ;Won't pass if it fails an exclusion check
          passcheck = 1B ;set default to value being OK
          if evcnt ne 0 then begin
            ;check if file value is on the list of exclusions - if so then fails check
            for j = 0, evcnt-1 do begin
              if ga_chk_vals[i,0] eq strmid(ds_vals[evi[j]],1) then passcheck = 0B
            endfor
          endif
          ;Note: only need to do inclusion checks if it has passed, or there are no, exclusion checks
          if (ivcnt ne 0) and (passcheck) then begin
            ;check if file value is on the list of inclusions - if not then fails check
            passcheck = 0B ;set default to DATA_SOURCE_01 value not being OK
            for j = 0, ivcnt-1 do begin
              if ga_chk_vals[i,0] eq ds_vals[ivi[j]] then passcheck = 1B
            endfor
          endif
          if ~passcheck then begin
            chMessage = '  ERROR: '+ga_chk_vals[i,1]+' '+chvn_excl_incl[i]+ $
              ' value not permitted'
            chMessage = chMessage+chtxtvn+chvn_chk[i]
            ico = (-1)
          endif else chMessage = '  INFORMATION: '+ga_chk_vals[i,1]+' '+ $
            chvn_excl_incl[i]+' value permitted'
          printf, lu, chMessage
          print, chMessage
        endelse
      endif else begin
        ;chMessage = '  INFORMATION: No '+chvn_excl_incl[i]+' restrictions apply'
        ;printf, lu, chMessage
        ;print, chMessage
      endelse
    endfor
    return, ico
  endelse

END ;Function incl_excl_checks

;---------------------------------------------------------------------------------------------------
FUNCTION analyze_GEOMS_VN, chVN_value, chFileGEOMSvn, ga_chk_vals, lu

	tccode = ['0 (Passed version name checks)', '1 (Failed version name checks)']
	iCheckOut = 0

	chvn_excl_incl = ['VERSION_NAME', 'DATA_SOURCE_01', 'FILE_ACCESS', 'PI_NAME', $
	                  'ORGANIZATION', 'DATA_LOCATION'] ;must match the order in the file
	n_chks = N_ELEMENTS(chvn_excl_incl)

	if FILE_TEST( chFileGEOMSvn ) then begin
	  ;read in contents of the list file
	  dum='' & n_vn = 0L
	  openr, fu, chFileGEOMSvn, /Get_Lun

	  ;Determine number of version keywords - exclude comment lines or spaces
	  while ~eof(fu) do begin
	    readf, fu, dum
	    dum = strtrim(dum, 2)
	    char1 = ''
	    if strlen(dum) ge 1 then char1 = strmid(dum, 0, 1)
	    if (char1 ne '') and (char1 ne '!') then n_vn++
	  endwhile
	  Free_Lun, fu

    ;read in version names and inclusion/exclusion settings
	  chvernames = STRARR( n_vn, n_chks)
	  openr, fu, chFileGEOMSvn, /Get_Lun
    i = 0L
	  while ~eof(fu) do begin
	    readf, fu, dum
	    dum = strtrim(dum, 2)
	    char1 = ''
	    if strlen(dum) ge 1 then char1 = strmid(dum, 0, 1)
	    if (char1 ne '') and (char1 ne '!') then begin
	      ;separate list into component parts
	      lres = strsplit( dum, ',', /Extract, /Preserve_Null)
        chvernames[i, *] = strtrim(lres[0:n_chks-1], 2)
        i++
      endif
    endwhile
    Free_Lun, fu

	  ;Check 1 - Does the version name consist of keyword part(s), optionally followed by version number part(s)
	  pres = strsplit( chvn_value,'.', /Extract, /Preserve_Null, Count=pcnt)
	  ni = bytarr(pcnt) ;0B = null or NaN; 1B = number
	  for j = 0, pcnt-1 do $
	    if is_a_number_vn(pres[j]) then ni[j] = 1B else ni[j] = 0B
	  nni = where(ni eq 1B, ncnt)
	  kni = where(ni eq 0B, kcnt)

    if kcnt ne 0 then begin
	    ;check for '' or ' ' (either consecutive dots or space in the keyword
	    writeonced = 0B & writeonces = 0B
	    for j = 0, kcnt-1 do begin
	      if (pres[kni[j]] eq '') or (strcompress( pres[kni[j]],/Remove_All) ne pres[kni[j]]) then begin
	        if (pres[kni[j]] eq '') and (~writeonced) then begin
	          if (strmid( chvn_value, 0, 1) eq '.') or $
	             (strmid( chvn_value, strlen(chvn_value)-1, 1) eq '.') then begin
	            if strmid( chvn_value, 0, 1) eq '.' then etxt = 'start' else etxt = 'end'
	            chMessage = '  ERROR: Dot present at '+etxt+' of data version name: '+chvn_value
	          endif else begin
	            chMessage = '  ERROR: Data version name contains consecutive dots: '+chvn_value
	          endelse
	          writeonced = 1B
	          printf, lu, chMessage
	          print, chMessage
	        endif else if ~writeonces then begin
	          chMessage = '  ERROR: Data version name contains spaces: '+chvn_value
	          writeonces = 1B
	          printf, lu, chMessage
	          print, chMessage
	        endif
	        iCheckOut = 1
	      endif
	    endfor
	  endif

    if iCheckOut eq 0 then begin
	    if ncnt eq pcnt then begin
	      ;version name consists only of version numbers so no further checks required
	      chMessage = '  INFORMATION: Version name '+chvn_value+' consists of a processing version number only. '+ $
	        'No further checks required.'
	      printf, lu, chMessage
	      print, chMessage
	      iCheckOut = 2
	    endif else if (ncnt ne pcnt) and (ncnt ne 0) then begin
	      ;need to separate out keyword part and test that part only
	      if kni[kcnt-1] lt nni[0] then begin
	        ;the keyword part(s) come(s) before the version number part(s) so write keywords to chvn_value
          chvn_value = ''
          for j = 0, kcnt-1 do begin
            if j eq kcnt-1 then dtxt='' else dtxt='.'
            chvn_value=chvn_value+pres[kni[j]]+dtxt
          endfor
	      endif else begin
	        if kcnt eq 1 then ktxt = '' else ktxt = ' last'
	        if ncnt eq 1 then ntxt = '' else ntxt = ' parts'
	        chMessage = '  ERROR: Data version number'+ntxt+' must be after the'+ktxt+' data processing keyword: '
	        chMessage = chMessage+chvn_value
	        printf, lu, chMessage
	        print, chMessage
	        iCheckOut = 1
	      endelse
	    endif ;o/w only data processing keywords are present so no need to rewrite chVn_value
	  endif

    ;print, '"'+chvn_value+'"'

	  ;Check 2 - Are the keyword part(s) present in the list
	  if iCheckOut eq 0 then begin
	    ;check each keyword or combination until all keywords are accounted for or an error is found
	    ;Note keyword comparison is case sensitive (introduced 20231218)
	    chvn_allwdupuc = strupcase( chvernames[*,0]) ;check if there is a case sensitivity issue
	    keyword = chvn_value
	    chvn_allwdup = chvernames[*,0]

	    j = kcnt & partok = 0
	    repeat begin
        cserror = 0B
	      repeat begin
          ;start with full version keyword then reduce until a match is found or no words are left to test
	        if j eq kcnt then begin
	          kwtest = keyword & kwtestuc = strupcase(keyword)
	        endif else begin
	          kwtest = strmid(kwtest, 0, strpos( kwtest,'.',/Reverse_Search))
	          kwtestuc = strupcase(kwtest)
	        endelse
	        gi = where( kwtest eq chvn_allwdup, gcnt)
	        if gcnt eq 0 then begin
	          ;check if keyword is present but case is incorrect
	          gi = where( kwtestuc eq chvn_allwdupuc, gcnt)
	          if gcnt ne 0 then cserror = 1B
	        endif
	        if (gcnt eq 0) or ((gcnt ne 0) and (kcnt eq 1)) then j--
	      endrep until (gcnt ne 0) or (j eq 0)
	      if gcnt ne 0 then begin
	        ;partial or full match found so do inclusion/exclusion checks (unless the case is incorrect)
	        if (j ne 0) or (partok eq 1) then kwpart = ' part' else kwpart = ''
	        if cserror then begin
	          chMessage = '  ERROR: Version keyword'+kwpart+' found with case mismatch: '+kwtest+'/'+chvn_allwdup[gi[0]]
	          iCheckOut = (-1)	        
	        endif else begin
	          chMessage = '  INFORMATION: Version keyword match found for '+kwtest+kwpart
          endelse
          printf, lu, chMessage
          print, chMessage
	        ico = incl_excl_checks( chvernames[gi[0],*], chvn_excl_incl, n_chks, ga_chk_vals, lu)
	        if ico eq (-1) then iCheckOut = ico

	        if j ne 0 then begin
            ;partial match only so still parts of the version name keywords to test
	          partok = 1
            ;extract untested part of the keyword for retesting
	          keyword = strmid( keyword, strlen(kwtest)+1)
	          kcnt = kcnt-j & j = kcnt
	        endif
	      endif else begin
	        ;No match found so generate error message
	        if partok eq 1 then kwerror = kwtest+' part' else kwerror = chvn_value
	        chMessage = '  ERROR: Version keyword match not found for '+kwerror
	        printf, lu, chMessage
	        print, chMessage
	        iCheckOut = 1
	      endelse
	    endrep until (j eq 0) or (iCheckOut eq 1)
	    if iCheckOut eq -1 then iCheckOut = 1 ;happens if error(s) with inclusion/exclusion checks
	  endif

	endif else begin
	  chMessage = '  ERROR: Version Name list file not found'
	  printf, lu, chMessage
	  print, chMessage
	  iCheckOut = -1
	endelse

	if iCheckOut eq 2 then iCheckOut = 0 ;No checks required as version name consists of a version number only

  printf, lu, ''
	printf, lu, tccode(abs(iCheckOut))

	return, iCheckOut
END

;---------------------------------------------------------------------------------------------------
FUNCTION geoms_vntools, dvndir, hdffile, QA_error_code, TC_error_code

	TC_error_code_2 = 0
	chFileGEOMSvnspec = dvndir+'data_version_names_*.csv'

	;determine name of most up to date list file
	chFileGEOMSvn = determine_current_file( chFileGEOMSvnspec)

	reterr=''
	idlcr8ascii, hdffile, ga, sds, reterr

	;determine if DATA_SOURCE has a VERSION_NAME field
	ga_chk_vals = return_ga_check_values( ga ) ;get the global attribute values required for the checks
	chVN_value = ga_chk_vals[0, 1] ;version of the name as written in the GEOMS file instead of uppercase

	; find log file (see geoms_tools.pro)
	logfile=STRMID(hdffile,0,STRPOS(hdffile,'.',/REVERSE_SEARCH))+'.log'
	openw,lu,logfile,/GET_LUN,/APPEND

  if chVN_value ne '' then begin
     if chFileGEOMSvn eq '-1' then begin
      chMessage = 'ERROR: GEOMS Version Name list file not found'
      print, chMessage
      printf, lu, ''
      printf, lu, chMessage
      TC_error_code_2 = (-1)
    endif else begin
      chMessage = 'Analyzing compliance to GEOMS version name list: '+File_Basename( chFileGEOMSvn )
      print, chMessage
      printf, lu, ''
      printf, lu, chMessage

      TC_error_code_2 = analyze_GEOMS_VN( chVN_value, chFileGEOMSvn, ga_chk_vals, lu )
    endelse

    ;if TC_error_code_2 eq -1 then chFileGEOMSvn is not present (i.e. problem with input file)
    if (TC_error_code_2 eq -1) or (QA_error_code eq 3) then begin
      chMessage = '3 (Total QA/TC-VN error)'
    endif else begin
      if TC_error_code_2 lt TC_error_code then TC_error_code_2 = TC_error_code
      chMessage = STRTRIM(STRING( QA_error_code + 3*TC_error_code_2 + 4 ), 1)+' (Total QA/TC-VN error)'
    endelse
    printf, lu, ''
    printf, lu, chMessage
    Free_Lun, lu
    return, TC_error_code_2
  endif else begin
    if QA_error_code eq 3 then begin
      printf, lu, ''
      printf, lu, '3 (Total QA/TC error)'
    endif
    Free_Lun, lu
    return, TC_error_code
  endelse

END