;; papco_data_cache -- simple model of local data store with optional
;;                     remote data source.
;;
;; notes:
;;   remote file listing is done once per cache structure lifecycle,
;;      and cached from then on.
;;   username/password is stored once per lifecycle
;;
;; requires:
;;   papco_remote_filesystem
;;   papco_get_remote_data
;;

; get_name_for_date -- build the file name for a given time by filling the
;   date fields of the file template.
;
;   mjdt1    time object; its breakApart method is called to obtain year,
;            month, day of month, hour, minute, second and (doy=) day of year.
;   returns  the constant pieces of the template interleaved with the
;            zero-padded date fields.
;   errors   halts with 'invalid format specifier' when the template holds a
;            field character other than Y, y, j, m, d, H, M or S.
function papco_data_cache::get_name_for_date, mjdt1
  mjdt1->breakApart, year, month, day_om, hour, minute, second, doy=day_oy

  result= self.constFormat[0]
  for i=0,self.nConstFormat-2 do begin
      fc= self.dateFormat[i]
      case fc of
          'Y': s= string( format='(i4.4)', year )
          'y': s= string( format='(i2.2)', year mod 100 )
          'j': s= string( format='(i3.3)', day_oy )
          'm': s= string( format='(i2.2)', month )
          'd': s= string( format='(i2.2)', day_om )
          'H': s= string( format='(i2.2)', hour )
          'M': s= string( format='(i2.2)', minute )
          'S': s= string( format='(i2.2)', second )
          ;; report the offending character only: self.dateFormat is an
          ;; array, and message needs a scalar string
          else: message, 'invalid format specifier: '+ fc
      endcase

      result= result + s + self.constFormat[i+1]
  endfor
  return, result
end

; wild -- glob pattern for the file template: each date field is replaced by
;   one '?' per character it occupies (4 for Y, 3 for j, 2 for anything else),
;   the constant pieces are copied through unchanged.
function papco_data_cache::wild
  glob= self.constFormat[0]
  nfields= self.nConstFormat - 1

  for k=0, nfields-1 do begin
      case self.dateFormat[k] of
          'Y': marks= '????'
          'j': marks= '???'
          else: marks= '??'
      endcase
      glob= glob + marks + self.constFormat[k+1]
  endfor

  return, glob
end

; returns timeRange
;
; range -- work out the time interval a data file covers from its name.
;
;   file     file name laid out according to the file template (relative to
;            the store root).
;   returns  papco_new_timeRange( time1, time2 ): time1 is built from the
;            fields read out of the name, time2 is time1 with the least
;            significant specified field incremented by one.
;   errors   halts when the template has a day-of-year field before any year
;            field, an unknown field character, no date fields at all, or a
;            gap in the more significant fields.
function papco_data_cache::range, file
;;  %H  hours
;;  %M  minute
;;  %S  second
;;  %Y  four-digit year
;;  %y  two-digit year
;;  %m  month
;;  %d  day of month
;;  %j  day of year
  timearr= [ -1, -1, -1, -1, -1, -1 ] ;; y,m,d,h,m,s
  implicit_timearr= [ -1, 1, 1, 0, 0, 0 ]  ;; use this value if not specified

  ;; rebuild the template text; it is only used in the error messages below
  format= self.constFormat[0]
  for i=0,self.nConstFormat-2 do $
    format= format + '%' + self.dateFormat[i] + self.constFormat[i+1]

  ;; ipos walks along the file name, skipping constant pieces
  ipos= strlen( self.constFormat[0] )

  for i=0,self.nConstFormat-2 do begin
      fc= self.dateFormat[i]
      if ( fc eq 'Y' ) then begin
          ii= fix( strmid( file, ipos, 4 ) )
          ;; NOTE(review): debugging halt left in place for a year that
          ;; reads as 0 -- confirm whether this should become a message
          if ( ii eq 0 ) then stop
          ipos= ipos+4
          timearr[0]= ii
      endif else if ( fc eq 'y' ) then begin
          ii= fix( strmid( file, ipos, 2 ) )
          ipos= ipos+2
          ;; two-digit years above 58 are taken as 19xx, the rest as 20xx
          if ( ii gt 58 ) then timearr[0]= ii+1900 else timearr[0]= ii+2000
      endif else if ( fc eq 'j' ) then begin
          ii= fix( strmid( file, ipos, 3 ) )
          ipos= ipos+3
          if ( timearr[0] eq -1 ) then begin
              message, 'invalid time format '+format+': doy preceeds year'
          endif
          ;; day-of-year is converted through "January ii" of that year
          jj= julday( 1, ii, timearr[0] )
          caldat, jj, mm, dd, yy
          timearr[1]= mm
          timearr[2]= dd
      endif else begin
          ;; position in ff is the timearr slot; 'x' only pads slot 0 so that
          ;; m,d,H,M,S land on slots 1..5
          ff= 'xmdHMS'
          index= strpos( ff, fc )
          if ( index eq -1 ) then message, 'illegal character in time format '+format
          ii= fix( strmid( file, ipos, 2 ) )
          if ( strpos( 'xmd', fc ) ne -1 and ii eq 0 ) then begin
              message, 'got 0 when reading '+fc+' from chars: '+strmid( file, ipos, 2 ), /cont
              message, '  filename='+file, /cont
          endif
          ipos= ipos+2
          timearr[index]= ii
      endelse
      ipos= ipos + strlen( self.constFormat[i+1] )
  endfor

  ;; fill unspecified trailing (least significant) fields with defaults
  i=5;
  while i ge 0 do begin
      if ( timearr[i] ne -1 ) then break
      timearr[i]= implicit_timearr[i]
      i= i-1
  endwhile

  ;; index of the least significant field the file name actually specifies
  lsd= i

  if ( i eq -1 ) then message, 'invalid time format: empty'
  ;; every field more significant than lsd must have been specified
  while i ge 0 do begin
      if ( timearr[i] eq -1 ) then break
      i= i-1
  endwhile
  if ( i ge 0 ) then message, 'invalid time format: empty more significant digits means time is ambiguous'

  time1= papco_new_time( dom=timearr[2], month=timearr[1], year=timearr[0], $
                         hour=timearr[3], minutes=timearr[4], $
                         seconds=timearr[5] )

  ;; NOTE(review): the incremented field may overflow (e.g. month 13);
  ;; presumably papco_new_time normalizes this -- confirm
  timearr[lsd]= timearr[lsd]+1

  time2= papco_new_time( dom=timearr[2], month=timearr[1], year=timearr[0], $
                         hour=timearr[3], minutes=timearr[4], $
                         seconds=timearr[5] )
  
  return, papco_new_timeRange( time1, time2 )
end



; attempt to get a local copy of the file, but not allowing web transfer
;
;   mjdt1    time object handed to get_name_for_date.
;   returns  the last path findfile reports under localRoot; otherwise the
;            answer of the secondary cache's get_local when a secondary cache
;            is set; otherwise ''.
function papco_data_cache::get_local, mjdt1
  found= findfile( self.localRoot + self->get_name_for_date( mjdt1 ) )

  ;; a hit in this store wins
  if ( found[0] ne '' ) then return, found[n_elements(found)-1]

  ;; nothing here: fall back to the secondary store, if there is one
  if ( obj_valid( self.secondaryCache ) eq 0 ) then return, ''
  return, self.secondaryCache->get_local( mjdt1 )
end

; transfer -- hand one file name to papco_get_remote_data together with the
;   remote root url and the local root of this store.
pro papco_data_cache::transfer, file
  papco_get_remote_data, $
    self.urlRoot, $
    file, $
    self.localRoot
end

; attempt to get a copy of the file, allow transfer if it's not local
;
;   mjdt1    time object identifying the wanted file.
;   returns  path of the file, or '' when it could not be obtained.
function papco_data_cache::get, mjdt1
  ;; already available locally (this store or the secondary one)?
  located= self->get_local( mjdt1 )
  if ( located ne '' ) then return, located

  ;; no remote source configured: nothing more can be done
  if ( self.urlRoot eq '' ) then return, ''

  ;; try a transfer, then look again
  name= self->get_name_for_date( mjdt1 )
  self->transfer, name
  located= self->get_local( mjdt1 )
  if ( located ne '' ) then return, located

  ;; last resort: let the secondary store attempt its own transfer
  if ( obj_valid( self.secondaryCache ) ) then return, self.secondaryCache->get( mjdt1 )
  return, ''
end


; get_files_local -- local files whose time range intersects targetRange.
;
;   targetRange  time range object passed to each file range's intersects.
;   count=       output; number of names returned.
;   monitor=     progress monitor; a papco_monitor is created when omitted.
;   returns      names relative to localRoot, or '' when none qualify.
function papco_data_cache::get_files_local, targetRange, count=count, monitor=monitor
  ;;  return the files in strarr that intersect with timeRange

  if n_elements(monitor) eq 0 then monitor= obj_new('papco_monitor')

  paths= findfile( self.localRoot + self.fileGlob, count=count )
  if ( paths[0] eq '' ) then begin
      message, 'no data in local data store', /cont
      return, ''
  endif

  rootLen= strlen( self.localRoot )
  monitor->setTasksize, count
  monitor->setMessage, 'getting local file list'

  ;; strip the root from every path and flag the names that intersect
  names= strarr( count )
  keep= bytarr( count )
  for k=0,count-1 do begin
      monitor->setProgress, k
      names[k]= strmid( paths[k], rootLen, strlen(paths[k])-rootLen )
      span= self->range( names[k] )
      if ( span->intersects( targetRange ) ) then keep[k]= 1B
  endfor

  hits= where( keep, count )
  if ( count eq 0 ) then return, ''
  return, names[hits]

end


; reset -- release the cached remote file listing; get_remote_list fetches
;   it again the next time it finds the pointer invalid.
pro papco_data_cache::reset
  if ( ptr_valid( self.remoteList ) ) then ptr_free, self.remoteList
end


; get_remote_list -- remote file listing, fetched once and then cached.
;
;   count=    output; 0 when the first entry of the listing is '', otherwise
;             the number of entries.
;   monitor=  passed on to authentication and papco_wget_unglob.
;   returns   the listing as produced by papco_wget_unglob.
function papco_data_cache::get_remote_list, count=count, monitor=monitor
  if ( ptr_valid( self.remoteList ) eq 0 ) then begin
      ;; first request since creation or reset: authenticate, list, remember
      self->authentication, monitor=monitor
      listing= papco_wget_unglob( self.urlRoot, self.fileGlob, monitor=monitor )
      ptr_free, self.remoteList
      self.remoteList= ptr_new( listing )
  endif

  f= *self.remoteList
  if ( f[0] eq '' ) then count= 0 else count= n_elements(f)
  return, f
end


; get_files_remote -- remote files whose time range intersects targetRange.
;
;   targetRange  time range object passed to each file range's intersects.
;   monitor=     progress monitor; a papco_monitor is created when omitted.
;   returns      names from the remote listing, or '' when none qualify.
function papco_data_cache::get_files_remote, targetRange, monitor=monitor
  ;;  return the files in strarr that intersect with timeRange

  if n_elements(monitor) eq 0 then monitor= obj_new('papco_monitor')
  names= self->get_remote_list( count=nnames, monitor=monitor )

  monitor->setTaskSize, nnames
  if ( nnames eq 0 ) then return, ''

  ;; flag the listing entries that intersect the requested range
  keep= bytarr( nnames )
  for k=0,nnames-1 do begin
      monitor->setProgress, k
      span= self->range( names[k] )
      if ( span->intersects( targetRange ) ) then keep[k]= 1B
  endfor

  hits= where( keep, nhits )
  if ( nhits eq 0 ) then return, ''
  return, names[hits]

end

; complement -- set difference of two lists of strings.
;
;   a1, a2   string scalars or arrays; '' in element 0 means "empty list".
;   count=   output; number of elements returned.
;   returns  the elements of a1 that are not in a2, in sorted order, or ''
;            when there are none.
function complement, a1, a2, count=count
  ;  returns the elements in a1 that are not in a2.
  if a1[0] eq '' then begin
      count=0
      return, ''
  endif else if a2[0] eq '' then begin
      count= n_elements(a1)
      return, a1[sort(a1)]
  endif

  ;; [..] promotes a scalar to a one-element array so it can be sorted
  s1= [ a1 ]
  s1= s1[ sort(s1) ]
  s2= [ a2 ]
  s2= s2[ sort(s2) ]

  n1= n_elements( s1 )
  last2= n_elements( s2 ) - 1

  ;; the result holds file names, so it must be a string array
  ;; (make_array without a type keyword would give floats)
  result= strarr( n1 )
  iresult= 0L
  i2= 0L
  for i=0L,n1-1 do begin
      ;; both lists are sorted: advance in s2 until it catches up with s1[i]
      while ( s2[i2] lt s1[i] and i2 lt last2 ) do i2= i2+1
      if ( s2[i2] ne s1[i] ) then begin
          result[iresult]= s1[i]
          iresult= iresult+1
      endif
  endfor
  count=iresult
  if ( iresult eq 0 ) then begin
      return, ''
  endif else begin
      return, result[0:(iresult-1)]
  endelse

end

; get_files -- all files intersecting targetRange, fetching those that are
;   listed remotely but not present locally.
;
;   targetRange  time range object.
;   count=       output; number of local files plus files retrieved.
;   monitor=     progress monitor; a papco_monitor is created when omitted.
;                It is also handed to the local and remote listing steps so
;                that they report on the same monitor.
;   returns      localRoot-prefixed names of the local files followed by the
;                values get returned for the retrieved ones, or ''.
function papco_data_cache::get_files, targetRange, count=count, monitor=monitor

  if ( n_elements(monitor) eq 0 ) then monitor= obj_new('papco_monitor')

  ;; forward the monitor: without it each call creates its own
  local= self->get_files_local( targetRange, count=nlocal, monitor=monitor )
  remote= self->get_files_remote( targetRange, monitor=monitor )

  if ( remote[0] eq '' ) then begin
      nretrievable=0
  endif else begin
      ;; remote files that are not yet in the local store
      retrievable= complement( remote, local, count=nretrievable )
  endelse
  if ( nretrievable gt 0 ) then begin
      retrieved= strarr( nretrievable )
      iretrieve= 0
      monitor->setMessage,'retrieving data files'
      monitor->setTaskSize,  nretrievable
      for i=0,nretrievable-1 do begin
          monitor->setProgress, i
          tr= self->range( retrievable[i] )
          ;; NOTE(review): reads field t1 of the value returned by range
          ;; directly -- confirm this is accessible from here
          x= self->get( tr.t1 )  ;; transfer or secondary
          if x ne '' then begin
              retrieved[iretrieve]= x
              iretrieve= iretrieve+1
          endif
      endfor
      nretrieved= iretrieve
  endif else begin
      retrieved=''
      nretrieved= 0
  endelse

  count= nlocal + nretrieved
  ;; retrieved[0] is '' both when nothing was retrievable and when every
  ;; retrieval failed, so the subscript ranges below are never empty
  if ( local[0] eq '' and retrieved[0] eq '' ) then begin
      return, ''
  endif else if local[0] eq '' then begin
      return, retrieved[0:(nretrieved-1)]
  endif else if retrieved[0] eq '' then begin
      return, self.localRoot + local
  endif else begin
      return, [ self.localRoot + local, retrieved[0:(nretrieved-1)] ]
  endelse
end

; returns 1 if the file matches the cache data file spec, 0 otherwise
;
; Only the constant pieces of the template are compared; the characters in
; the date field positions are skipped over without being inspected.
function papco_data_cache::matches, file
  lead= self.constFormat[0]
  if ( strmid( file, 0, strlen( lead ) ) ne lead ) then return, 0

  cursor= strlen( lead )
  for k=0, self.nConstFormat-2 do begin
      ;; step over the date field
      case self.dateFormat[k] of
          'Y': width= 4
          'j': width= 3
          else: width= 2
      endcase
      cursor= cursor + width

      ;; the constant piece that follows must be present verbatim
      literal= self.constFormat[k+1]
      if ( strmid( file, cursor, strlen( literal ) ) ne literal ) then return, 0
      cursor= cursor + strlen( literal )
  endfor

  return, 1
end


; returns timeRange of data coverage, may contain holes
;
; The ranges of all local files matching the glob are merged with the range
; object's include method. With no local files a message is printed and the
; range '1/1/1990' to '1/1/1990' is returned.
function papco_data_cache::local_available

  paths= findfile( self.localRoot + self.fileGlob, count=npaths )

  if ( paths[0] eq '' ) then begin
      message, 'no data in local data store', /cont
      return, papco_new_timerange( '1/1/1990', '1/1/1990' )
  endif

  rootLen= strlen( self.localRoot )

  ;; start from the first file and widen with each following one
  name= strmid( paths[0], rootLen, strlen(paths[0])-rootLen )
  coverage= self->range( name )
  for k=1,npaths-1 do begin
      name= strmid( paths[k], rootLen, strlen(paths[k])-rootLen )
      span= self->range( name )
      coverage= coverage->include( span )
  endfor

  return, coverage
end

; authentication -- obtain account information for the remote store when the
;   url contains the text 'user'.
;
;   monitor=  progress monitor; a papco_monitor is created when omitted.
;
; The url is handed to papco_get_remote_account_info and stored back into
; urlRoot only when the part between '//' and the first ':' is not empty.
pro papco_data_cache::authentication, monitor=monitor
  if ( n_elements(monitor) eq 0 ) then monitor=obj_new('papco_monitor')

  ;; nothing to do without a remote store or without a 'user' marker
  if ( self.urlRoot eq '' ) then return
  if ( strpos( self.urlRoot, 'user' ) eq -1 ) then return

  monitor->setMessage,'waiting for username / password'
  url= self.urlRoot
  ;; presumably rewrites url in place with the entered account -- confirm
  papco_get_remote_account_info, url

  afterScheme= str_sep( url, '//' )
  account= str_sep( afterScheme[1], ':' )
  if ( account[0] ne '' ) then self.urlRoot= url
end

; toString -- localRoot followed by the file glob; when a secondary cache is
;   set, a line 'then: ' and the secondary cache's own toString follow.
function papco_data_cache::toString
  text= '' + self.localRoot + self.fileglob
  if ( obj_valid( self.secondaryCache ) eq 0 ) then return, text

  newline= string( byte(10) )
  return, text + newline + 'then: ' + newline + self.secondaryCache->toString()
end

; remote_available -- time range spanned by the files of the remote listing.
;
;   returns  the ranges of all listed files merged with the range object's
;            include method; with an empty listing a message is printed and
;            the range '1/1/1990' to '1/1/1990' is returned.
function papco_data_cache::remote_available

  self->authentication

  f= self->get_remote_list( count=count )

  gotFirst=0

  for i=0,count-1 do begin
      ;; the range comes from this object; no variable named cache exists here
      rangei= self->range( f[i] )
      if ( gotFirst ) then begin
          range= range->include( rangei )
      endif else begin
          range= rangei
          gotFirst=1
      endelse
  endfor

  if ( gotFirst eq 0 ) then begin
      message, 'remote data store contains no matching files: '+self.urlRoot, /cont
      return, papco_new_timerange(  '1/1/1990', '1/1/1990' )
  endif else begin
      return, range
  endelse

end

; has_secondary_cache -- 1 when secondaryCache holds a valid object, else 0.
function papco_data_cache::has_secondary_cache
  present= obj_valid( self.secondaryCache )
  return, present
end

; get_secondary_cache -- the stored secondaryCache object reference, as is.
function papco_data_cache::get_secondary_cache
  secondary= self.secondaryCache
  return, secondary
end
; papco_data_cache_test -- manual exercise: a saveset store backed by a cdf
;   store as secondary cache, both with an ftp remote source.
pro papco_data_cache_test
  forward_function papco_data_cache
  forward_function papco_data_cache_get
  forward_function papco_data_cache_local_available

  ephUrl= 'ftp://papco:pap_dat@nis-ftp.lanl.gov/cluster/eph/'

  ;; secondary store first, so it can be handed to the primary one
  cdfStore= papco_data_cache( '/papco_data/cluster/eph/cdf/', $
                              'cl_jp_pgp_%Y%m??_v??.cdf', $
                              urlRoot=ephUrl+'cdf/' )
  cache= papco_data_cache( '/papco_data/cluster/eph/savesets/', $
                           '%y%m_cluster_eph.idl', $
                           urlRoot=ephUrl+'savesets/', $
                           secondaryCache= cdfStore )

  t0= papco_new_time( '20010401 00:00' )

  print, cache->get_name_for_date( t0 )

  remoteSpan= cache->remote_available( )
  print, remoteSpan->toString()

  print, cache->get( t0 )

  fileSpan= cache->range( '0104_cluster_eph.idl' )
  print, fileSpan->toString()

  localSpan= cache->local_available()
  print, localSpan->toString()
end

; papco_data_cache_test2 -- manual exercise against the omni store
;   (yearly files, template 'omni2_%Y.dat').
;
; The object is the target of each method call, so it is not passed again as
; the first argument.
pro papco_data_cache_test2
  cache= papco_data_cache( '/papco_data/omni/', 'omni2_%Y.dat', $
                            urlRoot='ftp://papco:pap_dat@nis-ftp.lanl.gov/omni/' )
  print, cache->get_files_local( papco_new_timerange( '12/15/2000', '1/19/2002' ) )
  print, cache->get_files_remote( papco_new_timerange( '12/15/2000', '1/19/2002' ) )
  print, cache->get_files( papco_new_timerange( '12/15/2000', '1/19/2002' ) )
  print, cache->matches( '#omni.dat#' )
  print, cache->matches( 'omni_m2000.dat' )
  print, papco_tostring( cache->remote_available() )
  print, papco_tostring( cache->local_available() )

end


; papco_data_cache_test3 -- manual exercise against the noaa store, whose
;   template has date fields in the directory part as well as the file name.
;
; The object is the target of each method call, so it is not passed again as
; the first argument.
pro papco_data_cache_test3
  cache= papco_data_cache( '/papco_data/noaa/', 'NOAA_N12/sem/%Y/data/N12%Y%j.dat.gz', $
                           urlRoot='ftp://papco:pap_dat@nis-ftp.lanl.gov/noaa/' )
  print, cache->matches( 'N122000365.dat.gz' )
  print, cache->matches( 'NOAA_N12/sem/2000/data/N122000365.dat.gz' )
  print, papco_tostring( cache->remote_available() )
  print, papco_tostring( cache->local_available() )
  print, cache->get( papco_new_time( '3/20/2001' ) )
end

; papco_data_cache_test4 -- manual exercise against the genesis solar wind
;   store (daily files, http remote source, local root under $PAPCO_DLNK).
pro papco_data_cache_test4
  remote='http://genesis.lanl.gov/solar_wind_stats/'
  local= getenv('PAPCO_DLNK')+'/genesis/'

  cache= papco_data_cache( local, 'sws_n_%y%m%d_v1.idlsav', urlRoot=remote )
  print, cache->get_files_local( papco_new_timerange( '12/15/2000', '1/19/2002' ) )
  print, cache->get_files_remote( papco_new_timerange( '12/15/2000', '1/19/2002' ) )
  print, cache->get_files( papco_new_timerange( '12/15/2000', '1/19/2002' ) )
  print, cache->matches( 'sws_n_030302_v1.idlsav' )
  print, cache->range( 'sws_n_030302_v1.idlsav' )
  ;; these methods take no positional argument
  print, (cache->remote_available( ))->toString()
  print, (cache->local_available( ))->toString()
end

; papco_data_cache_test5 -- manual exercise against the polar hydra survey
;   store (daily cdf files with a wildcarded version, http remote source).
pro papco_data_cache_test5
  remote='http://papco:pap_dat@www.papco.org/data/polar/hydra/survey/'
  ;; file names are appended directly to the local root, so it has to end
  ;; with a path separator
  local= getenv('PAPCO_DLNK')+'/polar/hydra/survey/'

  cache= papco_data_cache( local, '%Y%m%d_hyd_sv_v?.??.cdf', urlRoot=remote )
  print, cache->get_files_local( papco_new_timerange( '12/15/2000', '1/19/2002' ) )
  print, cache->get_files_remote( papco_new_timerange( '12/15/2000', '1/19/2002' ) )
  print, cache->get_files( papco_new_timerange( '12/15/1990', '1/19/2006' ) )
  print, cache->matches( '20010508_hyd_sv_v5.51.cdf' )
  print, cache->range( '20010508_hyd_sv_v5.51.cdf' )
  print, (cache->remote_available( ))->toString()
  print, (cache->local_available( ))->toString()
end
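; localRoot is the root directory of the local data store; file names are
;   appended directly to it, so it should end with a path separator.
; urlRoot is the url of the remote data store, can be http or ftp
; fileTemplate is the file name pattern; the fields %Y, %y, %m, %d, %j, %H,
;   %M and %S are replaced by the corresponding parts of the date and time.
; secondaryCache should be checked before attempting a web transfer.
; monitor  see papco_monitor.pro for monitor interface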

; papco_data_cache__define -- declares the instance data of the class.
;
;   localRoot       root of the local store; file names are appended to it.
;   urlRoot         url of the remote store; '' means no remote source.
;   remoteList      pointer to the cached remote file listing.
;   username        not referenced by any method visible in this file.
;   password        not referenced by any method visible in this file.
;   fileGlob        glob pattern derived from the template by wild().
;   constFormat     constant pieces of the template (at most 20).
;   nConstFormat    number of constant pieces in use.
;   dateFormat      one field character per date field (at most 20).
;   nDateFormat     no method visible in this file reads it.
;   secondaryCache  optional second papco_data_cache consulted as fallback.
pro papco_data_cache__define
  struct= { papco_data_cache, $
            localRoot:'', $
            urlRoot:'', $
            remoteList:ptr_new(), $
            username:'', $
            password:'', $
            fileGlob:'', $
            constFormat: strarr(20), $
            nConstFormat: 0, $
            dateFormat: strarr(20), $
            nDateFormat: 0, $
            secondaryCache:obj_new() }
end

; Init -- constructor; splits the file template into constant pieces and
;   date field characters and derives the file glob.
;
;   localRoot        root of the local store; file names are appended to it.
;   fileTemplate     file name pattern; each '%' is followed by one field
;                    character, the rest is constant text.
;   urlRoot=         url of the remote store; defaults to '' (no remote).
;   secondaryCache=  optional cache object stored as fallback.
;   returns          1.
;   errors           halts when the template has more pieces than the
;                    constFormat array can hold.
function papco_data_cache::Init, localRoot, fileTemplate, $
                         urlRoot=urlRoot, $
                         secondaryCache= secondaryCache

  if ( n_elements( urlRoot ) eq 0 ) then urlRoot=''

  ;; piece 0 is the text before the first '%'; every later piece starts with
  ;; its field character
  s= str_sep( fileTemplate, '%' )
  n= n_elements(s)
  if ( n gt n_elements( self.constFormat ) ) then $
    message, 'too many date fields in file template: '+fileTemplate

  self.constFormat[0]= s[0]
  ;; the loop does not run for a template without '%', so nothing is
  ;; written to dateFormat in that case
  for i=1,n-1 do begin
      self.dateFormat[i-1]= strmid( s[i], 0, 1 )
      self.constFormat[i]= strmid( s[i], 1, strlen(s[i])-1 )
  endfor

  self.nConstFormat= n
  self.nDateFormat= n-1
  
  self.localRoot= localRoot
  self.urlRoot= urlRoot

  if ( n_elements(secondaryCache) eq 1 ) then self.secondaryCache=secondaryCache

  self.fileGlob= self->wild() 
  return, 1
end


; papco_data_cache -- convenience constructor; returns a new papco_data_cache
;   object created with obj_new.
;
;   localRoot, fileTemplate, urlRoot=, secondaryCache=  passed on to Init.
function papco_data_cache, localRoot, fileTemplate, $
                           urlRoot=urlRoot, $
                           secondaryCache= secondaryCache

  ;; secondaryCache has to be forwarded as well, otherwise the new object
  ;; never gets its fallback store
  cache= obj_new( 'papco_data_cache', $
                  localRoot, fileTemplate, urlRoot= urlRoot, $
                  secondaryCache= secondaryCache )

  return, cache

end



