# $Id: importNOS_TideRefs.rb 7595 2022-01-05 18:54:39Z flaterco $
# Functions for importNOS.rb:
# Tidal reference station import

# Messiness happens because every field wants to be fixed up using some other
# field that is also unreliable or incomplete.  Something has to go first.

# Threshold for equality-testing on constituent speeds.  Two speeds are
# treated as equal when their absolute difference is at most this value
# (findBySpeed uses <=; importTideRefs raises "Bad speed" when the difference
# is greater).  Expressed in the same units as the speeds being compared.
SpeedTolerance = 3e-5

# Suggest candidates for a constituent whose name was not in the database.
#
# speeds:: mapping of constituent name => speed
# speed::  speed of the unrecognized constituent
#
# Returns an array of the names in +speeds+ whose speed differs from +speed+
# by no more than SpeedTolerance, in the iteration order of +speeds+.  The
# array is empty when nothing is close enough.
def findBySpeed(speeds, speed)
  candidates = []
  speeds.each do |name, known_speed|
    candidates << name if (known_speed - speed).abs <= SpeedTolerance
  end
  candidates
end

# Import every tide reference station listed in harcon.json.
#
# For each station that is not skipped by isMaybeNotFreeData, this inserts one
# row into data_sets and one row into constants per retained harmonic
# constituent.  For stations with no prior record (getOldNameStateTz returns
# nil) it also appends a block to renamings-tiderefs.sql consisting of SQL
# comments (sid, coordinates, candidate names, geogroup) followed by an
# "update data_sets" statement.  As written, that statement sets name to the
# value it already has; presumably it is a template to be edited by hand.
#
# db:: database handle.  Used directly via exec and escape_string and passed
#      through to the lookup helpers (getOldStateMap, getSpeeds, nameInUse,
#      and so on).
#
# If renamings-tiderefs.sql already exists, prints a message and exits without
# importing anything.
#
# Raises RuntimeError (string raise) when the input does not look as expected:
# a non-tidal or Great Lakes station, a nil station name, an empty state, an
# unrecognized notice, repeated MLLW/MSL datums, M2 not being the first
# constant, a constituent speed that disagrees with the database, an unknown
# constituent, or a station for which no constants were added.
def importTideRefs(db)
  if File.exist?("renamings-tiderefs.sql")
    puts "Don't want to overwrite renamings-tiderefs.sql; do something with it"
    exit
  end
  gt = readGeogroupsTidesJson()

  # Report questionable subregions for addition to fixgeogroup.
  # gt.each_value{|g|
  #   if g[:level] == 7
  #     n = g[:name]
  #     puts n if !n.nil? and n.include?(",")
  #   end
  # }

  # tideGeogroupTest(gt)

  # Load all lookup tables up front; they are reused for every station.
  sbgt = readStationsByGroupTidesJson()
  stateMap = getOldStateMap(db)
  wAleutSids = getWesternAleutians(db)
  aliases = getConstituentAliases(db)
  speeds = getSpeeds(db)
  tides = readTidesJson()
  harcon = JSON.parse(File.read("harcon.json"))["stations"]
  File.open("renamings-tiderefs.sql","w") {|renamings|
    harcon.each {|hsta|
      # Only tidal, non-Great-Lakes stations are handled; anything else
      # aborts the import.
      raise "Unhandled: tidal is not true"       if hsta["tidal"] != true
      raise "Unhandled: greatlakes is not false" if hsta["greatlakes"] != false

      sid = hsta["id"]
      hname = hsta["name"]
      raise "Nil hname" if hname.nil?
      if isMaybeNotFreeData(gt, sbgt, hsta)
	print "Skipped ", sid, " ", hname, "\n"
	next
      end

      # We are enumerating tide reference stations from harcon and do not
      # care what the type field of tides.json says.  readTidesJson
      # ensures that there is at most one record with a given sid.
      # tsta and tname are nil when tides.json has no record for this sid.
      tsta = (tides.include?(sid) ? tides[sid] : nil)
      tname = (tsta.nil? ? nil : tsta["name"])

      # The coordinates in harcon are very unreliable, so the tides.json
      # coordinates are preferred whenever that record exists.
      lat, lng = (tsta.nil? ? [hsta["lat"], hsta["lng"]] : [tsta["lat"], tsta["lng"]])
      # A negative longitude on a station that westernAleutian? accepts is
      # flipped to positive.
      if lng < 0 && westernAleutian?(gt, sbgt, wAleutSids,
				     (tname.nil? ? hname : tname), sid)
	print "Fixing longitude of ", sid, " ", hname, "\n"
	lng = -lng
      end

      # A station is "new" when there is no old (name, state, tz) tuple for
      # its sid.  New stations get state, name and timezone guessed here;
      # existing ones reuse the old values (with the name cleaned up).
      oldtuple = getOldNameStateTz(db, sid)
      newStation = oldtuple.nil?
      if newStation
        # state a.k.a. statecode_guess
	# tides[sid]["state"] tends to be blank sometimes but otherwise
	# agrees with hsta["state"].
	state = guessState(gt, sbgt, stateMap, hsta, lat, lng)
	orig_geogroup = getTideGeogroupBySid(gt, sbgt, sid, state)
        geogroup = fixgeogroup(orig_geogroup)
        # Guessed statecode is more trustworthy than geogroup[0]--even when
        # it's nil.
        geogroup = [(state.nil? ? nil : StateName[state]), geogroup[1], geogroup[2]]
        fullhname = generateName(hname, nil, geogroup, nil)
        fulltname = generateName(tname, nil, geogroup, nil) # maybe nil
        # Use the harcon-derived name unless the tides.json-derived name
        # exists and is strictly longer.
        name = fullhname
        name = fulltname if !fulltname.nil? and fulltname.length > fullhname.length
	tz = guessTimezone(state, name, lat, lng) # nil statecode OK
      else
	oldname, state, tz = oldtuple
        oldname = chompname(oldname)
        name = fixname(oldname)
        if name != oldname
          print "#{sid} fixing up name from data_sets_old\n"
          print "  #{oldname}\n"
          print "  #{name}\n"
        end
      end

      raise "State might be nil but should not be empty" if !state.nil? and state.empty?
      country = guessCountry(state, name)

      # harcon.json has "timezonecorr" (hours) and "timezone" ("xST")
      # tides.json has "timemeridian" (degrees = 15 / hour)
      # They often disagree with each other and with the LST meridian of
      # harmonic constants, and they aren't used for anything.

      # Notice the notices.
      # Each notice must match one of the known text patterns below; an
      # unrecognized notice is printed and aborts the import.
      notes = nil
      comments = nil
      hsta["notices"]["notices"].each {|notice|
	text = notice["text"]
	if text.include?("high water condition")
	  # Irrelevant.
	elsif text.include?("Columbia River Datum")
	  # These notes are misleading since the predictions match the
	  # default results for MLLW.
	  # notes = "The datum for this station is Columbia River Datum."
	elsif text.include?("may wish to re-download")
	  comments = append(comments, wrapPlain(text))
	else
	  print "Unhandled notice:\n", text, "\n"
	  raise "Unhandled notice"
	end
      }

      # The disclaimers are too long to put in notes.
      # They go into comments instead, and notes is set to a fixed pointer
      # whenever at least one disclaimer exists.  hdiscs remembers the raw
      # texts so the datum disclaimers below are not appended twice.
      hdiscs = Array.new
      hsta["disclaimers"]["disclaimers"].each {|disclaimer|
        text = disclaimer["text"]
	comments = append(comments, wrapPlain(text))
	hdiscs.push(text)
	notes = "See station comments in 'about' for disclaimers."
      }

      datums = readStationDatums(sid)
      xfields = "Credit:NOAA data processed by David Flater for XTide\n https://flaterco.com/xtide/"
      # datum stays nil unless exactly one MLLW and one MSL entry are found.
      datum = nil
      unless datums.nil?
	datums["disclaimers"]["disclaimers"].each {|disclaimer|
          text = disclaimer["text"]
	  # Cursed duplication.
	  comments = append(comments, wrapPlain(text)) unless hdiscs.include?(text)
	  notes = "See station comments in 'about' for disclaimers."
	}
	datarray = datums["datums"]
	unless datarray.nil? or datarray.empty?
	  # The desired value is MSL - MLLW.
          # The CRD stations are providing MLLW, CRD_OFFSET, and a
          # disclaimer.  The behavior of the web site when CRD is selected is
          # surprising, so for now I'm doing nothing with CRD_OFFSET.
	  mllw = datums["datums"].select{|d| d["name"] == "MLLW"}
	  msl  = datums["datums"].select{|d| d["name"] == "MSL"}
	  raise "Repeated datums" if mllw.length > 1 or msl.length > 1
	  if mllw.length == 1 and msl.length == 1
	    datumkind = "Mean Lower Low Water"
	    datum = msl[0]["value"] - mllw[0]["value"]
	  end
	end
      end
      # Fallback when no usable MLLW/MSL pair was found: record a zero datum
      # and say so in the comments.
      if datum.nil?
	datumkind = "Mean Astronomical Tide"
	datum = 0.0
	comments = append(comments, "Datum not available from MDAPI")
      end

      constants = readStationConstants(sid)

      # Warn about LST discrepancies
      # M2 must be the first constant.  The meridian that meridFromM2 derives
      # from its GMT and local phases is compared with the TZmerids entry for
      # the station's timezone; a mismatch is printed and added to notes.
      m2 = constants[0]
      raise "M2 not where expected" if m2["name"] != "M2"
      m2merid = meridFromM2(m2["phase_GMT"], m2["phase_local"])
      tzmerid = TZmerids[tz]
      if tzmerid != m2merid
	print "LST problem: ", sid, " ", name, "\n"
	# NOTE: warn is a local String here and shadows Kernel#warn for the
	# rest of this block.
	case state
	when "AK"
	  warn = "NOS uses UTC%+d for LST; " % m2merid
	  warn += (tz == ":America/Adak" ?
		   "HAST (since 1983) is UTC-10" :
		   "AKST (since 1983) is UTC-9")
	else
	  warn = "NOS uses UTC%+d for LST" % m2merid
	end
	notes = append(notes, warn)
      end

      # Name clashes:  since tide refs go first, the only clashes will be
      # multiple data sets with exactly the same name, like
      # Grand Isle, Mississippi River Delta, Louisiana
      # 8761720 *and* 8761724
      # The begin/end-while body runs before its condition is tested, so the
      # first suffix tried is " (2)"; the counter keeps increasing until an
      # unused name is found.
      if nameInUse(db, name)
        stupidSuffixNumber = 1
	begin
	  stupidSuffixNumber += 1
	  tryname = name + " (" + stupidSuffixNumber.to_s + ")"
        end while nameInUse(db, tryname)
	name = tryname
      end

      # For new stations only: write the review block to the renamings file.
      # The name is escaped with db.escape_string; state is interpolated
      # as-is inside single quotes.
      if newStation
	mname = db.escape_string(name)
	renamings.print "-- #{sid} #{lat} #{lng}\n"
        if !fulltname.nil? and fulltname != fullhname
          renamings.print "-- hname: #{db.escape_string(fullhname)}\n"
          renamings.print "-- tname: #{db.escape_string(fulltname)}\n"
        end
        # The geogroup is labelled as guessed when sbgt has no entry for sid.
        gtext = (sbgt.include?(sid) ? "geogroup" : "geogroup (GUESSED)")
        if orig_geogroup != geogroup
          renamings.print "-- orig_#{gtext}: #{orig_geogroup}\n"
        end
        renamings.print "-- #{gtext}: #{geogroup}\n"
        renamings.print "update data_sets\n"
        renamings.print "  set state = ",
          (state.nil? ? "null" : "'#{state}'"), ",\n"
        renamings.print "  name = '#{mname}'\n  where name = '#{mname}';\n"
      end

      # 2018-12-26 13:22
      # Changed insert to use returning so oids no longer needed.
      # NOTE(review): this row is inserted before the constants are
      # validated, and several raises follow.  Whether a failure rolls the
      # row back depends on transaction handling that is not visible here.
      res = db.exec("insert into data_sets (name, original_name, station_id_context, station_id, lat, lng, timezone, state, country, units, source, notes, comments, restriction, meridian, datumkind, datum, xfields) values ($1, $2, 'NOS', $3, $4, $5, $6, $7, $8, 'feet', 'CO-OPS Metadata API', $9, $10, 'Public domain', '0:00', $11, $12, $13) returning index", [name, hname, sid, lat, lng, tz, state, country, notes, comments, datumkind, datum, xfields])
      # index of the new data_sets row, used as the key for its constants.
      index = res.getvalue(0,0).to_i

      # Add the constants, verifying that speeds match and trying to identify
      # any constituents that aren't in there by such name.
      missingConstituents = false
      const_added = 0
      constants.each {|constant|
	amp = constant["amplitude"]
	# Constituents with an amplitude below 0.00005 are not stored.
	next if amp < 0.00005
	constname = constant["name"]
	phase = constant["phase_GMT"]
	phase = 0 if phase == 360  # This happens
	# Any other phase outside [0, 360) is reported and wrapped into range.
	if phase < 0 or phase >= 360
	  print "Station ", sid, " constituent ", constname, " phase = ", phase, "\n"
          phase %= 360
	  print "  Normalized to ", phase, "\n"
	end
	# Translate the constituent name through the alias table if it has an
	# entry there.
	constname = aliases[constname] if aliases.include?(constname)
	speed = constant["speed"]
	if !speeds.include?(constname)
	  print "Station ", sid, " constituent ", constname, " not in harmbase\n"
	  maybe = findBySpeed(speeds, speed)
	  if maybe.empty?
	    puts "No constituent with matching speed found"
	  else
	    print "Did you mean ", maybe.join(", or "), "?\n"
	  end
	  # Keep going so every unknown constituent of this station is
	  # reported before the raise after the loop.
	  missingConstituents = true
	  next
	end
	raise "Bad speed" if (speed - speeds[constname]).abs > SpeedTolerance
	# The result of this insert is not used afterwards.
	res = db.exec("insert into constants values ($1, $2, $3, $4)",
		      [index, constname, phase, amp])
	const_added += 1
      }
      raise "Unknown constituent" if missingConstituents
      if const_added.zero?
	print "** No harmonic constants added for #{sid} #{name}\n"
	raise "Broken station"
      end
    }
  }
end
