# $Id: importNOS_Geogroups.rb 7589 2022-01-02 22:31:42Z flaterco $
# Functions for importNOS.rb:
# Working with -tide- geogroups

# Fix up level 6 geogroup names; make them presentable or nil them out.
# Keys are the level 6 names as received; a String value is the replacement
# name, and a nil value means the name is suppressed.
# Those that do not appear in the hash are left unchanged.
# Consulted by getTideGeogroupsByGgid when it processes a level 6 geogroup.
L6rewrite = {
  "CONNECTICUT, Long Island Sound" => "Long Island Sound",
  "Delaware Bay, Western Shore" => "Delaware Bay",
  "Nassau Sound and Fort George River" => nil,
  # NOTE(review): "Hawaiin" looks misspelled; presumably it matches the
  # incoming name verbatim -- confirm against the data before changing.
  "Hawaiin Islands" => nil,
  "British Columbia and Alaska" => nil,
  "Gulf of Alaska" => nil,
  "Eastport to Swans Island" => nil,
  "MARYLAND, outer coast" => nil,
  "MASSACHUSETTS, outer coast" => nil,
  "MAINE and NEW HAMPSHIRE" => nil,
  "Long Island Sound, north side" => "Long Island Sound",
  "Delaware River-cont." => "Delaware River",
  "SOUTH CAROLINA, outer coast" => nil,
  "VIRGINIA, outer coast" => nil,
  "FLORIDA, St. Johns River" => "St. Johns River",
  "Tybee Creek and Wassaw Sound" => nil,
  "Southern California" => nil,
  "Outer Coast" => nil,
  "RHODE ISLAND, Outer Coast" => nil,
  "Chesapeake Bay, Eastern Shore" => "Chesapeake Bay",
  "FLORIDA, East Coast" => nil,
  "MAINE, outer coast" => nil,
  "Delaware Bay, Eastern Shore" => "Delaware Bay",
  "Long Island, Long Island Sound" => "Long Island",
  "DELAWARE, outer coast" => nil,
  "Chesapeake Bay, Western Shore-cont." => "Chesapeake Bay",
  "Florida Keys" => nil,
  "St. Catherines and Sapelo Sounds" => nil,
  "Cohasset Harbor to Davis Bank" => nil,
  "Chesapeake Bay, western shore" => "Chesapeake Bay",
  "Doboy and Altamaha Sounds" => nil,
  "MAINE, outer coast-cont." => nil,
  "Nantucket Sound, north side" => "Nantucket Sound",
  "SOUTH CAROLINA, outer coast-cont." => nil,
  "Dall Island, west coast" => "Dall Island",
  "Outer Coast, Port San Luis" => "Port San Luis",
  "The Kills and Newark Bay" => nil,
  "Lesser Antilles & Virgin Islands" => nil,
  "Meares Passage to Davidson Inlet" => nil,
  "Gulf Coast" => nil,
  "Gulf Coast-cont." => nil,
  "Lower New York Bay, Raritan Bay, etc." => nil,
  "Chesapeake Bay, southern shore" => "Chesapeake Bay",
  "Yaquina Bay and River" => nil,
  "Outer Coast, San Francisco" => "San Francisco",
  "Possession Sound and Port Susan" => nil,
  "Baranof Island, west coast" => "Baranof Island",
  "Saratoga Passage and Skagit Bay" => nil,
  "Salisbury Sound and Peril Strait" => nil,
  "Rosario Strait, etc." => nil,
  "Chichagof Island, west coast" => "Chichagof Island",
  "Kenai Peninsula, outer coast" => "Kenai Peninsula",
  "Kodiak and Afognak Islands" => nil,
  "Kuskokwim Bay and River" => nil,
  "NORTH CAROLINA, outer coast" => nil
}

# Second round of fixups, run after L6rewrite, on one
# [state, region, subregion] triple.  Note that import will overwrite the
# state with its own guess anyway.
#
#   * A subregion ending in ", etc." is dropped (set to nil).
#   * Within region "Delaware River", a subregion ending in
#     ", Pennsylvania", ", Delaware" or ", New Jersey" has that suffix
#     removed and the state replaced by the suffix's state name.
#
# Returns a new triple when something changed; otherwise returns g itself.
def fixgeogroup(g)
  subregion = g[2]
  return g if subregion.nil?
  return [g[0], g[1], nil] if subregion.end_with?(", etc.")
  return g unless g[1] == "Delaware River"

  # First state (in this order) whose name is the trailing ", State" part.
  matched = ["Pennsylvania", "Delaware", "New Jersey"].find do |candidate|
    subregion.end_with?(", " + candidate)
  end
  return g if matched.nil?

  [matched, g[1], subregion.chomp(", " + matched)]
end

# Walk up the parent links from geogroup ggid and collect the names of the
# ancestors (or of ggid itself) that sit at level lvl.  (lvl should be int)
#
# gt maps geogroup ID => record; the record fields read here are :level,
# :name and :parents (a list of parent geogroup IDs).
#
# Returns an array of names; empty if ggid is unknown or already sits
# below the requested level number.
# Does not modify l4 or l6.
# Does not suffix the parent levels.
# Duplicates resulting from diamond inheritance are removed.
# Static function called only by stationGeoLevelName.
def geoLevelName(gt, ggid, lvl)
  return [] unless gt.include?(ggid)

  node = gt[ggid]
  depth = node[:level]
  if depth > lvl
    # Still beneath the target level: ask every parent and merge the answers.
    from_parents = node[:parents].map { |parent_id| geoLevelName(gt, parent_id, lvl) }
    return from_parents.flatten.uniq
  end

  depth == lvl ? [node[:name]] : []
end

# Same as geoLevelName, but starting from a station ID instead of a
# geogroup ID.  The station's "parentGeoGroupId" entry in sbgt is converted
# with to_i and used as the starting geogroup.  Returns [] when sid is not
# present in sbgt.
# This is a leftover from the pre-2020 geogroups code which doesn't really
# need to be replaced.
# Callers (all in DodgyCode):
#   isMaybeNotFreeData
#   westernAleutian?
#   guessState
def stationGeoLevelName(gt, sbgt, sid, lvl)
  return [] unless sbgt.include?(sid)

  start_ggid = sbgt[sid]["parentGeoGroupId"].to_i
  geoLevelName(gt, start_ggid, lvl)
end

# 2020-12
# Transitioned to simplified geogroups system used for currents:
# state, region, subregion = l5, l6, l7
# One subregion is incorrectly placed at l2.
# Islands, other countries are placed randomly in l5..l7.
#
# As of 2017:
# Level 2 is only Elizabeth River; should be 7.
# There is nothing at levels 1 or 3.
# Level 4 is West Coast, East Coast, Gulf Coast, Pacific, Caribbean Islands.
# State names are level 5.
# Level 6 is a mixed bag; some need to be suppressed.  Some include state.
# Level 7 is the bottom and always good.
# The only ones with multiple parents are Potomac River (DC/MD/VA) and
# Columbia River (WA/OR).

# Translate a geogroup ID to [[state, region, subregion], ...].
# Returns an array because a given geogroup can have multiple parents and we
# have no clue which way to go.
# L4 is suppressed.  L6 is rewritten or suppressed per L6rewrite.
#
# gt maps geogroup ID => record; the record fields read here are :level,
# :name and :parents (a list of parent geogroup IDs).
#
# Identical triples reached through different parents (diamond inheritance,
# or two L6 parents that L6rewrite maps to the same name) are collapsed into
# one, so that callers are not asked to disambiguate between indistinguishable
# answers.
#
# Returns [] for an unknown ggid, for a level that is not handled, for a
# state with a nil name, and for a parentless region whose name is
# suppressed.
# Raises RuntimeError ("Nil subregion" / "Nil region") when a subregion or
# region record has a nil name.
def getTideGeogroupsByGgid(gt, ggid)
  return [] unless gt.include?(ggid)

  g = gt[ggid]
  case g[:level]
  when 0, 2, 7 # subregion (level null becomes 0)
    myname = g[:name]
    raise "Nil subregion" if myname.nil?
    inherited = g[:parents].flat_map { |pgid| getTideGeogroupsByGgid(gt, pgid) }
    if inherited.empty?
      [[nil, nil, myname]]
    else
      # Keep state and region from each parent; this level supplies the
      # subregion.
      inherited.map { |pg| [pg[0], pg[1], myname] }.uniq
    end
  when 6 # region
    myname = g[:name]
    raise "Nil region" if myname.nil?
    myname = L6rewrite[myname] if L6rewrite.include?(myname) # can become nil
    inherited = g[:parents].flat_map { |pgid| getTideGeogroupsByGgid(gt, pgid) }
    if inherited.empty?
      # A suppressed region with no parents contributes nothing at all.
      myname.nil? ? [] : [[nil, myname, nil]]
    else
      inherited.map { |pg| [pg[0], myname, pg[2]] }.uniq
    end
  when 5 # state
    # l5 includes:
    #   Normal long state names
    #   Washington DC
    #   American Samoa
    #   Tokelau
    #   nil
    # May as well standardize state names here.
    mystate = g[:name]
    if mystate.nil?
      []
    elsif mystate == "Washington DC"
      [["DC", nil, nil]]
    else
      [[mystate, nil, nil]]
    end
  else
    []
  end
end

# Test or dump.
# For every geogroup ID in gt, prints the ID, then the result of
# getTideGeogroupsByGgid, then (only when it differs) the same result after
# each entry has been passed through fixgeogroup.
def tideGeogroupTest(gt)
  # Alternative ID selections used while debugging:
  #   [1526, 1672, 1548, 1542]
  #   [1620]               Elizabeth River error
  #   [1461, 1526, 1614]   Multiple parents
  #   gt.keys.sample(5)
  #   [1541]               [[nil, nil, nil]] happened (fixed)
  gt.keys.each do |ggid|
    print "#{ggid}\n"
    raw = getTideGeogroupsByGgid(gt, ggid)
    fixed = raw.map { |triple| fixgeogroup(triple) }
    print "  #{raw}\n"
    print "  #{fixed}\n" unless fixed == raw
  end
end

# Translate a geogroup ID to a single [state, region, subregion].
# Exactly one answer guaranteed.  Disambiguated by state.
#
# No candidates gives [nil, nil, nil]; a single candidate is returned as is.
# With several candidates, statecode is looked up in StateName (defined
# elsewhere) and the one candidate whose state equals that name is returned.
#
# Statecode can be nil if unknown, but may throw: RuntimeError is raised
# when disambiguation is needed and statecode is nil, is not in StateName,
# or does not select exactly one candidate.
def getTideGeogroupByGgid(gt, ggid, statecode)
  candidates = getTideGeogroupsByGgid(gt, ggid)
  raise "g should never be nil" if candidates.nil?
  return [nil, nil, nil] if candidates.empty?
  return candidates[0] if candidates.length == 1

  # More than one candidate: the state is the only tie-breaker available.
  raise "Need statecode for resolution" if statecode.nil?
  raise "Unknown statecode" unless StateName.include?(statecode)
  wanted_state = StateName[statecode]
  matching = candidates.select { |candidate| candidate[0] == wanted_state }
  raise "Resolution failed" unless matching.length == 1
  matching[0]
end

# Same as getTideGeogroupByGgid, but starting from a station ID instead of
# a geogroup ID.
#
# When sid is present in sbgt, its "parentGeoGroupId" (converted with to_i)
# is resolved directly.  Otherwise the answer is interpolated, like
# guessCurrentsGeogroups, from the two stations whose IDs are nearest below
# and above sid:
#   * same geogroup on both sides: that geogroup is used;
#   * otherwise only the leading fields the two neighbors agree on are kept
#     (state and region, state only, or nothing), the rest being nil.
# A message describing the guess is printed in the interpolated cases.
#
# Raises "Edge case" when sid has no neighbor on one side, and whatever
# getTideGeogroupByGgid raises.
def getTideGeogroupBySid(gt, sbgt, sid, statecode)
  if sbgt.include?(sid)
    return getTideGeogroupByGgid(gt, sbgt[sid]["parentGeoGroupId"].to_i, statecode)
  end

  # Nearest known station IDs on either side of sid.
  below, not_below = sbgt.keys.partition { |k| k < sid }
  prev_sid = below.max
  next_sid = not_below.min
  raise "Edge case" if prev_sid.nil? || next_sid.nil?

  prev_ggid = sbgt[prev_sid]["parentGeoGroupId"].to_i
  next_ggid = sbgt[next_sid]["parentGeoGroupId"].to_i

  if prev_ggid == next_ggid
    guess = getTideGeogroupByGgid(gt, prev_ggid, statecode)
    print "No geogroup for #{sid}; guessed #{guess}\n"
    return guess
  end

  print "No geogroup for #{sid} and neighbors disagree\n"
  from_prev = getTideGeogroupByGgid(gt, prev_ggid, statecode)
  print "  Prev: #{prev_sid} #{from_prev}\n"
  from_next = getTideGeogroupByGgid(gt, next_ggid, statecode)
  print "  Next: #{next_sid} #{from_next}\n"

  # Keep only what both neighbors have in common, most significant first.
  return [nil, nil, nil] unless from_prev[0] == from_next[0]

  shared_region = from_prev[1] == from_next[1] ? from_prev[1] : nil
  [from_prev[0], shared_region, nil]
end
