# $Id: importNOS_DodgyCode.rb 7591 2022-01-03 00:53:26Z flaterco $
# Functions for importNOS.rb:
# Dodgy code for filling in the fields and metadata that we don't get.

# -tides-
# false: the station is in a US state, a territory or commonwealth of the US,
#   or a country in free association with the US.
# true: the station should be dropped.
#
# This is called before states are rationalized.  It has to work with what is
# in the record.  Longitude might be wrong too, but is used only for VI.
#
# // CA ends a little south of Imperial Beach.
# assert (lat >= 32.5);
def isMaybeNotFreeData(gt, sbgt, somerec)
  # Station names that give the game away by themselves.
  stationName = somerec["name"]
  foreignMarkers = [", B.C.", "Wales Island (Cannery)", "(N.Z.)", "(Chile)"]
  foreignMarkers.each do |marker|
    return true if stationName.include?(marker)
  end

  # Mostly state / geogroup names, plus one station ID that is checked at
  # the very end.
  blacklist = ["Antigua and Barbuda", "Bahamas", "Baja California",
    "Bermuda", "Bermuda Islands", "Cook Islands", "Cuba", "El Salvador",
    "Fiji", "French Polynesia", "Guatemala", "Haiti and Dominican Republic",
    "Honduras", "Jamaica", "Kiribati", "Mexico", "Nicaragua", "Tokelau",
    "United Kingdom",

    # 1778000 APIA, W. SAMOA / APIA (Observatory), Upolu Island
    # West Samoa became Independent State of Samoa in 1997
    "1778000"
  ]
  return true if blacklist.include?(somerec["state"])

  # A geogroup list condemns the station only when it is non-empty and
  # every entry is blacklisted.
  allBlacklisted = lambda do |groups|
    !groups.empty? && groups.all? {|group| blacklist.include?(group)}
  end

  level6 = stationGeoLevelName(gt, sbgt, somerec["id"], 6)

  # "Lesser Antilles & Virgin Islands" is split between U.S. Virgin
  # Islands and everything else.  See also "British Columbia and Alaska."
  if level6.length == 1 && level6[0] == "Lesser Antilles & Virgin Islands"
    lat = somerec["lat"]
    lng = somerec["lng"]
    # Two lat/lng boxes are kept; anything outside both is dropped.
    # NOTE(review): presumably St. Thomas/St. John and St. Croix -- confirm.
    return false if lat > 18.2 && lat < 18.38 && lng > -65.13 && lng < -64.65
    return false if lat > 17.65 && lat < 17.82 && lng > -64.94 && lng < -64.54
    return true
  end

  return true if allBlacklisted.call(level6)

  level5 = stationGeoLevelName(gt, sbgt, somerec["id"], 5)
  return true if allBlacklisted.call(level5)

  blacklist.include?(somerec["id"])
end

# -currents-
# Precedent:  a list of 7 hard-coded names in importNOSSC.pgcc.
def isMaybeNotFreeCurrent(sid)
  # True when sid is one of the hard-coded border-area current stations.

  # WA-BC border (on Wrangell)
  firstGroup = %w[PCT2576 PCT2586 PCT2591 PCT2601 PCT2606 PCT2611 PCT2616]

  # AK-BC border
  secondGroup = %w[PCT1391 PCT1411 PCT2661 PCT2666 PCT2671 PCT2691 PCT2696
                   PCT2701 PCT2706]

  firstGroup.include?(sid) || secondGroup.include?(sid)
end

# -tides-
# Detect western Aleutians before name gets finalized.
# Island names that mark a station as western Aleutian, whether they show
# up as an l7 geogroup or inside the station name.
W_Aleut_Islands = [
  # l7 geogroup, 3 stations; also, in currents.json,
  # "Krysi Pass, Rat Islands" and "Sea Lion Pass, Rat Islands"
  "Rat Islands",
  # l7 geogroup, 5 stations (1 ref)
  "Attu Island",
  # station name in tides.json, "Alcan Harbor, Shemya Island"
  "Shemya Island",
  # station name in tides.json, "Otkriti Bay, Agattu Island"
  "Agattu Island"
]

# True if sid is listed in wAleutSids, if any of its level-7 geogroup names
# is in W_Aleut_Islands, or if tname (which may be nil) contains one of the
# island names.
def westernAleutian?(gt, sbgt, wAleutSids, tname, sid)
  return true if wAleutSids.include?(sid)

  geogroups = stationGeoLevelName(gt, sbgt, sid, 7)
  return true if geogroups.any? {|group| W_Aleut_Islands.include?(group)}

  return false if tname.nil?
  W_Aleut_Islands.any? {|island| tname.include?(island)}
end

# From USPS 2-character codes and irregular forms appearing in harcon.json to
# long state names.
StateName = {
  # USPS 2-character codes, alphabetical by code.
  "AK" => "Alaska",            "AL" => "Alabama",
  "AS" => "American Samoa",    "CA" => "California",
  "CT" => "Connecticut",       "DC" => "DC",
  "DE" => "Delaware",          "FL" => "Florida",
  "FM" => "FSM",               "GA" => "Georgia",
  "GU" => "Guam",              "HI" => "Hawaii",
  "LA" => "Louisiana",         "MA" => "Massachusetts",
  "MD" => "Maryland",          "ME" => "Maine",
  "MH" => "Marshall Islands",
  # MP could also be "Northern Mariana Islands" (vs. Guam).
  "MP" => "Marianas",
  "MS" => "Mississippi",       "NC" => "North Carolina",
  "NH" => "New Hampshire",     "NJ" => "New Jersey",
  "NY" => "New York",          "OR" => "Oregon",
  "PA" => "Pennsylvania",      "PR" => "Puerto Rico",
  "PW" => "Palau",             "RI" => "Rhode Island",
  "SC" => "South Carolina",    "TX" => "Texas",
  "VA" => "Virginia",
  # VI could also be "US Virgin Islands".
  "VI" => "Virgin Islands",
  "WA" => "Washington",

  # Irregular forms.  The two generic "United States" spellings map to nil.
  "United States of America" => nil,
  "US" => nil,
  "American Samoa" => "American Samoa",
  "Micronesia, Federated Sta" => "FSM",
  "Virgin Islands, US" => "Virgin Islands"

  # Other abbreviations that have been known to appear:
  #   BA = Bahamas (independent)
  #     but ISO 3166 says BA = Bosnia & Herzegovina.
  #   MT = Madero-Tampico (Tamaulipas, Mexico)
  #     but ISO 3166 says MT = Malta.
}

# From long state names, L5 geogroups, and variants to USPS 2-character codes.
# Tokelau (New Zealand) also appears in L5 for some reason.
StateCode = {
  "Alaska" => "AK",            "Alabama" => "AL",
  "American Samoa" => "AS",    "California" => "CA",
  "Connecticut" => "CT",

  # Assorted spellings of the District of Columbia.
  "DC" => "DC",                "D.C." => "DC",
  "Washington DC" => "DC",     "Washington D.C." => "DC",
  "Washington, DC" => "DC",    "Washington, D.C." => "DC",

  "Delaware" => "DE",          "Florida" => "FL",
  "FSM" => "FM",               "F.S.M." => "FM",
  "Georgia" => "GA",           "Guam" => "GU",
  "Hawaii" => "HI",            "Louisiana" => "LA",
  "Massachusetts" => "MA",     "Maryland" => "MD",
  "Maine" => "ME",             "Marshall Islands" => "MH",
  "Marianas" => "MP",
  "Micronesia, Federated Sta" => "FM",
  "Mississippi" => "MS",       "North Carolina" => "NC",
  "New Hampshire" => "NH",     "New Jersey" => "NJ",
  "New York" => "NY",          "Oregon" => "OR",
  "Palau" => "PW",             "Pennsylvania" => "PA",
  "Puerto Rico" => "PR",       "Rhode Island" => "RI",
  "South Carolina" => "SC",    "Texas" => "TX",
  "Virginia" => "VA",
  "Virgin Islands" => "VI",    "Virgin Islands, US" => "VI",
  "Washington" => "WA"
}

# Return the state code from the nearest station of any kind appearing in
# data_sets_old.
# [0] state code
# [1] distance in units relative to sphere size
# Relying on this is a Faustian bargain:  it is inevitable that it will
# introduce errors, and any errors that exist it will amplify.
def guessStateDumb(stateMap, lat, lng)
  # stateMap entries are [position, state code] pairs whose positions are
  # comparable with to_xyz output via distfn.
  here = to_xyz(lat, lng)
  nearest = stateMap.min_by {|position, _code| distfn(here, position)}
  code = nearest[1]
  # The reported distance is the square root of what distfn returns.
  [code, Math.sqrt(distfn(here, nearest[0]))]
end

# -tides-
# Guess the state for new stations.  Takes coordinates redundantly with
# somerec because of W Aleutians longitude problem.
def guessState(gt, sbgt, stateMap, somerec, lat, lng)
  sid = somerec["id"]

  # Hard-wired answers for specific stations.
  case sid
  when "9440079",   # Beacon Rock State Park  (Columbia River in WA)
       "9440357",   # TEMCO Kalama Terminal   (Columbia River in WA)
       "9440483"    # Barlow Point            (Columbia River in WA)
    return "WA"
  when "TPT2631",   # Shonian Harbor (Palau)
       "TPT2633"    # Koror (Palau)
    return "PW"
  end

  # The geogroup state is often wrong.
  # - Near bodies of water that have the state line running down the middle
  # - When the state line is at the entrance of the stream in the subregion,
  #   which empties into the river in the region
  # - Occasionally just dead wrong
  # State fields in tsta and hsta are [now] more accurate, so a non-empty
  # state in the record wins.
  declared = somerec["state"]
  if !declared.nil? && !declared.empty?
    # Fix up irregular forms appearing in JSON records.
    case declared
    when "American Samoa"
      return "AS"
    when "Micronesia, Federated Sta"
      return "FM"
    when "United States of America"
      return nil
    else
      return declared
    end
  end

  # Could add workaround here to detect Guam by lat lng.
  # Also Marshall Islands.
  # This misses the corrections made by fixgeogroup, but import will
  # overwrite the state with its own guess anyway.
  level5 = stationGeoLevelName(gt, sbgt, sid, 5)
  if level5.length == 1 && StateCode.include?(level5[0])
    return StateCode[level5[0]]
  end

  # So... it has come to this.
  nearestState, dist = guessStateDumb(stateMap, lat, lng)
  print format("Guessing state %s (d=%.0e) for %s %s lat %0.4f lng %0.4f\n",
               nearestState, dist, sid, somerec["name"], lat, lng)
  nearestState
end

# Assign a country on the assumption that MaybeNotFreeData has been filtered
# out and the state code has been rationalized by guessState.  Territories
# and commonwealths of the US are called USA.  We just have to identify other
# "associated states."
def guessCountry(state, name)
  # With a state code in hand, only the three associated states differ
  # from "USA".
  unless state.nil?
    return {"FM" => "FSM", "MH" => "RMI", "PW" => "Palau"}.fetch(state, "USA")
  end

  # No state:  fall back on what the station name mentions.
  if ["FSM", "F.S.M.", "Micronesia"].any? {|word| name.include?(word)}
    return "FSM"   # Federated States of Micronesia
  end
  return "RMI" if name.include?("Marshall Islands")   # Republic of the Marshall Islands
  return "Palau" if name.include?("Palau")
  "USA"
end

def guessTimezone(state, name, lat, lng)
  # States with only one time zone, as [zone, state codes] pairs.
  [
    [":America/Puerto_Rico", ["PR"]],
    [":America/St_Thomas", ["VI"]],
    [":America/New_York", ["ME", "NH", "MA", "RI", "CT", "NY", "PA", "NJ",
                           "DE", "MD", "DC", "VA", "NC", "SC", "GA"]],
    [":America/Chicago", ["AL", "MS", "LA", "TX"]],
    [":America/Los_Angeles", ["WA", "OR", "CA"]],
    [":Pacific/Honolulu", ["HI"]],
    [":Pacific/Pago_Pago", ["AS"]],
    [":Pacific/Guam", ["GU"]],
    [":Pacific/Saipan", ["MP"]],
    [":Pacific/Palau", ["PW"]]
  ].each do |zone, codes|
    return zone if codes.include?(state)
  end

  # Hard cases
  case state
  when "FL"
    # Port St. Joe is the westernmost spot on Eastern time.
    # As of 2017, longitude is quoted as -85.3133.
    if lng < -85.31334
      ":America/Chicago"
    else
      ":America/New_York"
    end

  when "FM"
    # Three zones (Chuuk, Pohnpei, and Kosrae).
    # Yap and Truk are both backward-compatibility links to Chuuk.
    # Only names mentioning Chuuk or Ifalik are handled.
    unless ["Chuuk", "Ifalik"].any? {|place| name.include?(place)}
      print "** Unsupported FM case: #{name}\n"
      raise "Unsupported FM case in guessTimeZone"
    end
    ":Pacific/Chuuk"

  when "MH"
    return ":Pacific/Kwajalein" if name.include?("Kwajalein")
    return ":Pacific/Wake" if name.include?("Wake")
    print "** Unsupported MH case: #{name}\n"
    raise "Unsupported MH case in guessTimeZone"

  when nil
    return ":Pacific/Johnston" if name.include?("Johnston")
    return ":Pacific/Midway" if name.include?("Midway")
    print "** Unsupported nil case: #{name}\n"
    raise "Unsupported nil case in guessTimeZone"

  # The *really* hard case.
=begin
  ALASKAN TIME ZONES

  Both before and after 1983, there was a time zone change as you headed
  out the Aleutians.  However, the location of the dividing line moved.
  Five distinct zones result (tzdata2004g):

  Zone America/Juneau      15:02:19 -     LMT     1867 Oct 18
			   -8:57:41 -     LMT     1900 Aug 20 12:00
			   -8:00  -       PST     1942
			   -8:00  US      P%sT    1946
			   -8:00  -       PST     1969
			   -8:00  US      P%sT    1983 Oct 30 2:00
			   -9:00  US      Y%sT    1983 Nov 30
			   -9:00  US      AK%sT
  Zone America/Yakutat     14:41:05 -     LMT     1867 Oct 18
			   -9:18:55 -     LMT     1900 Aug 20 12:00
			   -9:00  -       YST     1942
			   -9:00  US      Y%sT    1946
			   -9:00  -       YST     1969
			   -9:00  US      Y%sT    1983 Nov 30
			   -9:00  US      AK%sT
  Zone America/Anchorage   14:00:24 -     LMT     1867 Oct 18
			   -9:59:36 -     LMT     1900 Aug 20 12:00
			  -10:00  -       CAT     1942
			  -10:00  US      CAT/CAWT 1946
			  -10:00  -       CAT     1967 Apr
			  -10:00  -       AHST    1969
			  -10:00  US      AH%sT   1983 Oct 30 2:00
			   -9:00  US      Y%sT    1983 Nov 30
			   -9:00  US      AK%sT
  Zone America/Nome        12:58:21 -     LMT     1867 Oct 18
			  -11:01:38 -     LMT     1900 Aug 20 12:00
			  -11:00  -       NST     1942
			  -11:00  US      N%sT    1946
			  -11:00  -       NST     1967 Apr
			  -11:00  -       BST     1969
			  -11:00  US      B%sT    1983 Oct 30 2:00
			   -9:00  US      Y%sT    1983 Nov 30
			   -9:00  US      AK%sT
  Zone America/Adak        12:13:21 -     LMT     1867 Oct 18
			  -11:46:38 -     LMT     1900 Aug 20 12:00
			  -11:00  -       NST     1942
			  -11:00  US      N%sT    1946
			  -11:00  -       NST     1967 Apr
			  -11:00  -       BST     1969
			  -11:00  US      B%sT    1983 Oct 30 2:00
			  -10:00  US      AH%sT   1983 Nov 30
			  -10:00  US      HA%sT

  Statutory time zone boundaries in Alaska, pre-1983
  --------------------------------------------------

  Excerpted from Code of Federal Regulations, 1982.  This language came
  from 35 FR 12318, 1970-08-01, which was a rewrite of previous text
  that had become chaotic from too many amendments.

  49 CFR 71.10

    The fifth zone, Pacific standard time zone, includes the part of the
    United States that is west of the boundary line between the mountain
    and Pacific standard time zones described in  71.9 and east of 137
    degrees W. longitude.

  49 CFR 71.11

    The sixth zone, the Yukon Standard Time Zone, includes the part of
    the United States that is between 137 degrees W. longitude and 141
    degrees W. longitude.

  49 CFR 71.12

    The seventh zone, the Alaska-Hawaii standard time zone, includes
    that part of the United States that is between 141 deg. W. longitude
    and 162 deg. W longitude and including all of the State of Hawaii.

  49 CFR 71.13

    The eighth zone, the Bering standard time zone, includes that part
    of the United States that is between 162 deg. W. longitude and 172
    deg. 30' W. longitude and that part of the Aleutian Islands that is
    west of 172 deg. 30' W. longitude, but does not include any part of
    the State of Hawaii.

  Statutory time zone boundaries in Alaska, post-1983
  ---------------------------------------------------

  Excerpted from Code of Federal Regulations, 2003.

  49 CFR 71.11

    The sixth zone, the Alaska standard time zone, includes the entire
    State of Alaska, except as provided in  71.12 of this title.

  49 CFR 71.12

    The seventh zone, the Hawaii-Aleutian standard time zone, includes
    the entire State of Hawaii and, in the State of Alaska, that part of
    the Aleutian Islands that is west of 169 degrees 30 minutes west
    longitude.

  tzdata2004g does not reflect a temporary shift of Juneau to Yukon time
  that occurred the summer of 1980.

  Numerous current sources report that St. Lawrence Island observes
  Hawaii-Aleutian time (Google "St. Lawrence Island" "Hawaii-Aleutian").
  The 2003 CFR do not clearly address St. Lawrence Island at all, though
  by default, it appears that it should be on Alaska time.
=end
  when "AK"
    # Formerly Pacific time (-8), now Alaskan time (-9).
    # The upper bound of 0 is there because the Aleutians cross 180.
    return ":America/Juneau" if lng > -137 && lng < 0

    # Other than the first ("east of 137"), the pre-1983 disposition of
    # places on the boundary lines is not defined.  Let them go west.

    # Formerly Yukon time (-9), now Alaskan time (-9).
    return ":America/Yakutat" if lng > -141 && lng <= -137

    # Formerly Alaska-Hawaii time (-10), now Alaskan time (-9).
    return ":America/Anchorage" if lng > -162 && lng <= -141

    # 2003 CFR puts places exactly at 169.5 in Alaskan time.
    # (HAST is "west of 169 degrees 30 minutes")
    #
    # Exceptions for islands north of Aleutians.  No place on the mainland
    # is west of -169.5.
    nomeCandidate = (lng >= -169.5 && lng <= -162) ||
                    (lng < -169.5 && lat > 56)
    if nomeCandidate
      # The exclusion box is St. Lawrence Island.
      excluded = lat >= 62.8314 && lat <= 63.8614 &&
                 lng >= -171.9927 && lng <= -168.3973
      # Formerly Bering time (-11), now Alaskan time (-9).
      return ":America/Nome" unless excluded
    end

    # Formerly Bering time (-11), now Hawaii-Aleutian time (-10).
    ":America/Adak"

  else
    raise "Unrecognized state in guessTimeZone: " + state
  end
end

# Map back to standard time meridians (as of 2017) for validation.
# Uninhabited Johnston was arbitrated to Hawaii time by zoneinfo.
# Some other uninhabited islands don't have zones, get :Pacific/Honolulu.
# Built from [meridian, zones] groups; the resulting hash maps each zone
# name to its meridian in hours.
TZmerids = [
  [-4,  [":America/Puerto_Rico", ":America/St_Thomas"]],
  [-5,  [":America/New_York"]],
  [-6,  [":America/Chicago"]],
  [-8,  [":America/Los_Angeles"]],
  [-9,  [":America/Juneau", ":America/Yakutat", ":America/Anchorage",
         ":America/Nome"]],
  [-10, [":America/Adak", ":Pacific/Honolulu", ":Pacific/Johnston"]],
  [-11, [":Pacific/Midway", ":Pacific/Pago_Pago"]],
  [12,  [":Pacific/Kwajalein", ":Pacific/Majuro", ":Pacific/Wake"]],
  [10,  [":Pacific/Guam", ":Pacific/Saipan", ":Pacific/Truk",
         ":Pacific/Chuuk", ":Pacific/Yap"]],
  [9,   [":Pacific/Palau"]]
].each_with_object({}) do |(merid, zones), table|
  zones.each {|zone| table[zone] = merid}
end

# Back the meridian out of the M2 phases for validation.
#
# m2utc and m2lst are the M2 phase in degrees relative to UTC and to local
# standard time, respectively.  Returns the whole-hour meridian in -13..13
# for which m2utc, advanced by 28.9841042 degrees per hour, lands within
# 0.15 degrees of m2lst.
#
# Raises RuntimeError if either phase is negative or if no meridian matches.
def meridFromM2(m2utc, m2lst)
  raise "Negative phases in meridFromM2" if m2utc < 0 or m2lst < 0

  merid = (-13..13).find do |candidate|
    guess = (m2utc + 28.9841042*candidate) % 360
    # Compare as angles, so that a guess of 359.95 still matches an m2lst
    # of 0.05 when roundoff puts them on opposite sides of the 0/360 line.
    # Candidate guesses are several degrees apart, so the wider test cannot
    # make two meridians match at once.
    diff = (guess - m2lst).abs % 360
    diff = 360 - diff if diff > 180
    diff <= 0.15
  end

  raise "meridFromM2 failed" if merid.nil?
  merid
end

# Fixups for naming.
# Precedents:
#   201906_Salish_Puget/round2/import.rb
#   parse-oldcurlists.rb hdr2_fixups
#   guessLongStationName(hname, tname, state, geog)

# The following patterns are replaced by the matched text downcased.
DowncasePatterns = [
  # A space, an optional open paren, a compass direction, a space, and a
  # feature word, matched without regard to case.  Alternation order is
  # significant and is kept as is.
  Regexp.new(
    " \\(?(" +
    ["north", "northeast", "northwest", "south", "southeast", "southwest",
     "east", "west"].join("|") +
    ") (" +
    ["side", "of", "end", "approach", "entrance", "jetty", "passage",
     "channel"].join("|") +
    ")",
    Regexp::IGNORECASE)
]

# The following patterns are replaced by the string provided.
# Each entry is a [pattern, replacement] pair that fixname hands to gsub!.
# Entries are applied in the order listed, so later entries see the output
# of earlier ones; do not reorder casually.  A String pattern matches
# literally; a Regexp pattern may refer to its capture groups as \\1, \\2
# in the replacement.
LiteralPatterns = [
  [/\s{2,}/, " "],  # remove extra whitespace

  # Spell out abbreviated jetty directions, with and without parens.
  [/ N\. Jetty/i, " north jetty"],
  [/ S\. Jetty/i, " south jetty"],
  [/ E\. Jetty/i, " east jetty"],
  [/ W\. Jetty/i, " west jetty"],
  [/ \(N\. Jetty\)/i, " (north jetty)"],
  [/ \(S\. Jetty\)/i, " (south jetty)"],
  [/ \(E\. Jetty\)/i, " (east jetty)"],
  [/ \(W\. Jetty\)/i, " (west jetty)"],

  # Downcase "Entrance" and "Approach"; fold the abbreviations of entrance
  # into "ent.".
  [" Entrance", " entrance"],
  [" Ent.", " ent."],
  [" Ent,", " ent.,"],
  [" ent,", " ent.,"],
  [" Entr.", " ent."],
  [" entr.", " ent."],
  [" Entr,", " ent.,"],
  [" entr,", " ent.,"],
  [" Ent ", " ent. "],
  [" ent ", " ent. "],
  [" Approach", " approach"],

  # Miscellaneous punctuation and capitalization fixes.
  ["St ", "St. "],
  [" of.", " of"],
  [" OF ", " of "],
  [/ at /i, " at "],
  ["Pass.", "Pass"],
  # NOTE(review): the dot here is unescaped, so it matches any character
  # between "No" and the digit, not just a period.
  [/No.(\d)/i, "No. \\1"],
  ["U.S.", "US"],
  ["D.C.", "DC"],
  ["F.S.M.", "FSM"],
  ["Federated States of Micronesia", "FSM"],
  [/Northern Marianas? Islands/, "Marianas"],

  # Exceptions
  ["Six mile Reef", "Six Mile Reef"],  # This escalates quickly if not fixed
  ["Dixon entrance", "Dixon Entrance"],

  # Standardize symbols for non-SI units (cf. ucum.org).
  # The ones saying miles probably mean nmi, but they said what they said.
  [/n\.?mi\.?/, "nmi"],
  [/ nm /i, " nmi "],
  [/nautical miles?/i, "nmi"],
  [/ miles? /, " mi "],
  [" mi. ", " mi "],
  [/(\d)nm/, "\\1 nmi"],
  [/(\d)miles?/, "\\1 mi"],
  [/(\d)mi\.?/, "\\1 mi"],
  [" yds.", " yd"],
  [" yds ", " yd "],

  # Remove excessive commas.
  # Point Beenar, 100 yd, NE of
  # Mark Island, 0.3 nmi, SSE of
  [/ (yd|nmi), ([NESW]{1,3}) of/, " \\1 \\2 of"],

  # Bowie.
  # Fix the "Bouy" misspelling and strip quotes from short (1-4 character)
  # buoy, marker, range, and jetty designations.
  [" Bouy ", " Buoy "],
  [" bouy ", " buoy "],
  [/Buoy '(.{1,4})'/i, "Buoy \\1"],
  [/Buoy "(.{1,4})"/i, "Buoy \\1"],
  [/Marker '(.{1,4})'/i, "Marker \\1"],
  [/Marker "(.{1,4})"/i, "Marker \\1"],
  ["Buoy R '8'", "Buoy R8"],
  [/Range '(.{1,4})'/i, "Range \\1"],
  [/Jetty '(.{1,4})'/i, "Jetty \\1"],

  # Remove excessive parens.
  # Comma is OK, parens are OK, comma plus parens is excessive.
  # Landmark, (something)
  # Landmark, (something), ...
  # Landmark, (something) (depth ...
  # But beware of mismatched parens:
  # Landmark, (something (depth ...
  [/, \(([^()]+)\)/, ", \\1"],

  # The US Board on Geographic Names discourages apostrophes:
  #   https://geonames.usgs.gov/docs/pubs/DNC_PPP_DEC_2016_V.2.0.pdf, Ch. 5.
  # I'm not going to mess with e.g. Rich's Pt and Finn's Ledge, but the
  # zillions of St. Somebody's Somethings have been causing problems with
  # matching and the inconsistencies just look dumb (e.g., St. John's Point
  # on St. Johns River--it's the same John).
  [/St\. ([[:alpha:]]+)'s/, "St. \\1s"],

  # Fix "St. Croix, St. Croix Island, Virgin Islands"
  # "St. Johns" is problematic because of all the other places that are
  # actually St. Johns.
  # NOTE(review): the dots in the next three patterns are unescaped and
  # match any character.
  [/St. Johns Island/, "St. John"],
  [/St.croix Island/, "St. Croix"],
  [/St. (John|Croix|Thomas) Island/, "St. \\1"],

  # Standardize
  ["Vieques Island", "Vieques"],
  [/Isla de Vieques/i, "Vieques"],

  # One-off fixes for specific names.
  ["Key west channel", "Key West Channel"],
  ["entrance Channel", "entrance channel"],
  ["Entrance to Eagle Harbor", "Eagle Harbor entrance"],
  ["Entrance to Ballard Locks", "Ballard Locks entrance"],
  ["Entrance to Windy Cove", "Windy Cove entrance"],
  ["West Amatuli Island, North", "West Amatuli Island, north"],
  ["- midstream", "(midstream)"],
  ["(Outside)", "(outside)"],
  [" of Bridge", " of bridge"],

  ["Johns Island. Main Road Bridge", "Johns Island, main road bridge"]
]

# Potentially unwanted abbreviations
# ----------------------------------
# Directions
# Ent, Ent.     entrance
# Entr, Entr.   entrance
# I, I. or Is.  island
# Lt.           light
# Pt or Pt.     point
# R or R.       river
# RR or RR.     railroad
# Rt.           route
# Hbr.          harbor
# Ra.           range
# Ch. or chan.  channel
# Cr.           creek
# Ft.           fort, foot, or feet
# Hwy.          highway
# St.           saint or street
# Ave.          avenue
# Sta.          station
# Fla.          Florida
# Term.         terminal

# Not fixed:  inconsistent forms of acronyms appearing as all of XYZ, X.Y.Z.,
# and X. Y. Z.

# Remaining questionable cases
# - Key West, misc. channels
# - Harbor Island West
# - Turn and Turning Basin
# - Rich's Pt, Finn's Ledge, ... but don't mangle Hawaiian or French names.

# Apply the DowncasePatterns and LiteralPatterns replacements, in that
# order, to a copy of n and capitalize the first letter, returning the
# result.  n itself is not modified.  An empty string comes back as an
# empty string instead of blowing up on the missing first letter.
# Tides have a problem with names arriving in all caps.  Currents do not.
def fixname(n)
  nout = n.dup
  # Matched text is replaced by itself downcased.
  DowncasePatterns.each {|pattern| nout.gsub!(pattern) {|match| match.downcase}}
  # Each entry is [pattern, replacement]; order matters.
  LiteralPatterns.each {|pattern, replacement| nout.gsub!(pattern, replacement)}
  # Nothing to capitalize; nout[0] would be nil.
  return nout if nout.empty?
  nout[0].upcase + nout[1..-1]
end

# -currents-
# If sid is in currentsGeogroups, return currentsGeogroups[sid].  Otherwise,
# guess.
def guessCurrentsGeogroups(currentsGeogroups, sid)
  return currentsGeogroups[sid] if currentsGeogroups.include?(sid)

  # Find the nearest known station IDs on either side of sid in sort order.
  lower, upper = currentsGeogroups.keys.partition {|known| known < sid}
  gprevsid = lower.max
  gnextsid = upper.min
  if gprevsid.nil? or gnextsid.nil?
    raise "Edge case in guessCurrentsGeogroups"
  end

  # When both neighbors agree, adopt their geogroups.
  prevGroups = currentsGeogroups[gprevsid]
  nextGroups = currentsGeogroups[gnextsid]
  if prevGroups == nextGroups
    print "No geogroup for #{sid}; guessed #{prevGroups}\n"
    return prevGroups
  end

  # Manually arbitrated cases
  case sid
  when "PCT0896"
    # After the rivers meet, but before the bay, is nothing
    ["California", "Suisun Bay", nil]
  when "PCT0901"
    ["California", "Sacramento River", nil]
  when "kb0201"
    # Per kb0101
    ["Georgia", "Cumberland Sound", "St. Mary's River"]
  when "kb0401"
    ["Georgia", "Cumberland Sound", "East River"]
  else
    print "** No geogroup for #{sid} and neighbors disagree\n"
    print "** Prev: #{gprevsid} #{prevGroups}\n"
    print "** Next: #{gnextsid} #{nextGroups}\n"
    raise "Neighbors disagree"
  end
end

# Some repetitive code for generateName.
def addStateMaybeSuffix(name, state, suffix)
  result = name
  unless state.nil?
    # State names can legitimately appear in region and subregion names, so
    # the state clause is dropped only when it exactly duplicates the final
    # clause of the name -- and never for New York.
    stateClause = ", #{state}"
    duplicate = result.end_with?(stateClause) && state != "New York"
    result += stateClause unless duplicate
  end
  suffix.nil? ? result : result + " #{suffix}"
end

# New & improved method of normalizing names.
# This started with currents but can/should be applied to tides.
# geogroups is [state, region, subregion], like table currents_geogroups.
# Depth can be nil.
# The length limit is determined in tcd.h,
#   #define ONELINER_LENGTH      90
#   NV_CHAR                 name[ONELINER_LENGTH];
# Null-terminated.
def generateName(cname, depth_ft, geogroups, suffix)
  # raise "Nil name passed to generateName" if cname.nil?
  return nil if cname.nil?
  raise "Empty name passed to generateName" if cname.empty?

  # This one got truncated in currents.json.
  # Should be this, but it's too long:
  #   "Point Colville, 3.0 nmi E of (Lawson Reef, 1 nmi NW of)"
  name =
    if cname == "Point Colville, 3.0 nm east of (Lawson Reef, 1 nm"
      "Point Colville, 3.0 nmi east of"
    else
      cname
    end

  # Name is getting fixed twice (here and after assembly) because otherwise
  # we get stupid match failures like
  # Mayport Naval Sta., St Johns River, St. Johns River.
  name = deAllCapsify(name) if isAllCaps(name)
  name = fixname(name)

  state, region, subregion = geogroups
  # Federated States of Micronesia
  # Northern Marianas Islands
  region = fixname(region) unless region.nil?
  # St. (Thomas|Croix|John) Island
  subregion = fixname(subregion) unless subregion.nil?

  # Guam is incorrectly placed in the Northern Mariana Islands.
  # Marianas = Northern Mariana Islands + Guam.
  # But we go by postcode.
  #   "MP" => "Marianas" (Northern Mariana Islands)
  #   "GU" => "Guam"
  # This workaround relies on the name saying "Guam" which it should not...
  if name.include?("Guam") and region == "Marianas"
    state = "Guam"
    region = nil
  end

  namestub = name
  namestub += " (depth " + depth_ft.round.to_s + " ft)" unless depth_ft.nil?

  # Assemble stub [, subregion] [, region] [, state] [suffix], leaving out
  # a geogroup when it is disabled, nil, or already present in the text.
  assemble = lambda do |withSubregion, withRegion|
    built = namestub
    # Roosevelt Is., west of, off 75th Street, Roosevelt Island, ...
    if withSubregion && !subregion.nil? &&
       !built.include?(subregion) &&
       !built.include?(subregion.sub("Island", "Is."))
      built += ", " + subregion
    end
    if withRegion && !region.nil? && !built.include?(region)
      built += ", " + region
    end
    fixname(addStateMaybeSuffix(built, state, suffix))
  end

  # Try the full form first, then shed the subregion, then the region,
  # until the result fits.
  name = assemble.call(true, true)
  name = assemble.call(false, true) if name.length > 89 && !subregion.nil?
  name = assemble.call(false, false) if name.length > 89 && !region.nil?

  if name.length > 89
    print "** Failed to shorten name: #{name}\n"
    print "   Region #{region}\n"
    print "   Subregion #{subregion}\n"
    raise "Name shortening failure"
  end
  name
end
