Update correlation sources
[weather.git] / weather.py
index af43de4..78fd79c 100644 (file)
@@ -1,12 +1,12 @@
 """Contains various object definitions needed by the weather utility."""
 
 weather_copyright = """\
-# Copyright (c) 2006-2016 Jeremy Stanley <fungi@yuggoth.org>. Permission to
+# Copyright (c) 2006-2020 Jeremy Stanley <fungi@yuggoth.org>. Permission to
 # use, copy, modify, and distribute this software is granted under terms
 # provided in the LICENSE file distributed with this software.
 #"""
 
-weather_version = "2.3"
+weather_version = "2.4"
 
 radian_to_km = 6372.795484
 radian_to_mi = 3959.871528
@@ -1209,25 +1209,26 @@ def gecos(formatted):
     return tuple(coordinates)
 
 def correlate():
-    import codecs, datetime, hashlib, os, re, sys, tarfile, time, zipfile
+    import codecs, csv, datetime, hashlib, os, re, sys, time, zipfile
     if pyversion("3"): import configparser
     else: import ConfigParser as configparser
-    gcounties_an = "2015_Gaz_counties_national.zip"
-    gcounties_fn = "2015_Gaz_counties_national.txt"
-    gcousubs_an = "2015_Gaz_cousubs_national.zip"
-    gcousubs_fn = "2015_Gaz_cousubs_national.txt"
-    gplace_an = "2015_Gaz_place_national.zip"
-    gplace_fn = "2015_Gaz_place_national.txt"
-    gzcta_an = "2015_Gaz_zcta_national.zip"
-    gzcta_fn = "2015_Gaz_zcta_national.txt"
     for filename in os.listdir("."):
-        if re.match("bp[0-9][0-9][a-z][a-z][0-9][0-9].dbx$", filename):
+        if re.match("[0-9]{4}_Gaz_counties_national.zip$", filename):
+            gcounties_an = filename
+            gcounties_fn = filename[:-4] + ".txt"
+        elif re.match("[0-9]{4}_Gaz_cousubs_national.zip$", filename):
+            gcousubs_an = filename
+            gcousubs_fn = filename[:-4] + ".txt"
+        elif re.match("[0-9]{4}_Gaz_place_national.zip$", filename):
+            gplace_an = filename
+            gplace_fn = filename[:-4] + ".txt"
+        elif re.match("[0-9]{4}_Gaz_zcta_national.zip$", filename):
+            gzcta_an = filename
+            gzcta_fn = filename[:-4] + ".txt"
+        elif re.match("bp[0-9]{2}[a-z]{2}[0-9]{2}.dbx$", filename):
             cpfzcf_fn = filename
-            break
     nsdcccc_fn = "nsd_cccc.txt"
-    zcatalog_an = "zonecatalog.curr.tar"
-    metartbl_fn = "metar.tbl"
-    coopstn_fn = "coop-stations.txt"
+    ourairports_fn = "airports.csv"
     overrides_fn = "overrides.conf"
     overrideslog_fn = "overrides.log"
     slist_fn = "slist"
@@ -1242,25 +1243,19 @@ def correlate():
 %s
 # generated by %s on %s from these public domain sources:
 #
-# http://www.census.gov/geo/maps-data/data/gazetteer2015.html
+# https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.html
 # %s %s %s
 # %s %s %s
 # %s %s %s
 # %s %s %s
 #
-# http://www.weather.gov/geodata/catalog/wsom/html/cntyzone.htm
+# https://www.weather.gov/gis/ZoneCounty/
 # %s %s %s
 #
-# http://tgftp.nws.noaa.gov/data/nsd_cccc.txt
+# https://tgftp.nws.noaa.gov/data/
 # %s %s %s
 #
-# http://tgftp.nws.noaa.gov/data/zonecatalog.curr.tar
-# %s %s %s
-#
-# http://www.nco.ncep.noaa.gov/pmb/codes/nwprod/dictionaries/metar.tbl
-# %s %s %s
-#
-# http://www.ncdc.noaa.gov/homr/reports
+# https://ourairports.com/data/
 # %s %s %s
 #
 # ...and these manually-generated or hand-compiled adjustments:
@@ -1303,21 +1298,11 @@ def correlate():
             datetime.datetime.fromtimestamp( os.path.getmtime(nsdcccc_fn) )
         ),
         nsdcccc_fn,
-        hashlib.md5( open(zcatalog_an, "rb").read() ).hexdigest(),
-        datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(zcatalog_an) )
-        ),
-        zcatalog_an,
-        hashlib.md5( open(metartbl_fn, "rb").read() ).hexdigest(),
-        datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(metartbl_fn) )
-        ),
-        metartbl_fn,
-        hashlib.md5( open(coopstn_fn, "rb").read() ).hexdigest(),
+        hashlib.md5( open(ourairports_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(coopstn_fn) )
+            datetime.datetime.fromtimestamp( os.path.getmtime(ourairports_fn) )
         ),
-        coopstn_fn,
+        ourairports_fn,
         hashlib.md5( open(overrides_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
             datetime.datetime.fromtimestamp( os.path.getmtime(overrides_fn) )
@@ -1344,9 +1329,9 @@ def correlate():
     sys.stdout.flush()
     count = 0
     gcounties = zipfile.ZipFile(gcounties_an).open(gcounties_fn, "rU")
-    columns = gcounties.readline().decode("latin1").strip().split("\t")
+    columns = gcounties.readline().decode("utf-8").strip().split("\t")
     for line in gcounties:
-        fields = line.decode("latin1").strip().split("\t")
+        fields = line.decode("utf-8").strip().split("\t")
         f_geoid = fields[ columns.index("GEOID") ].strip()
         f_name = fields[ columns.index("NAME") ].strip()
         f_usps = fields[ columns.index("USPS") ].strip()
@@ -1367,9 +1352,9 @@ def correlate():
     sys.stdout.flush()
     count = 0
     gcousubs = zipfile.ZipFile(gcousubs_an).open(gcousubs_fn, "rU")
-    columns = gcousubs.readline().decode("latin1").strip().split("\t")
+    columns = gcousubs.readline().decode("utf-8").strip().split("\t")
     for line in gcousubs:
-        fields = line.decode("latin1").strip().split("\t")
+        fields = line.decode("utf-8").strip().split("\t")
         f_geoid = fields[ columns.index("GEOID") ].strip()
         f_name = fields[ columns.index("NAME") ].strip()
         f_usps = fields[ columns.index("USPS") ].strip()
@@ -1390,9 +1375,9 @@ def correlate():
     sys.stdout.flush()
     count = 0
     gplace = zipfile.ZipFile(gplace_an).open(gplace_fn, "rU")
-    columns = gplace.readline().decode("latin1").strip().split("\t")
+    columns = gplace.readline().decode("utf-8").strip().split("\t")
     for line in gplace:
-        fields = line.decode("latin1").strip().split("\t")
+        fields = line.decode("utf-8").strip().split("\t")
         f_geoid = fields[ columns.index("GEOID") ].strip()
         f_name = fields[ columns.index("NAME") ].strip()
         f_usps = fields[ columns.index("USPS") ].strip()
@@ -1412,51 +1397,22 @@ def correlate():
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    slist = codecs.open(slist_fn, "rU")
+    slist = codecs.open(slist_fn, "rU", "utf-8")
     for line in slist:
         icao = line.split("#")[0].strip()
         if icao:
             stations[icao] = {
-                "metar": "http://tgftp.nws.noaa.gov/data/observations/"\
+                "metar": "https://tgftp.nws.noaa.gov/data/observations/"\
                     + "metar/decoded/%s.TXT" % icao.upper()
             }
             count += 1
     slist.close()
     print("done (%s lines)." % count)
-    message = "Reading %s..." % metartbl_fn
-    sys.stdout.write(message)
-    sys.stdout.flush()
-    count = 0
-    metartbl = codecs.open(metartbl_fn, "rU")
-    for line in metartbl:
-        icao = line[:4].strip().lower()
-        if icao in stations:
-            description = []
-            name = " ".join(
-                line[16:48].replace("_", " ").strip().title().split()
-            )
-            if name: description.append(name)
-            st = line[49:51].strip()
-            if st: description.append(st)
-            cn = line[52:54].strip()
-            if cn: description.append(cn)
-            if description:
-                stations[icao]["description"] = ", ".join(description)
-            lat = line[55:60].strip()
-            if lat:
-                lat = int(lat)/100.0
-                lon = line[61:67].strip()
-                if lon:
-                    lon = int(lon)/100.0
-                    stations[icao]["location"] = gecos( "%s,%s" % (lat, lon) )
-        count += 1
-    metartbl.close()
-    print("done (%s lines)." % count)
     message = "Reading %s..." % nsdcccc_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    nsdcccc = codecs.open(nsdcccc_fn, "rU", "latin1")
+    nsdcccc = codecs.open(nsdcccc_fn, "rU", "utf-8")
     for line in nsdcccc:
         line = str(line)
         fields = line.split(";")
@@ -1481,44 +1437,49 @@ def correlate():
         count += 1
     nsdcccc.close()
     print("done (%s lines)." % count)
-    message = "Reading %s..." % coopstn_fn
+    message = "Reading %s..." % ourairports_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    coopstn = open(coopstn_fn)
-    for line in coopstn:
-        icao = line[33:37].strip().lower()
+    ourairports = open(ourairports_fn, "rU")
+    for row in csv.reader(ourairports):
+        icao = row[12].lower()
         if icao in stations:
-            iata = line[22:26].strip().lower()
+            iata = row[13].lower()
             if len(iata) == 3: airports[iata] = { "station": icao }
             if "description" not in stations[icao]:
                 description = []
-                name = " ".join( line[99:129].strip().title().split() )
+                name = row[3]
                 if name: description.append(name)
-                st = line[59:61].strip()
-                if st: description.append(st)
-                country = " ".join( line[38:58].strip().title().split() )
-                if country: description.append(country)
+                municipality = row[10]
+                if municipality: description.append(municipality)
+                region = row[9]
+                country = row[8]
+                if region:
+                    if "-" in region:
+                        c,r = region.split("-", 1)
+                        if c == country: region = r
+                    description.append(region)
+                if country:
+                    description.append(country)
                 if description:
                     stations[icao]["description"] = ", ".join(description)
             if "location" not in stations[icao]:
-                lat = line[130:139].strip()
+                lat = row[4]
                 if lat:
-                    lat = lat.replace(" ", "-")
-                    lon = line[140:150].strip()
+                    lon = row[5]
                     if lon:
-                        lon = lon.replace(" ", "-")
                         stations[icao]["location"] = gecos(
                             "%s,%s" % (lat, lon)
                         )
         count += 1
-    coopstn.close()
+    ourairports.close()
     print("done (%s lines)." % count)
     message = "Reading %s..." % zlist_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    zlist = codecs.open(zlist_fn, "rU")
+    zlist = codecs.open(zlist_fn, "rU", "utf-8")
     for line in zlist:
         line = line.split("#")[0].strip()
         if line:
@@ -1526,69 +1487,76 @@ def correlate():
             count += 1
     zlist.close()
     print("done (%s lines)." % count)
-    message = "Reading %s:*..." % zcatalog_an
-    sys.stdout.write(message)
-    sys.stdout.flush()
-    count = 0
-    zcatalog = tarfile.open(zcatalog_an)
-    for entry in zcatalog.getmembers():
-        if entry.isfile():
-            fnmatch = re.match(
-                r"([a-z]+z[0-9]+)\.txt$",
-                os.path.basename(entry.name)
-            )
-            if fnmatch:
-                zone = fnmatch.group(1)
-                if zone in zones:
-                    data = zcatalog.extractfile(entry).readlines()
-                    description = data[0].decode("ascii").strip()
-                    zones[zone]["description"] = description
-                    for line in data[1:]:
-                        line = line.decode("latin1").strip()
-                        urimatch = re.match("/webdocs/pub/(.+):(.+) for ",
-                                            line)
-                        if urimatch:
-                            uritype = urimatch.group(2).lower().replace(" ","_")
-                            zones[zone][uritype]  = (
-                                "http://tgftp.nws.noaa.gov/%s"
-                                % urimatch.group(1))
-        count += 1
-    zcatalog.close()
-    print("done (%s files)." % count)
     message = "Reading %s..." % cpfzcf_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
     cpfz = {}
-    cpfzcf = open(cpfzcf_fn)
+    cpfzcf = codecs.open(cpfzcf_fn, "rU", "utf-8")
     for line in cpfzcf:
         fields = line.strip().split("|")
         if len(fields) == 11 \
             and fields[0] and fields[1] and fields[9] and fields[10]:
             zone = "z".join( fields[:2] ).lower()
             if zone in zones:
-                zones[zone]["centroid"] = gecos( ",".join( fields[9:11] ) )
-            elif fields[6]:
                 state = fields[0]
-                description = fields[3]
-                county = fields[5]
+                if state:
+                    zones[zone]["coastal_flood_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flood/coastal/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["flash_flood_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flash_flood/statement/%s/%s.txt"
+                        % (state.lower(), zone))
+                    zones[zone]["flash_flood_warning"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flash_flood/warning/%s/%s.txt"
+                        % (state.lower(), zone))
+                    zones[zone]["flash_flood_watch"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flash_flood/watch/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["flood_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flood/statement/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["flood_warning"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flood/warning/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["severe_thunderstorm_warning"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "thunderstorm/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["severe_weather_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "severe_weather_stmt/%s/%s.txt"
+                        % (state.lower(), zone))
+                    zones[zone]["short_term_forecast"] = (
+                        "https://tgftp.nws.noaa.gov/data/forecasts/nowcast/"
+                        "%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["special_weather_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "special_weather_stmt/%s/%s.txt"
+                        % (state.lower(), zone))
+                    zones[zone]["state_forecast"] = (
+                        "https://tgftp.nws.noaa.gov/data/forecasts/state/"
+                        "%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["urgent_weather_message"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "non_precip/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["zone_forecast"] = (
+                        "https://tgftp.nws.noaa.gov/data/forecasts/zone/"
+                        "%s/%s.txt" % (state.lower(), zone))
+                description = fields[3].strip()
                 fips = "fips%s"%fields[6]
-                possible = [
-                    "%s, %s" % (county, state),
-                    "%s County, %s" % (county, state),
-                ]
-                if description.endswith(" Counties"):
-                    description = description[:-9]
-                for addition in description.split(" and "):
-                    possible.append( "%s, %s" % (addition, state) )
-                    possible.append( "%s County, %s" % (addition, state) )
-                if fips in places and "centroid" in places[fips]:
-                    for candidate in zones:
-                        if "centroid" not in zones[candidate] and \
-                            "description" in zones[candidate] and \
-                            zones[candidate]["description"] in possible:
-                            zones[candidate]["centroid"] = \
-                                places[fips]["centroid"]
+                county = fields[5]
+                if county:
+                    if description.endswith(county):
+                        description += " County"
+                    else:
+                        description += ", %s County" % county
+                description += ", %s, US" % state
+                zones[zone]["description"] = description
+                zones[zone]["centroid"] = gecos( ",".join( fields[9:11] ) )
+                if fips in places and not zones[zone]["centroid"]:
+                    zones[zone]["centroid"] = places[fips]["centroid"]
         count += 1
     cpfzcf.close()
     print("done (%s lines)." % count)
@@ -1597,9 +1565,9 @@ def correlate():
     sys.stdout.flush()
     count = 0
     gzcta = zipfile.ZipFile(gzcta_an).open(gzcta_fn, "rU")
-    columns = gzcta.readline().decode("latin1").strip().split("\t")
+    columns = gzcta.readline().decode("utf-8").strip().split("\t")
     for line in gzcta:
-        fields = line.decode("latin1").strip().split("\t")
+        fields = line.decode("utf-8").strip().split("\t")
         f_geoid = fields[ columns.index("GEOID") ].strip()
         f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
         f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
@@ -1967,7 +1935,7 @@ def correlate():
     count = 0
     if os.path.exists(stations_fn):
         os.rename(stations_fn, "%s_old"%stations_fn)
-    stations_fd = codecs.open(stations_fn, "w", "utf8")
+    stations_fd = codecs.open(stations_fn, "w", "utf-8")
     stations_fd.write(header)
     for station in sorted( stations.keys() ):
         stations_fd.write("\n\n[%s]" % station)