Refresh correlation data
[weather.git] / weather.py
index 0e0d436..2dd01b5 100644 (file)
@@ -1,12 +1,12 @@
 """Contains various object definitions needed by the weather utility."""
 
 weather_copyright = """\
-# Copyright (c) 2006-2012 Jeremy Stanley <fungi@yuggoth.org>. Permission to
+# Copyright (c) 2006-2023 Jeremy Stanley <fungi@yuggoth.org>. Permission to
 # use, copy, modify, and distribute this software is granted under terms
 # provided in the LICENSE file distributed with this software.
 #"""
 
-weather_version = "2.0"
+weather_version = "2.4.4"
 
 radian_to_km = 6372.795484
 radian_to_mi = 3959.871528
@@ -79,7 +79,8 @@ class Selections:
                     cache_search=(
                         self.get("cache") and self.get("cache_search")
                     ),
-                    cachedir=self.get("cachedir")
+                    cachedir=self.get("cachedir"),
+                    quiet=self.get_bool("quiet")
                 )
                 self.config.add_section(argument)
                 for item in guessed.items():
@@ -88,17 +89,25 @@ class Selections:
                 return self.config.get(argument, option)
         if option in self.options.__dict__:
             return self.options.__dict__[option]
-        else:
-            import os, sys
-            message = "%s error: no URI defined for %s\n" % (
-                os.path.basename( sys.argv[0] ),
-                option
-            )
-            sys.stderr.write(message)
-            exit(1)
+        import sys
+        message = "WARNING: no URI defined for %s\n" % option
+        sys.stderr.write(message)
+        return None
     def get_bool(self, option, argument=None):
         """Get data and coerce to a boolean if necessary."""
-        return bool(self.get(option, argument))
+        # Mimic configparser's getboolean() method by treating
+        # false/no/off/0 as False and true/yes/on/1 as True values,
+        # case-insensitively
+        value = self.get(option, argument)
+        if isinstance(value, bool):
+            return value
+        if isinstance(value, str):
+            vlower = value.lower()
+            if vlower in ('false', 'no', 'off', '0'):
+                return False
+            elif vlower in ('true', 'yes', 'on', '1'):
+                return True
+        raise ValueError("Not a boolean: %s" % value)
     def getint(self, option, argument=None):
         """Get data and coerce to an integer if necessary."""
         value = self.get(option, argument)
@@ -211,7 +220,7 @@ def get_uri(
             except (IOError, OSError): pass
         dcache_fn = os.path.join(
             dcachedir,
-            uri.split(":")[1].replace("/","_")
+            uri.split(":",1)[1].replace("/","_")
         )
     now = time.time()
     if cache_data and os.access(dcache_fn, os.R_OK) \
@@ -221,22 +230,16 @@ def get_uri(
         dcache_fd.close()
     else:
         try:
-            if pyversion("3"): data = urlopen(uri).read().decode("utf-8")
-            else: data = urlopen(uri).read()
+            data = urlopen(uri).read().decode("utf-8")
         except URLError:
             if ignore_fail: return ""
-            else:
-                import os, sys, traceback
-                message = "%s error: failed to retrieve\n   %s\n   %s" % (
-                        os.path.basename( sys.argv[0] ),
-                        uri,
-                        traceback.format_exception_only(
-                            sys.exc_type,
-                            sys.exc_value
-                        )[0]
-                    )
-                sys.stderr.write(message)
-                sys.exit(1)
+            import os, sys
+            sys.stderr.write("%s error: failed to retrieve\n   %s\n\n" % (
+                os.path.basename( sys.argv[0] ), uri))
+            raise
+        # Some data sources are HTML with the plain text wrapped in pre tags
+        if "<pre>" in data:
+            data = data[data.find("<pre>")+5:data.find("</pre>")]
         if cache_data:
             try:
                 import codecs
@@ -321,11 +324,7 @@ def get_alert(
 ):
     """Return alert notice for the specified URI."""
     if not uri:
-        import os, sys
-        message = "%s error: Alert URI required for alerts\n" % \
-            os.path.basename( sys.argv[0] )
-        sys.stderr.write(message)
-        sys.exit(1)
+        return ""
     alert = get_uri(
         uri,
         ignore_fail=True,
@@ -378,7 +377,7 @@ def get_options(config):
 
     # the -a/--alert option
     if config.has_option("default", "alert"):
-        default_alert = bool(config.get("default", "alert"))
+        default_alert = config.getboolean("default", "alert")
     else: default_alert = False
     option_parser.add_option("-a", "--alert",
         dest="alert",
@@ -397,14 +396,9 @@ def get_options(config):
             + "flash_flood_watch," \
             + "flood_statement," \
             + "flood_warning," \
-            + "marine_weather_statement," \
-            + "river_statement," \
             + "severe_thunderstorm_warning," \
             + "severe_weather_statement," \
-            + "short_term_forecast," \
-            + "special_marine_warning," \
             + "special_weather_statement," \
-            + "tornado_warning," \
             + "urgent_weather_message"
     option_parser.add_option("--atypes",
         dest="atypes",
@@ -438,7 +432,7 @@ def get_options(config):
 
     # the -f/--forecast option
     if config.has_option("default", "forecast"):
-        default_forecast = bool(config.get("default", "forecast"))
+        default_forecast = config.getboolean("default", "forecast")
     else: default_forecast = False
     option_parser.add_option("-f", "--forecast",
         dest="forecast",
@@ -466,7 +460,7 @@ def get_options(config):
 
     # the --imperial option
     if config.has_option("default", "imperial"):
-        default_imperial = bool(config.get("default", "imperial"))
+        default_imperial = config.getboolean("default", "imperial")
     else: default_imperial = False
     option_parser.add_option("--imperial",
         dest="imperial",
@@ -497,7 +491,7 @@ def get_options(config):
 
     # the -m/--metric option
     if config.has_option("default", "metric"):
-        default_metric = bool(config.get("default", "metric"))
+        default_metric = config.getboolean("default", "metric")
     else: default_metric = False
     option_parser.add_option("-m", "--metric",
         dest="metric",
@@ -507,7 +501,7 @@ def get_options(config):
 
     # the -n/--no-conditions option
     if config.has_option("default", "conditions"):
-        default_conditions = bool(config.get("default", "conditions"))
+        default_conditions = config.getboolean("default", "conditions")
     else: default_conditions = True
     option_parser.add_option("-n", "--no-conditions",
         dest="conditions",
@@ -517,7 +511,7 @@ def get_options(config):
 
     # the --no-cache option
     if config.has_option("default", "cache"):
-        default_cache = bool(config.get("default", "cache"))
+        default_cache = config.getboolean("default", "cache")
     else: default_cache = True
     option_parser.add_option("--no-cache",
         dest="cache",
@@ -527,7 +521,7 @@ def get_options(config):
 
     # the --no-cache-data option
     if config.has_option("default", "cache_data"):
-        default_cache_data = bool(config.get("default", "cache_data"))
+        default_cache_data = config.getboolean("default", "cache_data")
     else: default_cache_data = True
     option_parser.add_option("--no-cache-data",
         dest="cache_data",
@@ -537,7 +531,7 @@ def get_options(config):
 
     # the --no-cache-search option
     if config.has_option("default", "cache_search"):
-        default_cache_search = bool(config.get("default", "cache_search"))
+        default_cache_search = config.getboolean("default", "cache_search")
     else: default_cache_search = True
     option_parser.add_option("--no-cache-search",
         dest="cache_search",
@@ -547,7 +541,7 @@ def get_options(config):
 
     # the -q/--quiet option
     if config.has_option("default", "quiet"):
-        default_quiet = bool(config.get("default", "quiet"))
+        default_quiet = config.getboolean("default", "quiet")
     else: default_quiet = False
     option_parser.add_option("-q", "--quiet",
         dest="quiet",
@@ -566,7 +560,7 @@ def get_options(config):
 
     # the -v/--verbose option
     if config.has_option("default", "verbose"):
-        default_verbose = bool(config.get("default", "verbose"))
+        default_verbose = config.getboolean("default", "verbose")
     else: default_verbose = False
     option_parser.add_option("-v", "--verbose",
         dest="verbose",
@@ -608,12 +602,17 @@ def get_config():
     import os
     rcfiles = [
         "/etc/weatherrc",
+        "/etc/weather/weatherrc",
         os.path.expanduser("~/.weather/weatherrc"),
         os.path.expanduser("~/.weatherrc"),
         "weatherrc"
         ]
     for rcfile in rcfiles:
-        if os.access(rcfile, os.R_OK): config.read(rcfile)
+        if os.access(rcfile, os.R_OK):
+            if pyversion("3"):
+                config.read(rcfile, encoding="utf-8")
+            else:
+                config.read(rcfile)
     for section in config.sections():
         if section != section.lower():
             if config.has_section(section.lower()):
@@ -649,7 +648,10 @@ def integrate_search_cache(config, cachedir, setpath):
             pass
         return config
     scache = configparser.ConfigParser()
-    scache.read(scache_fn)
+    if pyversion("3"):
+        scache.read(scache_fn, encoding="utf-8")
+    else:
+        scache.read(scache_fn)
     for section in scache.sections():
         if not config.has_section(section):
             config.add_section(section)
@@ -690,6 +692,8 @@ def data_index(path):
                         os.stat(candidate).st_mtime
                     )
                     break
+            if filename in datafiles:
+                break
     return datafiles
 
 def guess(
@@ -699,7 +703,8 @@ def guess(
     info=False,
     cache_search=False,
     cacheage=900,
-    cachedir="."
+    cachedir=".",
+    quiet=False
 ):
     """Find URIs using airport, gecos, placename, station, ZCTA/ZIP, zone."""
     import codecs, datetime, time, os, re, sys
@@ -732,16 +737,22 @@ def guess(
             (0.995, "excellent"),
             (1.000, "ideal"),
         ]
-    print("Searching via %s..."%searchtype)
+    if not quiet: print("Searching via %s..."%searchtype)
     stations = configparser.ConfigParser()
     dataname = "stations"
     if dataname in datafiles:
         datafile = datafiles[dataname][0]
         if datafile.endswith(".gz"):
             import gzip
-            stations.readfp( gzip.open(datafile) )
+            if pyversion("3"):
+                stations.read_string(
+                    gzip.open(datafile).read().decode("utf-8") )
+            else: stations.readfp( gzip.open(datafile) )
         else:
-            stations.read(datafile)
+            if pyversion("3"):
+                stations.read(datafile, encoding="utf-8")
+            else:
+                stations.read(datafile)
     else:
         message = "%s error: can't find \"%s\" data file\n" % (
             os.path.basename( sys.argv[0] ),
@@ -755,9 +766,14 @@ def guess(
         datafile = datafiles[dataname][0]
         if datafile.endswith(".gz"):
             import gzip
-            zones.readfp( gzip.open(datafile) )
+            if pyversion("3"):
+                zones.read_string( gzip.open(datafile).read().decode("utf-8") )
+            else: zones.readfp( gzip.open(datafile) )
         else:
-            zones.read(datafile)
+            if pyversion("3"):
+                zones.read(datafile, encoding="utf-8")
+            else:
+                zones.read(datafile)
     else:
         message = "%s error: can't find \"%s\" data file\n" % (
             os.path.basename( sys.argv[0] ),
@@ -779,9 +795,15 @@ def guess(
             datafile = datafiles[dataname][0]
             if datafile.endswith(".gz"):
                 import gzip
-                airports.readfp( gzip.open(datafile) )
+                if pyversion("3"):
+                    airports.read_string(
+                        gzip.open(datafile).read().decode("utf-8") )
+                else: airports.readfp( gzip.open(datafile) )
             else:
-                airports.read(datafile)
+                if pyversion("3"):
+                    airports.read(datafile, encoding="utf-8")
+                else:
+                    airports.read(datafile)
         else:
             message = "%s error: can't find \"%s\" data file\n" % (
                 os.path.basename( sys.argv[0] ),
@@ -796,7 +818,8 @@ def guess(
             if stations.has_option(station[0], "zone"):
                 zone = eval( stations.get(station[0], "zone") )
                 dataset = stations
-            if not info and stations.has_option( station[0], "description" ):
+            if not ( info or quiet ) \
+                and stations.has_option( station[0], "description" ):
                 print(
                     "[%s result %s]" % (
                         action,
@@ -819,7 +842,8 @@ def guess(
             if stations.has_option(expression, "zone"):
                 zone = eval( stations.get(expression, "zone") )
                 dataset = stations
-            if not info and stations.has_option(expression, "description"):
+            if not ( info or quiet ) \
+                and stations.has_option(expression, "description"):
                 print(
                     "[%s result %s]" % (
                         action,
@@ -841,7 +865,8 @@ def guess(
             station = eval( zones.get(expression, "station") )
             dataset = zones
             search = (expression, "NWS/NOAA weather zone %s" % expression)
-            if not info and zones.has_option(expression, "description"):
+            if not ( info or quiet ) \
+                and zones.has_option(expression, "description"):
                 print(
                     "[%s result %s]" % (
                         action,
@@ -862,9 +887,15 @@ def guess(
             datafile = datafiles[dataname][0]
             if datafile.endswith(".gz"):
                 import gzip
-                zctas.readfp( gzip.open(datafile) )
+                if pyversion("3"):
+                    zctas.read_string(
+                        gzip.open(datafile).read().decode("utf-8") )
+                else: zctas.readfp( gzip.open(datafile) )
             else:
-                zctas.read(datafile)
+                if pyversion("3"):
+                    zctas.read(datafile, encoding="utf-8")
+                else:
+                    zctas.read(datafile)
         else:
             message = "%s error: can't find \"%s\" data file\n" % (
                 os.path.basename( sys.argv[0] ),
@@ -917,9 +948,15 @@ def guess(
             datafile = datafiles[dataname][0]
             if datafile.endswith(".gz"):
                 import gzip
-                places.readfp( gzip.open(datafile) )
+                if pyversion("3"):
+                    places.read_string(
+                        gzip.open(datafile).read().decode("utf-8") )
+                else: places.readfp( gzip.open(datafile) )
             else:
-                places.read(datafile)
+                if pyversion("3"):
+                    places.read(datafile, encoding="utf-8")
+                else:
+                    places.read(datafile)
         else:
             message = "%s error: can't find \"%s\" data file\n" % (
                 os.path.basename( sys.argv[0] ),
@@ -939,7 +976,8 @@ def guess(
                 )
             if places.has_option(place, "zone"):
                 zone = eval( places.get(place, "zone") )
-            if not info and places.has_option(place, "description"):
+            if not ( info or quiet ) \
+                and places.has_option(place, "description"):
                 print(
                     "[%s result %s]" % (
                         action,
@@ -992,7 +1030,8 @@ def guess(
                     description = zones.get(place, "description")
                     zone = (place, 0.0)
                     search = ( expression, "NWS/NOAA weather zone %s" % place )
-                if not info: print( "[%s result %s]" % (action, description) )
+                if not ( info or quiet ):
+                    print( "[%s result %s]" % (action, description) )
             if not possibilities and not station[0]:
                 message = "No FIPS code/census area match in the %s file.\n" % (
                     datafiles["places"][0]
@@ -1079,7 +1118,7 @@ def guess(
             print(
                 "   (proximity %s, %.3gkm, %.3gmi)" % ( score, km, mi )
             )
-        elif searchtype is "coordinates":
+        elif searchtype == "coordinates":
             print( "   (%.3gkm, %.3gmi)" % (km, mi) )
         if zone[0]:
             print(
@@ -1095,7 +1134,7 @@ def guess(
             print(
                 "   (proximity %s, %.3gkm, %.3gmi)" % ( score, km, mi )
             )
-        elif searchtype is "coordinates" and zone[0]:
+        elif searchtype == "coordinates" and zone[0]:
             print( "   (%.3gkm, %.3gmi)" % (km, mi) )
     if cache_search:
         now = time.time()
@@ -1108,7 +1147,7 @@ def guess(
         )
         search_cache = ["\n"]
         search_cache.append( "[%s]\n" % search[0] ) 
-        search_cache.append( "description = cached %s\n" % nowstamp )
+        search_cache.append( "cached = %s\n" % nowstamp )
         for uriname in sorted(uris.keys()):
             search_cache.append( "%s = %s\n" % ( uriname, uris[uriname] ) )
         real_cachedir = os.path.expanduser(cachedir)
@@ -1134,7 +1173,10 @@ def guess(
             )
         try:
             scache_existing = configparser.ConfigParser()
-            scache_existing.read(scache_fn)
+            if pyversion("3"):
+                scache_existing.read(scache_fn, encoding="utf-8")
+            else:
+                scache_existing.read(scache_fn)
             if not scache_existing.has_section(search[0]):
                 scache_fd = codecs.open(scache_fn, "a", "utf-8")
                 scache_fd.writelines(search_cache)
@@ -1185,25 +1227,26 @@ def gecos(formatted):
     return tuple(coordinates)
 
 def correlate():
-    import codecs, datetime, hashlib, os, re, sys, tarfile, time, zipfile
+    import codecs, csv, datetime, hashlib, os, re, sys, time, zipfile
     if pyversion("3"): import configparser
     else: import ConfigParser as configparser
-    gcounties_an = "Gaz_counties_national.zip"
-    gcounties_fn = "Gaz_counties_national.txt"
-    gcousubs_an = "Gaz_cousubs_national.zip"
-    gcousubs_fn = "Gaz_cousubs_national.txt"
-    gplaces_an = "Gaz_places_national.zip"
-    gplaces_fn = "Gaz_places_national.txt"
-    gzcta_an = "Gaz_zcta_national.zip"
-    gzcta_fn = "Gaz_zcta_national.txt"
     for filename in os.listdir("."):
-        if re.match("bp[0-9][0-9][a-z][a-z][0-9][0-9].dbx$", filename):
+        if re.match("[0-9]{4}_Gaz_counties_national.zip$", filename):
+            gcounties_an = filename
+            gcounties_fn = filename[:-4] + ".txt"
+        elif re.match("[0-9]{4}_Gaz_cousubs_national.zip$", filename):
+            gcousubs_an = filename
+            gcousubs_fn = filename[:-4] + ".txt"
+        elif re.match("[0-9]{4}_Gaz_place_national.zip$", filename):
+            gplace_an = filename
+            gplace_fn = filename[:-4] + ".txt"
+        elif re.match("[0-9]{4}_Gaz_zcta_national.zip$", filename):
+            gzcta_an = filename
+            gzcta_fn = filename[:-4] + ".txt"
+        elif re.match("bp[0-9]{2}[a-z]{2}[0-9]{2}.dbx$", filename):
             cpfzcf_fn = filename
-            break
     nsdcccc_fn = "nsd_cccc.txt"
-    zcatalog_an = "zonecatalog.curr.tar"
-    metartbl_fn = "metar.tbl"
-    coopact_fn = "COOP-ACT.TXT"
+    ourairports_fn = "airports.csv"
     overrides_fn = "overrides.conf"
     overrideslog_fn = "overrides.log"
     slist_fn = "slist"
@@ -1218,25 +1261,19 @@ def correlate():
 %s
 # generated by %s on %s from these public domain sources:
 #
-# http://www.census.gov/geo/www/gazetteer/gazetteer2010.html
+# https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.html
 # %s %s %s
 # %s %s %s
 # %s %s %s
 # %s %s %s
 #
-# http://www.weather.gov/geodata/catalog/wsom/html/cntyzone.htm
+# https://www.weather.gov/gis/ZoneCounty/
 # %s %s %s
 #
-# http://weather.noaa.gov/data/nsd_cccc.txt
+# https://tgftp.nws.noaa.gov/data/
 # %s %s %s
 #
-# http://weather.noaa.gov/pub/data/zonecatalog.curr.tar
-# %s %s %s
-#
-# http://www.nco.ncep.noaa.gov/pmb/codes/nwprod/dictionaries/metar.tbl
-# %s %s %s
-#
-# ftp://ftp.ncdc.noaa.gov/pub/data/inventories/COOP-ACT.TXT
+# https://ourairports.com/data/
 # %s %s %s
 #
 # ...and these manually-generated or hand-compiled adjustments:
@@ -1247,66 +1284,56 @@ def correlate():
         weather_copyright,
         os.path.basename( sys.argv[0] ),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( time.time() )
+            datetime.datetime.utcfromtimestamp( int(os.environ.get('SOURCE_DATE_EPOCH', time.time())) )
         ),
         hashlib.md5( open(gcounties_an, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(gcounties_an) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(gcounties_an) )
         ),
         gcounties_an,
         hashlib.md5( open(gcousubs_an, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(gcousubs_an) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(gcousubs_an) )
         ),
         gcousubs_an,
-        hashlib.md5( open(gplaces_an, "rb").read() ).hexdigest(),
+        hashlib.md5( open(gplace_an, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(gplaces_an) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(gplace_an) )
         ),
-        gplaces_an,
+        gplace_an,
         hashlib.md5( open(gzcta_an, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(gzcta_an) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(gzcta_an) )
         ),
         gzcta_an,
         hashlib.md5( open(cpfzcf_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(cpfzcf_fn) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(cpfzcf_fn) )
         ),
         cpfzcf_fn,
         hashlib.md5( open(nsdcccc_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(nsdcccc_fn) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(nsdcccc_fn) )
         ),
         nsdcccc_fn,
-        hashlib.md5( open(zcatalog_an, "rb").read() ).hexdigest(),
-        datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(zcatalog_an) )
-        ),
-        zcatalog_an,
-        hashlib.md5( open(metartbl_fn, "rb").read() ).hexdigest(),
-        datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(metartbl_fn) )
-        ),
-        metartbl_fn,
-        hashlib.md5( open(coopact_fn, "rb").read() ).hexdigest(),
+        hashlib.md5( open(ourairports_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(coopact_fn) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(ourairports_fn) )
         ),
-        coopact_fn,
+        ourairports_fn,
         hashlib.md5( open(overrides_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(overrides_fn) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(overrides_fn) )
         ),
         overrides_fn,
         hashlib.md5( open(slist_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(slist_fn) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(slist_fn) )
         ),
         slist_fn,
         hashlib.md5( open(zlist_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(zlist_fn) )
+            datetime.datetime.utcfromtimestamp( os.path.getmtime(zlist_fn) )
         ),
         zlist_fn
     )
@@ -1319,16 +1346,22 @@ def correlate():
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    gcounties = zipfile.ZipFile(gcounties_an).open(gcounties_fn, "rU")
+    gcounties = zipfile.ZipFile(gcounties_an).open(gcounties_fn, "r")
+    columns = gcounties.readline().decode("utf-8").strip().split("\t")
     for line in gcounties:
-        fields = line.decode("latin1").strip().split("\t")
-        if len(fields) == 10 and fields[0] != "STUSPS":
-            fips = "fips%s" % fields[1]
-            description = "%s, %s" % ( fields[3], fields[0] )
-            centroid = gecos( ",".join( fields[8:10] ) )
+        fields = line.decode("utf-8").strip().split("\t")
+        f_geoid = fields[ columns.index("GEOID") ].strip()
+        f_name = fields[ columns.index("NAME") ].strip()
+        f_usps = fields[ columns.index("USPS") ].strip()
+        f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
+        f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
+        if f_geoid and f_name and f_usps and f_intptlat and f_intptlong:
+            fips = "fips%s" % f_geoid
             if fips not in places: places[fips] = {}
-            places[fips]["centroid"] = centroid
-            places[fips]["description"] = description
+            places[fips]["centroid"] = gecos(
+                "%s,%s" % (f_intptlat, f_intptlong)
+            )
+            places[fips]["description"] = "%s, %s" % (f_name, f_usps)
             count += 1
     gcounties.close()
     print("done (%s lines)." % count)
@@ -1336,85 +1369,68 @@ def correlate():
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    gcousubs = zipfile.ZipFile(gcousubs_an).open(gcousubs_fn, "rU")
+    gcousubs = zipfile.ZipFile(gcousubs_an).open(gcousubs_fn, "r")
+    columns = gcousubs.readline().decode("utf-8").strip().split("\t")
     for line in gcousubs:
-        fields = line.decode("latin1").strip().split("\t")
-        if len(fields) == 10 and fields[0] != "STUSPS":
-            fips = "fips%s" % fields[1]
-            description = "%s, %s" % ( fields[3], fields[0] )
-            centroid = gecos( ",".join( fields[8:10] ) )
+        fields = line.decode("utf-8").strip().split("\t")
+        f_geoid = fields[ columns.index("GEOID") ].strip()
+        f_name = fields[ columns.index("NAME") ].strip()
+        f_usps = fields[ columns.index("USPS") ].strip()
+        f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
+        f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
+        if f_geoid and f_name and f_usps and f_intptlat and f_intptlong:
+            fips = "fips%s" % f_geoid
             if fips not in places: places[fips] = {}
-            places[fips]["centroid"] = centroid
-            places[fips]["description"] = description
+            places[fips]["centroid"] = gecos(
+                "%s,%s" % (f_intptlat, f_intptlong)
+            )
+            places[fips]["description"] = "%s, %s" % (f_name, f_usps)
             count += 1
     gcousubs.close()
     print("done (%s lines)." % count)
-    message = "Reading %s:%s..." % (gplaces_an, gplaces_fn)
+    message = "Reading %s:%s..." % (gplace_an, gplace_fn)
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    gplaces = zipfile.ZipFile(gplaces_an).open(gplaces_fn, "rU")
-    for line in gplaces:
-        fields = line.decode("latin1").strip().split("\t")
-        if len(fields) == 10 and fields[0] != "STUSPS":
-            fips = "fips%s" % fields[1]
-            description = "%s, %s" % ( fields[3], fields[0] )
-            centroid = gecos( ",".join( fields[8:10] ) )
+    gplace = zipfile.ZipFile(gplace_an).open(gplace_fn, "r")
+    columns = gplace.readline().decode("utf-8").strip().split("\t")
+    for line in gplace:
+        fields = line.decode("utf-8").strip().split("\t")
+        f_geoid = fields[ columns.index("GEOID") ].strip()
+        f_name = fields[ columns.index("NAME") ].strip()
+        f_usps = fields[ columns.index("USPS") ].strip()
+        f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
+        f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
+        if f_geoid and f_name and f_usps and f_intptlat and f_intptlong:
+            fips = "fips%s" % f_geoid
             if fips not in places: places[fips] = {}
-            places[fips]["centroid"] = centroid
-            places[fips]["description"] = description
+            places[fips]["centroid"] = gecos(
+                "%s,%s" % (f_intptlat, f_intptlong)
+            )
+            places[fips]["description"] = "%s, %s" % (f_name, f_usps)
             count += 1
-    gplaces.close()
+    gplace.close()
     print("done (%s lines)." % count)
     message = "Reading %s..." % slist_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    slist = codecs.open(slist_fn, "rU")
+    slist = codecs.open(slist_fn, "r", "utf-8")
     for line in slist:
         icao = line.split("#")[0].strip()
         if icao:
             stations[icao] = {
-                "metar": "http://weather.noaa.gov/pub/data/observations/"\
+                "metar": "https://tgftp.nws.noaa.gov/data/observations/"\
                     + "metar/decoded/%s.TXT" % icao.upper()
             }
             count += 1
     slist.close()
     print("done (%s lines)." % count)
-    message = "Reading %s..." % metartbl_fn
-    sys.stdout.write(message)
-    sys.stdout.flush()
-    count = 0
-    metartbl = codecs.open(metartbl_fn, "rU")
-    for line in metartbl:
-        icao = line[:4].strip().lower()
-        if icao in stations:
-            description = []
-            name = " ".join(
-                line[16:48].replace("_", " ").strip().title().split()
-            )
-            if name: description.append(name)
-            st = line[49:51].strip()
-            if st: description.append(st)
-            cn = line[52:54].strip()
-            if cn: description.append(cn)
-            if description:
-                stations[icao]["description"] = ", ".join(description)
-            lat = line[55:60].strip()
-            if lat:
-                lat = int(lat)/100.0
-                lon = line[61:67].strip()
-                if lon:
-                    lon = int(lon)/100.0
-                    stations[icao]["location"] = gecos( "%s,%s" % (lat, lon) )
-        count += 1
-    metartbl.close()
-    print("done (%s lines)." % count)
     message = "Reading %s..." % nsdcccc_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    nsdcccc = codecs.open(nsdcccc_fn, "rU", "latin1")
+    nsdcccc = codecs.open(nsdcccc_fn, "r", "utf-8")
     for line in nsdcccc:
         line = str(line)
         fields = line.split(";")
@@ -1439,44 +1455,49 @@ def correlate():
         count += 1
     nsdcccc.close()
     print("done (%s lines)." % count)
-    message = "Reading %s..." % coopact_fn
+    message = "Reading %s..." % ourairports_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    coopact = open(coopact_fn)
-    for line in coopact:
-        icao = line[33:37].strip().lower()
+    ourairports = open(ourairports_fn, "r")
+    for row in csv.reader(ourairports):
+        icao = row[12].lower()
         if icao in stations:
-            iata = line[22:26].strip().lower()
+            iata = row[13].lower()
             if len(iata) == 3: airports[iata] = { "station": icao }
             if "description" not in stations[icao]:
                 description = []
-                name = " ".join( line[99:129].strip().title().split() )
+                name = row[3]
                 if name: description.append(name)
-                st = line[59:61].strip()
-                if st: description.append(st)
-                country = " ".join( line[38:58].strip().title().split() )
-                if country: description.append(country)
+                municipality = row[10]
+                if municipality: description.append(municipality)
+                region = row[9]
+                country = row[8]
+                if region:
+                    if "-" in region:
+                        c,r = region.split("-", 1)
+                        if c == country: region = r
+                    description.append(region)
+                if country:
+                    description.append(country)
                 if description:
                     stations[icao]["description"] = ", ".join(description)
             if "location" not in stations[icao]:
-                lat = line[130:139].strip()
+                lat = row[4]
                 if lat:
-                    lat = lat.replace(" ", "-")
-                    lon = line[140:150].strip()
+                    lon = row[5]
                     if lon:
-                        lon = lon.replace(" ", "-")
                         stations[icao]["location"] = gecos(
                             "%s,%s" % (lat, lon)
                         )
         count += 1
-    coopact.close()
+    ourairports.close()
     print("done (%s lines)." % count)
     message = "Reading %s..." % zlist_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    zlist = codecs.open(zlist_fn, "rU")
+    zlist = codecs.open(zlist_fn, "r", "utf-8")
     for line in zlist:
         line = line.split("#")[0].strip()
         if line:
@@ -1484,68 +1505,76 @@ def correlate():
             count += 1
     zlist.close()
     print("done (%s lines)." % count)
-    message = "Reading %s:*..." % zcatalog_an
-    sys.stdout.write(message)
-    sys.stdout.flush()
-    count = 0
-    zcatalog = tarfile.open(zcatalog_an)
-    for entry in zcatalog.getmembers():
-        if entry.isfile():
-            fnmatch = re.match(
-                r"([a-z]+z[0-9]+)\.txt$",
-                os.path.basename(entry.name)
-            )
-            if fnmatch:
-                zone = fnmatch.group(1)
-                if zone in zones:
-                    data = zcatalog.extractfile(entry).readlines()
-                    description = data[0].decode("ascii").strip()
-                    zones[zone]["description"] = description
-                    for line in data[1:]:
-                        line = line.decode("latin1").strip()
-                        urimatch = re.match("/webdocs/(.+):(.+) for ", line)
-                        if urimatch:
-                            uritype = urimatch.group(2).lower().replace(" ","_")
-                            zones[zone][uritype] \
-                                = "http://weather.noaa.gov/%s" \
-                                % urimatch.group(1)
-        count += 1
-    zcatalog.close()
-    print("done (%s files)." % count)
     message = "Reading %s..." % cpfzcf_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
     cpfz = {}
-    cpfzcf = open(cpfzcf_fn)
+    cpfzcf = codecs.open(cpfzcf_fn, "r", "utf-8")
     for line in cpfzcf:
-        fields = line.split("|")
+        fields = line.strip().split("|")
         if len(fields) == 11 \
             and fields[0] and fields[1] and fields[9] and fields[10]:
             zone = "z".join( fields[:2] ).lower()
             if zone in zones:
-                zones[zone]["centroid"] = gecos( ",".join( fields[9:] ) )
-            elif fields[6]:
                 state = fields[0]
-                description = fields[3]
-                county = fields[5]
+                if state:
+                    zones[zone]["coastal_flood_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flood/coastal/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["flash_flood_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flash_flood/statement/%s/%s.txt"
+                        % (state.lower(), zone))
+                    zones[zone]["flash_flood_warning"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flash_flood/warning/%s/%s.txt"
+                        % (state.lower(), zone))
+                    zones[zone]["flash_flood_watch"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flash_flood/watch/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["flood_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flood/statement/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["flood_warning"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "flood/warning/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["severe_thunderstorm_warning"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "thunderstorm/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["severe_weather_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "severe_weather_stmt/%s/%s.txt"
+                        % (state.lower(), zone))
+                    zones[zone]["short_term_forecast"] = (
+                        "https://tgftp.nws.noaa.gov/data/forecasts/nowcast/"
+                        "%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["special_weather_statement"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "special_weather_stmt/%s/%s.txt"
+                        % (state.lower(), zone))
+                    zones[zone]["state_forecast"] = (
+                        "https://tgftp.nws.noaa.gov/data/forecasts/state/"
+                        "%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["urgent_weather_message"] = (
+                        "https://tgftp.nws.noaa.gov/data/watches_warnings/"
+                        "non_precip/%s/%s.txt" % (state.lower(), zone))
+                    zones[zone]["zone_forecast"] = (
+                        "https://tgftp.nws.noaa.gov/data/forecasts/zone/"
+                        "%s/%s.txt" % (state.lower(), zone))
+                description = fields[3].strip()
                 fips = "fips%s"%fields[6]
-                possible = [
-                    "%s, %s" % (county, state),
-                    "%s County, %s" % (county, state),
-                ]
-                if description.endswith(" Counties"):
-                    description = description[:-9]
-                for addition in description.split(" and "):
-                    possible.append( "%s, %s" % (addition, state) )
-                    possible.append( "%s County, %s" % (addition, state) )
-                if fips in places and "centroid" in places[fips]:
-                    for candidate in zones:
-                        if "centroid" not in zones[candidate] and \
-                            "description" in zones[candidate] and \
-                            zones[candidate]["description"] in possible:
-                            zones[candidate]["centroid"] = \
-                                places[fips]["centroid"]
+                county = fields[5]
+                if county:
+                    if description.endswith(county):
+                        description += " County"
+                    else:
+                        description += ", %s County" % county
+                description += ", %s, US" % state
+                zones[zone]["description"] = description
+                zones[zone]["centroid"] = gecos( ",".join( fields[9:11] ) )
+                if fips in places and not zones[zone]["centroid"]:
+                    zones[zone]["centroid"] = places[fips]["centroid"]
         count += 1
     cpfzcf.close()
     print("done (%s lines)." % count)
@@ -1553,14 +1582,17 @@ def correlate():
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    gzcta = zipfile.ZipFile(gzcta_an).open(gzcta_fn, "rU")
+    gzcta = zipfile.ZipFile(gzcta_an).open(gzcta_fn, "r")
+    columns = gzcta.readline().decode("utf-8").strip().split("\t")
     for line in gzcta:
-        fields = line.decode("latin1").strip().split("\t")
-        if len(fields) == 7 and fields[0] != "GEOID":
-            zcta = fields[0]
-            if zcta not in zctas: zctas[zcta] = {}
-            zctas[zcta]["centroid"] = gecos(
-                ",".join( ( fields[6], fields[5] ) )
+        fields = line.decode("utf-8").strip().split("\t")
+        f_geoid = fields[ columns.index("GEOID") ].strip()
+        f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
+        f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
+        if f_geoid and f_intptlat and f_intptlong:
+            if f_geoid not in zctas: zctas[f_geoid] = {}
+            zctas[f_geoid]["centroid"] = gecos(
+                "%s,%s" % (f_intptlat, f_intptlong)
             )
             count += 1
     gzcta.close()
@@ -1723,6 +1755,12 @@ def correlate():
     if os.path.exists(overrideslog_fn):
         os.rename(overrideslog_fn, "%s_old"%overrideslog_fn)
     overrideslog_fd = codecs.open(overrideslog_fn, "w", "utf8")
+    import time
+    overrideslog_fd.write(
+        '# Copyright (c) %s Jeremy Stanley <fungi@yuggoth.org>. Permission to\n'
+        '# use, copy, modify, and distribute this software is granted under terms\n'
+        '# provided in the LICENSE file distributed with this software.\n\n'
+        % time.gmtime().tm_year)
     overrideslog_fd.writelines(overrideslog)
     overrideslog_fd.close()
     print("done (%s overridden sections: +%s/-%s/!%s)." % (
@@ -1880,8 +1918,16 @@ def correlate():
     for airport in sorted( airports.keys() ):
         airports_fd.write("\n\n[%s]" % airport)
         for key, value in sorted( airports[airport].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             airports_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    airports_fd.write("\n")
     airports_fd.close()
     print("done (%s sections)." % count)
     message = "Writing %s..." % places_fn
@@ -1895,8 +1941,16 @@ def correlate():
     for fips in sorted( places.keys() ):
         places_fd.write("\n\n[%s]" % fips)
         for key, value in sorted( places[fips].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             places_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    places_fd.write("\n")
     places_fd.close()
     print("done (%s sections)." % count)
     message = "Writing %s..." % stations_fn
@@ -1905,13 +1959,23 @@ def correlate():
     count = 0
     if os.path.exists(stations_fn):
         os.rename(stations_fn, "%s_old"%stations_fn)
-    stations_fd = codecs.open(stations_fn, "w", "utf8")
+    stations_fd = codecs.open(stations_fn, "w", "utf-8")
     stations_fd.write(header)
     for station in sorted( stations.keys() ):
         stations_fd.write("\n\n[%s]" % station)
         for key, value in sorted( stations[station].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
+            if type(value) is bytes:
+                value = value.decode("utf-8")
             stations_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    stations_fd.write("\n")
     stations_fd.close()
     print("done (%s sections)." % count)
     message = "Writing %s..." % zctas_fn
@@ -1925,8 +1989,16 @@ def correlate():
     for zcta in sorted( zctas.keys() ):
         zctas_fd.write("\n\n[%s]" % zcta)
         for key, value in sorted( zctas[zcta].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             zctas_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    zctas_fd.write("\n")
     zctas_fd.close()
     print("done (%s sections)." % count)
     message = "Writing %s..." % zones_fn
@@ -1940,23 +2012,46 @@ def correlate():
     for zone in sorted( zones.keys() ):
         zones_fd.write("\n\n[%s]" % zone)
         for key, value in sorted( zones[zone].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             zones_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    zones_fd.write("\n")
     zones_fd.close()
     print("done (%s sections)." % count)
     message = "Starting QA check..."
     sys.stdout.write(message)
     sys.stdout.flush()
     airports = configparser.ConfigParser()
-    airports.read(airports_fn)
+    if pyversion("3"):
+        airports.read(airports_fn, encoding="utf-8")
+    else:
+        airports.read(airports_fn)
     places = configparser.ConfigParser()
-    places.read(places_fn)
+    if pyversion("3"):
+        places.read(places_fn, encoding="utf-8")
+    else:
+        places.read(places_fn)
     stations = configparser.ConfigParser()
-    stations.read(stations_fn)
+    if pyversion("3"):
+        stations.read(stations_fn, encoding="utf-8")
+    else:
+        stations.read(stations_fn)
     zctas = configparser.ConfigParser()
-    zctas.read(zctas_fn)
+    if pyversion("3"):
+        zctas.read(zctas_fn, encoding="utf-8")
+    else:
+        zctas.read(zctas_fn)
     zones = configparser.ConfigParser()
-    zones.read(zones_fn)
+    if pyversion("3"):
+        zones.read(zones_fn, encoding="utf-8")
+    else:
+        zones.read(zones_fn)
     qalog = []
     places_nocentroid = 0
     places_nodescription = 0
@@ -2031,6 +2126,12 @@ def correlate():
     if os.path.exists(qalog_fn):
         os.rename(qalog_fn, "%s_old"%qalog_fn)
     qalog_fd = codecs.open(qalog_fn, "w", "utf8")
+    import time
+    qalog_fd.write(
+        '# Copyright (c) %s Jeremy Stanley <fungi@yuggoth.org>. Permission to\n'
+        '# use, copy, modify, and distribute this software is granted under terms\n'
+        '# provided in the LICENSE file distributed with this software.\n\n'
+        % time.gmtime().tm_year)
     qalog_fd.writelines(qalog)
     qalog_fd.close()
     if qalog: