Fix Py3K compatibility for compressed correlation
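This diff (spanning weather 2.0 to 2.3) makes the gzip-compressed data
files readable under Python 3: configparser there only accepts text,
while gzip.open() returns bytes, so the stations, zones, airports, ZCTA
and places data are now decoded and fed to read_string() instead of
readfp().  It also adds a "quiet" option, recognizes
/etc/weather/weatherrc, switches the correlation step to the 2015
Census gazetteer files and the current NWS/NCDC data URLs, looks
gazetteer columns up by header name instead of position, and writes
floating-point values with a fixed seven-digit precision.

A minimal sketch of the compatibility pattern used in guess(), assuming
a gzip-compressed INI-style data file (the read_compressed helper and
the "stations.gz" path are illustrative, not part of the patch):

    import gzip, sys

    if sys.version_info[0] >= 3:
        import configparser
    else:
        import ConfigParser as configparser

    def read_compressed(parser, path):
        # Illustrative helper mirroring the readfp()/read_string()
        # branch added in this patch: gzip.open() yields bytes on
        # Python 3, which configparser cannot parse directly, so
        # decode and use read_string(); Python 2's readfp() still
        # accepts the file object as-is.
        if sys.version_info[0] >= 3:
            parser.read_string(gzip.open(path).read().decode("utf-8"))
        else:
            parser.readfp(gzip.open(path))

    stations = configparser.ConfigParser()
    read_compressed(stations, "stations.gz")
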
diff --git a/weather.py b/weather.py
index 0e0d436..af43de4 100644
--- a/weather.py
+++ b/weather.py
@@ -1,12 +1,12 @@
 """Contains various object definitions needed by the weather utility."""
 
 weather_copyright = """\
-# Copyright (c) 2006-2012 Jeremy Stanley <fungi@yuggoth.org>. Permission to
+# Copyright (c) 2006-2016 Jeremy Stanley <fungi@yuggoth.org>. Permission to
 # use, copy, modify, and distribute this software is granted under terms
 # provided in the LICENSE file distributed with this software.
 #"""
 
-weather_version = "2.0"
+weather_version = "2.3"
 
 radian_to_km = 6372.795484
 radian_to_mi = 3959.871528
@@ -79,7 +79,8 @@ class Selections:
                     cache_search=(
                         self.get("cache") and self.get("cache_search")
                     ),
-                    cachedir=self.get("cachedir")
+                    cachedir=self.get("cachedir"),
+                    quiet=self.get_bool("quiet")
                 )
                 self.config.add_section(argument)
                 for item in guessed.items():
@@ -608,6 +609,7 @@ def get_config():
     import os
     rcfiles = [
         "/etc/weatherrc",
+        "/etc/weather/weatherrc",
         os.path.expanduser("~/.weather/weatherrc"),
         os.path.expanduser("~/.weatherrc"),
         "weatherrc"
@@ -690,6 +692,8 @@ def data_index(path):
                         os.stat(candidate).st_mtime
                     )
                     break
+            if filename in datafiles:
+                break
     return datafiles
 
 def guess(
@@ -699,7 +703,8 @@ def guess(
     info=False,
     cache_search=False,
     cacheage=900,
-    cachedir="."
+    cachedir=".",
+    quiet=False
 ):
     """Find URIs using airport, gecos, placename, station, ZCTA/ZIP, zone."""
     import codecs, datetime, time, os, re, sys
@@ -732,14 +737,17 @@ def guess(
             (0.995, "excellent"),
             (1.000, "ideal"),
         ]
-    print("Searching via %s..."%searchtype)
+    if not quiet: print("Searching via %s..."%searchtype)
     stations = configparser.ConfigParser()
     dataname = "stations"
     if dataname in datafiles:
         datafile = datafiles[dataname][0]
         if datafile.endswith(".gz"):
             import gzip
-            stations.readfp( gzip.open(datafile) )
+            if pyversion("3"):
+                stations.read_string(
+                    gzip.open(datafile).read().decode("utf-8") )
+            else: stations.readfp( gzip.open(datafile) )
         else:
             stations.read(datafile)
     else:
@@ -755,7 +763,9 @@ def guess(
         datafile = datafiles[dataname][0]
         if datafile.endswith(".gz"):
             import gzip
-            zones.readfp( gzip.open(datafile) )
+            if pyversion("3"):
+                zones.read_string( gzip.open(datafile).read().decode("utf-8") )
+            else: zones.readfp( gzip.open(datafile) )
         else:
             zones.read(datafile)
     else:
@@ -779,7 +789,10 @@ def guess(
             datafile = datafiles[dataname][0]
             if datafile.endswith(".gz"):
                 import gzip
-                airports.readfp( gzip.open(datafile) )
+                if pyversion("3"):
+                    airports.read_string(
+                        gzip.open(datafile).read().decode("utf-8") )
+                else: airports.readfp( gzip.open(datafile) )
             else:
                 airports.read(datafile)
         else:
@@ -796,7 +809,8 @@ def guess(
             if stations.has_option(station[0], "zone"):
                 zone = eval( stations.get(station[0], "zone") )
                 dataset = stations
-            if not info and stations.has_option( station[0], "description" ):
+            if not ( info or quiet ) \
+                and stations.has_option( station[0], "description" ):
                 print(
                     "[%s result %s]" % (
                         action,
@@ -819,7 +833,8 @@ def guess(
             if stations.has_option(expression, "zone"):
                 zone = eval( stations.get(expression, "zone") )
                 dataset = stations
-            if not info and stations.has_option(expression, "description"):
+            if not ( info or quiet ) \
+                and stations.has_option(expression, "description"):
                 print(
                     "[%s result %s]" % (
                         action,
@@ -841,7 +856,8 @@ def guess(
             station = eval( zones.get(expression, "station") )
             dataset = zones
             search = (expression, "NWS/NOAA weather zone %s" % expression)
-            if not info and zones.has_option(expression, "description"):
+            if not ( info or quiet ) \
+                and zones.has_option(expression, "description"):
                 print(
                     "[%s result %s]" % (
                         action,
@@ -862,7 +878,10 @@ def guess(
             datafile = datafiles[dataname][0]
             if datafile.endswith(".gz"):
                 import gzip
-                zctas.readfp( gzip.open(datafile) )
+                if pyversion("3"):
+                    zctas.read_string(
+                        gzip.open(datafile).read().decode("utf-8") )
+                else: zctas.readfp( gzip.open(datafile) )
             else:
                 zctas.read(datafile)
         else:
@@ -917,7 +936,10 @@ def guess(
             datafile = datafiles[dataname][0]
             if datafile.endswith(".gz"):
                 import gzip
-                places.readfp( gzip.open(datafile) )
+                if pyversion("3"):
+                    places.read_string(
+                        gzip.open(datafile).read().decode("utf-8") )
+                else: places.readfp( gzip.open(datafile) )
             else:
                 places.read(datafile)
         else:
@@ -939,7 +961,8 @@ def guess(
                 )
             if places.has_option(place, "zone"):
                 zone = eval( places.get(place, "zone") )
-            if not info and places.has_option(place, "description"):
+            if not ( info or quiet ) \
+                and places.has_option(place, "description"):
                 print(
                     "[%s result %s]" % (
                         action,
@@ -992,7 +1015,8 @@ def guess(
                     description = zones.get(place, "description")
                     zone = (place, 0.0)
                     search = ( expression, "NWS/NOAA weather zone %s" % place )
-                if not info: print( "[%s result %s]" % (action, description) )
+                if not ( info or quiet ):
+                    print( "[%s result %s]" % (action, description) )
             if not possibilities and not station[0]:
                 message = "No FIPS code/census area match in the %s file.\n" % (
                     datafiles["places"][0]
@@ -1188,14 +1212,14 @@ def correlate():
     import codecs, datetime, hashlib, os, re, sys, tarfile, time, zipfile
     if pyversion("3"): import configparser
     else: import ConfigParser as configparser
-    gcounties_an = "Gaz_counties_national.zip"
-    gcounties_fn = "Gaz_counties_national.txt"
-    gcousubs_an = "Gaz_cousubs_national.zip"
-    gcousubs_fn = "Gaz_cousubs_national.txt"
-    gplaces_an = "Gaz_places_national.zip"
-    gplaces_fn = "Gaz_places_national.txt"
-    gzcta_an = "Gaz_zcta_national.zip"
-    gzcta_fn = "Gaz_zcta_national.txt"
+    gcounties_an = "2015_Gaz_counties_national.zip"
+    gcounties_fn = "2015_Gaz_counties_national.txt"
+    gcousubs_an = "2015_Gaz_cousubs_national.zip"
+    gcousubs_fn = "2015_Gaz_cousubs_national.txt"
+    gplace_an = "2015_Gaz_place_national.zip"
+    gplace_fn = "2015_Gaz_place_national.txt"
+    gzcta_an = "2015_Gaz_zcta_national.zip"
+    gzcta_fn = "2015_Gaz_zcta_national.txt"
     for filename in os.listdir("."):
         if re.match("bp[0-9][0-9][a-z][a-z][0-9][0-9].dbx$", filename):
             cpfzcf_fn = filename
@@ -1203,7 +1227,7 @@ def correlate():
     nsdcccc_fn = "nsd_cccc.txt"
     zcatalog_an = "zonecatalog.curr.tar"
     metartbl_fn = "metar.tbl"
-    coopact_fn = "COOP-ACT.TXT"
+    coopstn_fn = "coop-stations.txt"
     overrides_fn = "overrides.conf"
     overrideslog_fn = "overrides.log"
     slist_fn = "slist"
@@ -1218,7 +1242,7 @@ def correlate():
 %s
 # generated by %s on %s from these public domain sources:
 #
-# http://www.census.gov/geo/www/gazetteer/gazetteer2010.html
+# http://www.census.gov/geo/maps-data/data/gazetteer2015.html
 # %s %s %s
 # %s %s %s
 # %s %s %s
@@ -1227,16 +1251,16 @@ def correlate():
 # http://www.weather.gov/geodata/catalog/wsom/html/cntyzone.htm
 # %s %s %s
 #
-# http://weather.noaa.gov/data/nsd_cccc.txt
+# http://tgftp.nws.noaa.gov/data/nsd_cccc.txt
 # %s %s %s
 #
-# http://weather.noaa.gov/pub/data/zonecatalog.curr.tar
+# http://tgftp.nws.noaa.gov/data/zonecatalog.curr.tar
 # %s %s %s
 #
 # http://www.nco.ncep.noaa.gov/pmb/codes/nwprod/dictionaries/metar.tbl
 # %s %s %s
 #
-# ftp://ftp.ncdc.noaa.gov/pub/data/inventories/COOP-ACT.TXT
+# http://www.ncdc.noaa.gov/homr/reports
 # %s %s %s
 #
 # ...and these manually-generated or hand-compiled adjustments:
@@ -1259,11 +1283,11 @@ def correlate():
             datetime.datetime.fromtimestamp( os.path.getmtime(gcousubs_an) )
         ),
         gcousubs_an,
-        hashlib.md5( open(gplaces_an, "rb").read() ).hexdigest(),
+        hashlib.md5( open(gplace_an, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(gplaces_an) )
+            datetime.datetime.fromtimestamp( os.path.getmtime(gplace_an) )
         ),
-        gplaces_an,
+        gplace_an,
         hashlib.md5( open(gzcta_an, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
             datetime.datetime.fromtimestamp( os.path.getmtime(gzcta_an) )
@@ -1289,11 +1313,11 @@ def correlate():
             datetime.datetime.fromtimestamp( os.path.getmtime(metartbl_fn) )
         ),
         metartbl_fn,
-        hashlib.md5( open(coopact_fn, "rb").read() ).hexdigest(),
+        hashlib.md5( open(coopstn_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(coopact_fn) )
+            datetime.datetime.fromtimestamp( os.path.getmtime(coopstn_fn) )
         ),
-        coopact_fn,
+        coopstn_fn,
         hashlib.md5( open(overrides_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
             datetime.datetime.fromtimestamp( os.path.getmtime(overrides_fn) )
@@ -1320,15 +1344,21 @@ def correlate():
     sys.stdout.flush()
     count = 0
     gcounties = zipfile.ZipFile(gcounties_an).open(gcounties_fn, "rU")
+    columns = gcounties.readline().decode("latin1").strip().split("\t")
     for line in gcounties:
         fields = line.decode("latin1").strip().split("\t")
-        if len(fields) == 10 and fields[0] != "STUSPS":
-            fips = "fips%s" % fields[1]
-            description = "%s, %s" % ( fields[3], fields[0] )
-            centroid = gecos( ",".join( fields[8:10] ) )
+        f_geoid = fields[ columns.index("GEOID") ].strip()
+        f_name = fields[ columns.index("NAME") ].strip()
+        f_usps = fields[ columns.index("USPS") ].strip()
+        f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
+        f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
+        if f_geoid and f_name and f_usps and f_intptlat and f_intptlong:
+            fips = "fips%s" % f_geoid
             if fips not in places: places[fips] = {}
-            places[fips]["centroid"] = centroid
-            places[fips]["description"] = description
+            places[fips]["centroid"] = gecos(
+                "%s,%s" % (f_intptlat, f_intptlong)
+            )
+            places[fips]["description"] = "%s, %s" % (f_name, f_usps)
             count += 1
     gcounties.close()
     print("done (%s lines)." % count)
@@ -1337,34 +1367,46 @@ def correlate():
     sys.stdout.flush()
     count = 0
     gcousubs = zipfile.ZipFile(gcousubs_an).open(gcousubs_fn, "rU")
+    columns = gcousubs.readline().decode("latin1").strip().split("\t")
     for line in gcousubs:
         fields = line.decode("latin1").strip().split("\t")
-        if len(fields) == 10 and fields[0] != "STUSPS":
-            fips = "fips%s" % fields[1]
-            description = "%s, %s" % ( fields[3], fields[0] )
-            centroid = gecos( ",".join( fields[8:10] ) )
+        f_geoid = fields[ columns.index("GEOID") ].strip()
+        f_name = fields[ columns.index("NAME") ].strip()
+        f_usps = fields[ columns.index("USPS") ].strip()
+        f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
+        f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
+        if f_geoid and f_name and f_usps and f_intptlat and f_intptlong:
+            fips = "fips%s" % f_geoid
             if fips not in places: places[fips] = {}
-            places[fips]["centroid"] = centroid
-            places[fips]["description"] = description
+            places[fips]["centroid"] = gecos(
+                "%s,%s" % (f_intptlat, f_intptlong)
+            )
+            places[fips]["description"] = "%s, %s" % (f_name, f_usps)
             count += 1
     gcousubs.close()
     print("done (%s lines)." % count)
-    message = "Reading %s:%s..." % (gplaces_an, gplaces_fn)
+    message = "Reading %s:%s..." % (gplace_an, gplace_fn)
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    gplaces = zipfile.ZipFile(gplaces_an).open(gplaces_fn, "rU")
-    for line in gplaces:
+    gplace = zipfile.ZipFile(gplace_an).open(gplace_fn, "rU")
+    columns = gplace.readline().decode("latin1").strip().split("\t")
+    for line in gplace:
         fields = line.decode("latin1").strip().split("\t")
-        if len(fields) == 10 and fields[0] != "STUSPS":
-            fips = "fips%s" % fields[1]
-            description = "%s, %s" % ( fields[3], fields[0] )
-            centroid = gecos( ",".join( fields[8:10] ) )
+        f_geoid = fields[ columns.index("GEOID") ].strip()
+        f_name = fields[ columns.index("NAME") ].strip()
+        f_usps = fields[ columns.index("USPS") ].strip()
+        f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
+        f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
+        if f_geoid and f_name and f_usps and f_intptlat and f_intptlong:
+            fips = "fips%s" % f_geoid
             if fips not in places: places[fips] = {}
-            places[fips]["centroid"] = centroid
-            places[fips]["description"] = description
+            places[fips]["centroid"] = gecos(
+                "%s,%s" % (f_intptlat, f_intptlong)
+            )
+            places[fips]["description"] = "%s, %s" % (f_name, f_usps)
             count += 1
-    gplaces.close()
+    gplace.close()
     print("done (%s lines)." % count)
     message = "Reading %s..." % slist_fn
     sys.stdout.write(message)
@@ -1375,7 +1417,7 @@ def correlate():
         icao = line.split("#")[0].strip()
         if icao:
             stations[icao] = {
-                "metar": "http://weather.noaa.gov/pub/data/observations/"\
+                "metar": "http://tgftp.nws.noaa.gov/data/observations/"\
                     + "metar/decoded/%s.TXT" % icao.upper()
             }
             count += 1
@@ -1439,12 +1481,12 @@ def correlate():
         count += 1
     nsdcccc.close()
     print("done (%s lines)." % count)
-    message = "Reading %s..." % coopact_fn
+    message = "Reading %s..." % coopstn_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    coopact = open(coopact_fn)
-    for line in coopact:
+    coopstn = open(coopstn_fn)
+    for line in coopstn:
         icao = line[33:37].strip().lower()
         if icao in stations:
             iata = line[22:26].strip().lower()
@@ -1470,7 +1512,7 @@ def correlate():
                             "%s,%s" % (lat, lon)
                         )
         count += 1
-    coopact.close()
+    coopstn.close()
     print("done (%s lines)." % count)
     message = "Reading %s..." % zlist_fn
     sys.stdout.write(message)
@@ -1503,12 +1545,13 @@ def correlate():
                     zones[zone]["description"] = description
                     for line in data[1:]:
                         line = line.decode("latin1").strip()
-                        urimatch = re.match("/webdocs/(.+):(.+) for ", line)
+                        urimatch = re.match("/webdocs/pub/(.+):(.+) for ",
+                                            line)
                         if urimatch:
                             uritype = urimatch.group(2).lower().replace(" ","_")
-                            zones[zone][uritype] \
-                                = "http://weather.noaa.gov/%s" \
-                                % urimatch.group(1)
+                            zones[zone][uritype]  = (
+                                "http://tgftp.nws.noaa.gov/%s"
+                                % urimatch.group(1))
         count += 1
     zcatalog.close()
     print("done (%s files)." % count)
@@ -1519,12 +1562,12 @@ def correlate():
     cpfz = {}
     cpfzcf = open(cpfzcf_fn)
     for line in cpfzcf:
-        fields = line.split("|")
+        fields = line.strip().split("|")
         if len(fields) == 11 \
             and fields[0] and fields[1] and fields[9] and fields[10]:
             zone = "z".join( fields[:2] ).lower()
             if zone in zones:
-                zones[zone]["centroid"] = gecos( ",".join( fields[9:] ) )
+                zones[zone]["centroid"] = gecos( ",".join( fields[9:11] ) )
             elif fields[6]:
                 state = fields[0]
                 description = fields[3]
@@ -1554,13 +1597,16 @@ def correlate():
     sys.stdout.flush()
     count = 0
     gzcta = zipfile.ZipFile(gzcta_an).open(gzcta_fn, "rU")
+    columns = gzcta.readline().decode("latin1").strip().split("\t")
     for line in gzcta:
         fields = line.decode("latin1").strip().split("\t")
-        if len(fields) == 7 and fields[0] != "GEOID":
-            zcta = fields[0]
-            if zcta not in zctas: zctas[zcta] = {}
-            zctas[zcta]["centroid"] = gecos(
-                ",".join( ( fields[6], fields[5] ) )
+        f_geoid = fields[ columns.index("GEOID") ].strip()
+        f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
+        f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
+        if f_geoid and f_intptlat and f_intptlong:
+            if f_geoid not in zctas: zctas[f_geoid] = {}
+            zctas[f_geoid]["centroid"] = gecos(
+                "%s,%s" % (f_intptlat, f_intptlong)
             )
             count += 1
     gzcta.close()
@@ -1880,8 +1926,16 @@ def correlate():
     for airport in sorted( airports.keys() ):
         airports_fd.write("\n\n[%s]" % airport)
         for key, value in sorted( airports[airport].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             airports_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    airports_fd.write("\n")
     airports_fd.close()
     print("done (%s sections)." % count)
     message = "Writing %s..." % places_fn
@@ -1895,8 +1949,16 @@ def correlate():
     for fips in sorted( places.keys() ):
         places_fd.write("\n\n[%s]" % fips)
         for key, value in sorted( places[fips].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             places_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    places_fd.write("\n")
     places_fd.close()
     print("done (%s sections)." % count)
     message = "Writing %s..." % stations_fn
@@ -1910,8 +1972,16 @@ def correlate():
     for station in sorted( stations.keys() ):
         stations_fd.write("\n\n[%s]" % station)
         for key, value in sorted( stations[station].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             stations_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    stations_fd.write("\n")
     stations_fd.close()
     print("done (%s sections)." % count)
     message = "Writing %s..." % zctas_fn
@@ -1925,8 +1995,16 @@ def correlate():
     for zcta in sorted( zctas.keys() ):
         zctas_fd.write("\n\n[%s]" % zcta)
         for key, value in sorted( zctas[zcta].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             zctas_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    zctas_fd.write("\n")
     zctas_fd.close()
     print("done (%s sections)." % count)
     message = "Writing %s..." % zones_fn
@@ -1940,8 +2018,16 @@ def correlate():
     for zone in sorted( zones.keys() ):
         zones_fd.write("\n\n[%s]" % zone)
         for key, value in sorted( zones[zone].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             zones_fd.write( "\n%s = %s" % (key, value) )
         count += 1
+    zones_fd.write("\n")
     zones_fd.close()
     print("done (%s sections)." % count)
     message = "Starting QA check..."