Fix Py3K compatibility for compressed correlation
[weather.git] / weather.py
index 47870ec..af43de4 100644 (file)
@@ -1,12 +1,12 @@
 """Contains various object definitions needed by the weather utility."""
 
 weather_copyright = """\
-# Copyright (c) 2006-2012 Jeremy Stanley <fungi@yuggoth.org>. Permission to
+# Copyright (c) 2006-2016 Jeremy Stanley <fungi@yuggoth.org>. Permission to
 # use, copy, modify, and distribute this software is granted under terms
 # provided in the LICENSE file distributed with this software.
 #"""
 
-weather_version = "2.0"
+weather_version = "2.3"
 
 radian_to_km = 6372.795484
 radian_to_mi = 3959.871528
@@ -692,6 +692,8 @@ def data_index(path):
                         os.stat(candidate).st_mtime
                     )
                     break
+            if filename in datafiles:
+                break
     return datafiles
 
 def guess(
@@ -742,7 +744,10 @@ def guess(
         datafile = datafiles[dataname][0]
         if datafile.endswith(".gz"):
             import gzip
-            stations.readfp( gzip.open(datafile) )
+            if pyversion("3"):
+                stations.read_string(
+                    gzip.open(datafile).read().decode("utf-8") )
+            else: stations.readfp( gzip.open(datafile) )
         else:
             stations.read(datafile)
     else:
@@ -758,7 +763,9 @@ def guess(
         datafile = datafiles[dataname][0]
         if datafile.endswith(".gz"):
             import gzip
-            zones.readfp( gzip.open(datafile) )
+            if pyversion("3"):
+                zones.read_string( gzip.open(datafile).read().decode("utf-8") )
+            else: zones.readfp( gzip.open(datafile) )
         else:
             zones.read(datafile)
     else:
@@ -782,7 +789,10 @@ def guess(
             datafile = datafiles[dataname][0]
             if datafile.endswith(".gz"):
                 import gzip
-                airports.readfp( gzip.open(datafile) )
+                if pyversion("3"):
+                    airports.read_string(
+                        gzip.open(datafile).read().decode("utf-8") )
+                else: airports.readfp( gzip.open(datafile) )
             else:
                 airports.read(datafile)
         else:
@@ -868,7 +878,10 @@ def guess(
             datafile = datafiles[dataname][0]
             if datafile.endswith(".gz"):
                 import gzip
-                zctas.readfp( gzip.open(datafile) )
+                if pyversion("3"):
+                    zctas.read_string(
+                        gzip.open(datafile).read().decode("utf-8") )
+                else: zctas.readfp( gzip.open(datafile) )
             else:
                 zctas.read(datafile)
         else:
@@ -923,7 +936,10 @@ def guess(
             datafile = datafiles[dataname][0]
             if datafile.endswith(".gz"):
                 import gzip
-                places.readfp( gzip.open(datafile) )
+                if pyversion("3"):
+                    places.read_string(
+                        gzip.open(datafile).read().decode("utf-8") )
+                else: places.readfp( gzip.open(datafile) )
             else:
                 places.read(datafile)
         else:
@@ -1196,14 +1212,14 @@ def correlate():
     import codecs, datetime, hashlib, os, re, sys, tarfile, time, zipfile
     if pyversion("3"): import configparser
     else: import ConfigParser as configparser
-    gcounties_an = "Gaz_counties_national.zip"
-    gcounties_fn = "Gaz_counties_national.txt"
-    gcousubs_an = "Gaz_cousubs_national.zip"
-    gcousubs_fn = "Gaz_cousubs_national.txt"
-    gplaces_an = "Gaz_places_national.zip"
-    gplaces_fn = "Gaz_places_national.txt"
-    gzcta_an = "Gaz_zcta_national.zip"
-    gzcta_fn = "Gaz_zcta_national.txt"
+    gcounties_an = "2015_Gaz_counties_national.zip"
+    gcounties_fn = "2015_Gaz_counties_national.txt"
+    gcousubs_an = "2015_Gaz_cousubs_national.zip"
+    gcousubs_fn = "2015_Gaz_cousubs_national.txt"
+    gplace_an = "2015_Gaz_place_national.zip"
+    gplace_fn = "2015_Gaz_place_national.txt"
+    gzcta_an = "2015_Gaz_zcta_national.zip"
+    gzcta_fn = "2015_Gaz_zcta_national.txt"
     for filename in os.listdir("."):
         if re.match("bp[0-9][0-9][a-z][a-z][0-9][0-9].dbx$", filename):
             cpfzcf_fn = filename
@@ -1211,7 +1227,7 @@ def correlate():
     nsdcccc_fn = "nsd_cccc.txt"
     zcatalog_an = "zonecatalog.curr.tar"
     metartbl_fn = "metar.tbl"
-    coopact_fn = "COOP-ACT.TXT"
+    coopstn_fn = "coop-stations.txt"
     overrides_fn = "overrides.conf"
     overrideslog_fn = "overrides.log"
     slist_fn = "slist"
@@ -1226,7 +1242,7 @@ def correlate():
 %s
 # generated by %s on %s from these public domain sources:
 #
-# http://www.census.gov/geo/www/gazetteer/gazetteer2010.html
+# http://www.census.gov/geo/maps-data/data/gazetteer2015.html
 # %s %s %s
 # %s %s %s
 # %s %s %s
@@ -1235,16 +1251,16 @@ def correlate():
 # http://www.weather.gov/geodata/catalog/wsom/html/cntyzone.htm
 # %s %s %s
 #
-# http://weather.noaa.gov/data/nsd_cccc.txt
+# http://tgftp.nws.noaa.gov/data/nsd_cccc.txt
 # %s %s %s
 #
-# http://weather.noaa.gov/pub/data/zonecatalog.curr.tar
+# http://tgftp.nws.noaa.gov/data/zonecatalog.curr.tar
 # %s %s %s
 #
 # http://www.nco.ncep.noaa.gov/pmb/codes/nwprod/dictionaries/metar.tbl
 # %s %s %s
 #
-# ftp://ftp.ncdc.noaa.gov/pub/data/inventories/COOP-ACT.TXT
+# http://www.ncdc.noaa.gov/homr/reports
 # %s %s %s
 #
 # ...and these manually-generated or hand-compiled adjustments:
@@ -1267,11 +1283,11 @@ def correlate():
             datetime.datetime.fromtimestamp( os.path.getmtime(gcousubs_an) )
         ),
         gcousubs_an,
-        hashlib.md5( open(gplaces_an, "rb").read() ).hexdigest(),
+        hashlib.md5( open(gplace_an, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(gplaces_an) )
+            datetime.datetime.fromtimestamp( os.path.getmtime(gplace_an) )
         ),
-        gplaces_an,
+        gplace_an,
         hashlib.md5( open(gzcta_an, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
             datetime.datetime.fromtimestamp( os.path.getmtime(gzcta_an) )
@@ -1297,11 +1313,11 @@ def correlate():
             datetime.datetime.fromtimestamp( os.path.getmtime(metartbl_fn) )
         ),
         metartbl_fn,
-        hashlib.md5( open(coopact_fn, "rb").read() ).hexdigest(),
+        hashlib.md5( open(coopstn_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
-            datetime.datetime.fromtimestamp( os.path.getmtime(coopact_fn) )
+            datetime.datetime.fromtimestamp( os.path.getmtime(coopstn_fn) )
         ),
-        coopact_fn,
+        coopstn_fn,
         hashlib.md5( open(overrides_fn, "rb").read() ).hexdigest(),
         datetime.date.isoformat(
             datetime.datetime.fromtimestamp( os.path.getmtime(overrides_fn) )
@@ -1369,13 +1385,13 @@ def correlate():
             count += 1
     gcousubs.close()
     print("done (%s lines)." % count)
-    message = "Reading %s:%s..." % (gplaces_an, gplaces_fn)
+    message = "Reading %s:%s..." % (gplace_an, gplace_fn)
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    gplaces = zipfile.ZipFile(gplaces_an).open(gplaces_fn, "rU")
-    columns = gplaces.readline().decode("latin1").strip().split("\t")
-    for line in gplaces:
+    gplace = zipfile.ZipFile(gplace_an).open(gplace_fn, "rU")
+    columns = gplace.readline().decode("latin1").strip().split("\t")
+    for line in gplace:
         fields = line.decode("latin1").strip().split("\t")
         f_geoid = fields[ columns.index("GEOID") ].strip()
         f_name = fields[ columns.index("NAME") ].strip()
@@ -1390,7 +1406,7 @@ def correlate():
             )
             places[fips]["description"] = "%s, %s" % (f_name, f_usps)
             count += 1
-    gplaces.close()
+    gplace.close()
     print("done (%s lines)." % count)
     message = "Reading %s..." % slist_fn
     sys.stdout.write(message)
@@ -1401,7 +1417,7 @@ def correlate():
         icao = line.split("#")[0].strip()
         if icao:
             stations[icao] = {
-                "metar": "http://weather.noaa.gov/pub/data/observations/"\
+                "metar": "http://tgftp.nws.noaa.gov/data/observations/"\
                     + "metar/decoded/%s.TXT" % icao.upper()
             }
             count += 1
@@ -1465,12 +1481,12 @@ def correlate():
         count += 1
     nsdcccc.close()
     print("done (%s lines)." % count)
-    message = "Reading %s..." % coopact_fn
+    message = "Reading %s..." % coopstn_fn
     sys.stdout.write(message)
     sys.stdout.flush()
     count = 0
-    coopact = open(coopact_fn)
-    for line in coopact:
+    coopstn = open(coopstn_fn)
+    for line in coopstn:
         icao = line[33:37].strip().lower()
         if icao in stations:
             iata = line[22:26].strip().lower()
@@ -1496,7 +1512,7 @@ def correlate():
                             "%s,%s" % (lat, lon)
                         )
         count += 1
-    coopact.close()
+    coopstn.close()
     print("done (%s lines)." % count)
     message = "Reading %s..." % zlist_fn
     sys.stdout.write(message)
@@ -1529,12 +1545,13 @@ def correlate():
                     zones[zone]["description"] = description
                     for line in data[1:]:
                         line = line.decode("latin1").strip()
-                        urimatch = re.match("/webdocs/(.+):(.+) for ", line)
+                        urimatch = re.match("/webdocs/pub/(.+):(.+) for ",
+                                            line)
                         if urimatch:
                             uritype = urimatch.group(2).lower().replace(" ","_")
-                            zones[zone][uritype] \
-                                = "http://weather.noaa.gov/%s" \
-                                % urimatch.group(1)
+                            zones[zone][uritype]  = (
+                                "http://tgftp.nws.noaa.gov/%s"
+                                % urimatch.group(1))
         count += 1
     zcatalog.close()
     print("done (%s files)." % count)
@@ -1545,12 +1562,12 @@ def correlate():
     cpfz = {}
     cpfzcf = open(cpfzcf_fn)
     for line in cpfzcf:
-        fields = line.split("|")
+        fields = line.strip().split("|")
         if len(fields) == 11 \
             and fields[0] and fields[1] and fields[9] and fields[10]:
             zone = "z".join( fields[:2] ).lower()
             if zone in zones:
-                zones[zone]["centroid"] = gecos( ",".join( fields[9:] ) )
+                zones[zone]["centroid"] = gecos( ",".join( fields[9:11] ) )
             elif fields[6]:
                 state = fields[0]
                 description = fields[3]
@@ -1909,6 +1926,13 @@ def correlate():
     for airport in sorted( airports.keys() ):
         airports_fd.write("\n\n[%s]" % airport)
         for key, value in sorted( airports[airport].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             airports_fd.write( "\n%s = %s" % (key, value) )
         count += 1
     airports_fd.write("\n")
@@ -1925,6 +1949,13 @@ def correlate():
     for fips in sorted( places.keys() ):
         places_fd.write("\n\n[%s]" % fips)
         for key, value in sorted( places[fips].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             places_fd.write( "\n%s = %s" % (key, value) )
         count += 1
     places_fd.write("\n")
@@ -1941,6 +1972,13 @@ def correlate():
     for station in sorted( stations.keys() ):
         stations_fd.write("\n\n[%s]" % station)
         for key, value in sorted( stations[station].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             stations_fd.write( "\n%s = %s" % (key, value) )
         count += 1
     stations_fd.write("\n")
@@ -1957,6 +1995,13 @@ def correlate():
     for zcta in sorted( zctas.keys() ):
         zctas_fd.write("\n\n[%s]" % zcta)
         for key, value in sorted( zctas[zcta].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             zctas_fd.write( "\n%s = %s" % (key, value) )
         count += 1
     zctas_fd.write("\n")
@@ -1973,6 +2018,13 @@ def correlate():
     for zone in sorted( zones.keys() ):
         zones_fd.write("\n\n[%s]" % zone)
         for key, value in sorted( zones[zone].items() ):
+            if type(value) is float: value = "%.7f"%value
+            elif type(value) is tuple:
+                elements = []
+                for element in value:
+                    if type(element) is float: elements.append("%.7f"%element)
+                    else: elements.append( repr(element) )
+                value = "(%s)"%", ".join(elements)
             zones_fd.write( "\n%s = %s" % (key, value) )
         count += 1
     zones_fd.write("\n")