From e7256def4f49815b0248be8118af4dfb940fe1a2 Mon Sep 17 00:00:00 2001 From: Jeremy Stanley Date: Mon, 10 Sep 2012 03:28:49 +0000 Subject: [PATCH] 2010 Census U.S. Gazetteer file layout changed. * weather.py(correlate): The United States Census Bureau altered the format of their 2010 Gazetteer on August 22, 2012, adding and reordering a few fields. The previous version of the parser assumed a fixed field order and ceased to work with the updated data files, so now the order is inferred from the column headings in the first line of each file instead. --- weather.py | 67 +++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/weather.py b/weather.py index 60067f2..dbf7f87 100644 --- a/weather.py +++ b/weather.py @@ -1328,15 +1328,21 @@ def correlate(): sys.stdout.flush() count = 0 gcounties = zipfile.ZipFile(gcounties_an).open(gcounties_fn, "rU") + columns = gcounties.readline().decode("latin1").strip().split("\t") for line in gcounties: fields = line.decode("latin1").strip().split("\t") - if len(fields) == 10 and fields[0] != "STUSPS": - fips = "fips%s" % fields[1] - description = "%s, %s" % ( fields[3], fields[0] ) - centroid = gecos( ",".join( fields[8:10] ) ) + f_geoid = fields[ columns.index("GEOID") ].strip() + f_name = fields[ columns.index("NAME") ].strip() + f_usps = fields[ columns.index("USPS") ].strip() + f_intptlat = fields[ columns.index("INTPTLAT") ].strip() + f_intptlong = fields[ columns.index("INTPTLONG") ].strip() + if f_geoid and f_name and f_usps and f_intptlat and f_intptlong: + fips = "fips%s" % f_geoid if fips not in places: places[fips] = {} - places[fips]["centroid"] = centroid - places[fips]["description"] = description + places[fips]["centroid"] = gecos( + "%s,%s" % (f_intptlat, f_intptlong) + ) + places[fips]["description"] = "%s, %s" % (f_name, f_usps) count += 1 gcounties.close() print("done (%s lines)." % count) @@ -1345,15 +1351,21 @@ def correlate(): sys.stdout.flush() count = 0 gcousubs = zipfile.ZipFile(gcousubs_an).open(gcousubs_fn, "rU") + columns = gcousubs.readline().decode("latin1").strip().split("\t") for line in gcousubs: fields = line.decode("latin1").strip().split("\t") - if len(fields) == 10 and fields[0] != "STUSPS": - fips = "fips%s" % fields[1] - description = "%s, %s" % ( fields[3], fields[0] ) - centroid = gecos( ",".join( fields[8:10] ) ) + f_geoid = fields[ columns.index("GEOID") ].strip() + f_name = fields[ columns.index("NAME") ].strip() + f_usps = fields[ columns.index("USPS") ].strip() + f_intptlat = fields[ columns.index("INTPTLAT") ].strip() + f_intptlong = fields[ columns.index("INTPTLONG") ].strip() + if f_geoid and f_name and f_usps and f_intptlat and f_intptlong: + fips = "fips%s" % f_geoid if fips not in places: places[fips] = {} - places[fips]["centroid"] = centroid - places[fips]["description"] = description + places[fips]["centroid"] = gecos( + "%s,%s" % (f_intptlat, f_intptlong) + ) + places[fips]["description"] = "%s, %s" % (f_name, f_usps) count += 1 gcousubs.close() print("done (%s lines)." % count) @@ -1362,15 +1374,21 @@ def correlate(): sys.stdout.flush() count = 0 gplaces = zipfile.ZipFile(gplaces_an).open(gplaces_fn, "rU") + columns = gplaces.readline().decode("latin1").strip().split("\t") for line in gplaces: fields = line.decode("latin1").strip().split("\t") - if len(fields) == 10 and fields[0] != "STUSPS": - fips = "fips%s" % fields[1] - description = "%s, %s" % ( fields[3], fields[0] ) - centroid = gecos( ",".join( fields[8:10] ) ) + f_geoid = fields[ columns.index("GEOID") ].strip() + f_name = fields[ columns.index("NAME") ].strip() + f_usps = fields[ columns.index("USPS") ].strip() + f_intptlat = fields[ columns.index("INTPTLAT") ].strip() + f_intptlong = fields[ columns.index("INTPTLONG") ].strip() + if f_geoid and f_name and f_usps and f_intptlat and f_intptlong: + fips = "fips%s" % f_geoid if fips not in places: places[fips] = {} - places[fips]["centroid"] = centroid - places[fips]["description"] = description + places[fips]["centroid"] = gecos( + "%s,%s" % (f_intptlat, f_intptlong) + ) + places[fips]["description"] = "%s, %s" % (f_name, f_usps) count += 1 gplaces.close() print("done (%s lines)." % count) @@ -1562,13 +1580,16 @@ def correlate(): sys.stdout.flush() count = 0 gzcta = zipfile.ZipFile(gzcta_an).open(gzcta_fn, "rU") + columns = gzcta.readline().decode("latin1").strip().split("\t") for line in gzcta: fields = line.decode("latin1").strip().split("\t") - if len(fields) == 7 and fields[0] != "GEOID": - zcta = fields[0] - if zcta not in zctas: zctas[zcta] = {} - zctas[zcta]["centroid"] = gecos( - ",".join( ( fields[6], fields[5] ) ) + f_geoid = fields[ columns.index("GEOID") ].strip() + f_intptlat = fields[ columns.index("INTPTLAT") ].strip() + f_intptlong = fields[ columns.index("INTPTLONG") ].strip() + if f_geoid and f_intptlat and f_intptlong: + if f_geoid not in zctas: zctas[f_geoid] = {} + zctas[f_geoid]["centroid"] = gecos( + "%s,%s" % (f_intptlat, f_intptlong) ) count += 1 gzcta.close() -- 2.11.0