Skip to content

Commit

Permalink
Merge pull request #373 from knaaptime/lodesupdate
Browse files Browse the repository at this point in the history
  • Loading branch information
knaaptime authored Oct 2, 2023
2 parents e20d571 + 0c559d1 commit bbbfcdc
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:

- name: Test geosnap
run: |
pytest -v --color yes --cov geosnap --cov-append --cov-report term-missing --cov-report xml .
pytest -v --color yes --cov geosnap/tests --cov-append --cov-report term-missing --cov-report xml .
- uses: codecov/codecov-action@v3

Expand Down
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ available quickly with no configuration by accessing methods on the class.
DataStore.tracts_1990
DataStore.tracts_2000
DataStore.tracts_2010

DataStore.tracts_2020

Storing data
'''''''''''''''
Expand Down
71 changes: 45 additions & 26 deletions geosnap/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,27 +54,28 @@ def __delitem__(self, key):
class DataStore:
"""Storage for geosnap data. Currently supports data from several U.S. federal agencies and national research centers."""

def __init__(self, data_dir="auto"):
self
def __init__(self, data_dir="auto", disclaimer=False):
appname = "geosnap"
appauthor = "geosnap"

if data_dir == "auto":
self.data_dir = user_data_dir(appname, appauthor)
else:
self.data_dir = data_dir
warn(
"The geosnap data storage class is provided for convenience only. The geosnap developers make no promises "
"regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. "
"The end-user is responsible for any and all analyses or applications created with the package."
)
if disclaimer:
warn(
"The geosnap data storage class is provided for convenience only. The geosnap developers make no promises "
"regarding data quality, consistency, or availability, nor are they responsible for any use/misuse of the data. "
"The end-user is responsible for any and all analyses or applications created with the package."
)

def __dir__(self):

atts = [
"acs",
"blocks_2000",
"blocks_2010",
"blocks_2020",
"codebook",
"counties",
"ejscreen",
Expand All @@ -89,6 +90,7 @@ def __dir__(self):
"tracts_1990",
"tracts_2000",
"tracts_2010",
"tracts_2020"
]

return atts
Expand Down Expand Up @@ -431,9 +433,8 @@ def tracts_2000(self, states=None):
Returns
-------
pandas.DataFrame or geopandas.GeoDataFrame
2000 tracts as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
geopandas.GeoDataFrame
2000 tracts as a geodataframe
"""
local = pathlib.Path(self.data_dir, "tracts_2000_500k.parquet")
Expand All @@ -459,9 +460,8 @@ def tracts_2010(
Returns
-------
pandas.DataFrame or geopandas.GeoDataFrame
2010 tracts as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
geopandas.GeoDataFrame
2010 tracts as a geodataframe
"""
msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance"
Expand All @@ -474,6 +474,33 @@ def tracts_2010(
t["year"] = 2010
return t

def tracts_2020(
    self,
    states=None,
):
    """Nationwide Census Tracts as drawn in 2020 (cartographic 500k).

    Parameters
    ----------
    states : str or list-like, optional
        State FIPS code(s) used to subset the national dataframe. Rows are
        kept when the first two characters of ``geoid`` match one of the
        given codes. A single string is treated as a one-element list.
        ``None`` or an empty collection returns every tract.

    Returns
    -------
    geopandas.GeoDataFrame
        2020 tracts as a geodataframe, with a ``year`` column set to 2020.
    """
    msg = "Streaming data from S3. Use `geosnap.io.store_census() to store the data locally for better performance"
    local = pathlib.Path(self.data_dir, "tracts_2020_500k.parquet")
    remote = "s3://spatial-ucr/census/tracts_cartographic/tracts_2020_500k.parquet"
    t = _fetcher(local, remote, msg)

    # Compare against None instead of relying on truthiness: NumPy arrays
    # and pandas Series raise "truth value is ambiguous" under `if states:`.
    if states is not None:
        # A bare string is not list-like for `isin`; wrap it. Materialising
        # other inputs as a list lets the emptiness check work for any iterable.
        wanted = [states] if isinstance(states, str) else list(states)
        if wanted:
            # Copy the subset so the column assignment below writes to an
            # independent frame rather than a slice of the fetched table.
            t = t[t.geoid.str[:2].isin(wanted)].copy()
    t["year"] = 2020
    return t

def msas(self):
"""Metropolitan Statistical Areas as drawn in 2020.
Expand All @@ -483,9 +510,8 @@ def msas(self):
Returns
-------
pandas.DataFrame or geopandas.GeoDataFrame
2010 MSAs as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
geopandas.GeoDataFrame
2010 MSAs as a geodataframe
"""
local = pathlib.Path(self.data_dir, "msas.parquet")
Expand All @@ -500,9 +526,8 @@ def states(self):
Returns
-------
pandas.DataFrame or geopandas.GeoDataFrame
US States as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
geopandas.GeoDataFrame
US States as a geodataframe
"""
local = pathlib.Path(self.data_dir, "states.parquet")
Expand All @@ -515,16 +540,10 @@ def states(self):
def counties(self):
"""Nationwide counties as drawn in 2010.
Parameters
----------
convert : bool
if True, return geodataframe, else return dataframe (the default is True).
Returns
-------
geopandas.GeoDataFrame
2010 counties as a geodataframe or as a dataframe with geometry
stored as well-known binary on the 'wkb' column.
2010 counties as a geodataframe.
"""
local = pathlib.Path(self.data_dir, "counties.parquet")
Expand Down
4 changes: 4 additions & 0 deletions geosnap/tests/test_datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ def test_tracts10():
df = datasets.tracts_2010(states=["11"])
assert df.shape == (179, 194)

def test_tracts20():
    """Check the shape of the 2020 tracts table subset to state FIPS "11"."""
    subset = datasets.tracts_2020(states=["11"])
    expected_shape = (206, 15)  # (rows, columns)
    assert subset.shape == expected_shape


def test_counties():
assert datasets.counties().shape == (3233, 2)
Expand Down

0 comments on commit bbbfcdc

Please sign in to comment.