lazer.gedi¶
Tools for downloading and working with GEDI data.
EarthdataSession (Session)
¶
Authentication guide: wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
__init__(self, username=None, password=None)
special
¶
Creates a session for communicating with the NASA Earthdata API.
Create an account at urs.earthdata.nasa.gov/users/new. You can avoid passing your login credentials by setting the environment variables EARTHDATA_USER and EARTHDATA_PASS.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
username |
str |
Earthdata username. If not set, the function will prompt you to enter. |
None |
password |
str |
Earthdata password. If not set, the function will prompt you to enter. |
None |
Returns:
Type | Description |
---|---|
Session |
a `requests` session tracking activity to the Earthdata API. |
Source code in lazer/gedi.py
def __init__(self, username: str = None, password: str = None) -> None:
    """Creates a session for communicating with the NASA Earthdata API.

    Create an account at https://urs.earthdata.nasa.gov/users/new. You can
    avoid passing your login credentials by setting the environment
    variables EARTHDATA_USER and EARTHDATA_PASS.

    Each credential is resolved in this order: the explicit argument, then
    the environment variable, then an interactive prompt.

    Besides storing the credentials on the session (``self.auth``), this
    builds a urllib opener with basic-auth and cookie handling, stores it as
    ``self.opener`` and installs it globally via ``request.install_opener``,
    so later ``request.urlopen`` calls in this process go through it.

    Args:
        username: Earthdata username. If not set, EARTHDATA_USER is used, and
            if that is unset too, the function will prompt you to enter.
        password: Earthdata password. If not set, EARTHDATA_PASS is used, and
            if that is unset too, the function will prompt you to enter.

    Returns:
        None. The instance itself is the session.
    """
    super().__init__()

    # resolve the username: argument -> environment -> prompt
    if username is None:
        username = os.getenv("EARTHDATA_USER")
    if username is None:
        # NOTE(review): getpass hides what is typed; kept as in the original,
        # but a plain input() prompt may be friendlier for a username.
        username = getpass("Earthdata username:")

    # resolve the password: argument -> environment -> prompt
    if password is None:
        password = os.getenv("EARTHDATA_PASS")
    if password is None:
        password = getpass("Earthdata password:")

    self.auth = (username, password)

    # handle authentication via cookies
    cookie_jar = CookieJar()
    password_manager = request.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, self.AUTH_URL, username, password)
    self.opener = request.build_opener(
        request.HTTPBasicAuthHandler(password_manager),
        request.HTTPCookieProcessor(cookie_jar),
    )

    # process-wide side effect: every urllib.request.urlopen call uses this opener
    request.install_opener(self.opener)
def rebuild_auth(
    self, request: requests.PreparedRequest, response: requests.Response
) -> None:
    """Rebuilds session authentication after a broken connection or redirect.

    The "Authorization" header is removed from the outgoing request only when
    the redirect changes hosts and neither the original host nor the redirect
    host is ``self.AUTH_HOST``. In every other case the header is left
    untouched.

    Args:
        request: the request about to be sent (the redirect target).
        response: the response that triggered it; ``response.request.url`` is
            taken as the original URL.

    Returns:
        None
    """
    headers = request.headers
    if "Authorization" not in headers:
        # nothing to strip
        return

    original_host = requests.utils.urlparse(response.request.url).hostname
    redirect_host = requests.utils.urlparse(request.url).hostname

    # drop the credentials only when leaving for a different host that does
    # not involve the authentication host on either side of the redirect
    if (
        original_host != redirect_host
        and redirect_host != self.AUTH_HOST
        and original_host != self.AUTH_HOST
    ):
        del headers["Authorization"]
download_urls(session, urls, outdir='.', n_chunks=1000)
¶
Downloads GEDI HDF5 files to a local directory.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
session |
Session |
the Earthdata login session (created by gedi.EarthdataSession()). |
required |
urls |
list |
a list of remote files from `gedi.search_bounds()`. |
required |
outdir |
str |
the output directory to store results. |
'.' |
n_chunks |
int |
the number of chunks to break the download into. |
1000 |
Returns:
Type | Description |
---|---|
None |
None. Files are downloaded locally. |
Source code in lazer/gedi.py
def download_urls(
    session: Session, urls: list, outdir: str = ".", n_chunks: int = 1000
) -> None:
    """Downloads GEDI HDF5 files to a local directory.

    Each file is written to ``outdir`` under the basename of its URL. The
    requests are made with ``urllib.request.urlopen``; ``session`` itself is
    not referenced in the body.

    Args:
        session: the Earthdata login session (created by gedi.EarthdataSession()).
            NOTE(review): presumably required because creating it installs
            the authenticated urllib opener - confirm.
        urls: a list of remote files from `gedi.search_bounds()`.
        outdir: the output directory to store results.
        n_chunks: the number of chunks to break the download into. Values
            below 2 download each file in a single read.

    Returns:
        None. Files are downloaded locally.
    """
    # create the output directory; exist_ok avoids a check-then-create race
    try:
        os.makedirs(outdir, exist_ok=True)
    except OSError:
        print(f"Creation of directory {outdir} failed")

    # block size used only when the server does not report a file size
    fallback_block = 1024 * 1024

    for url in tqdm(urls, desc="URLs"):
        basename = os.path.basename(url)
        output_path = os.path.join(outdir, basename)

        # the context manager closes the connection even if a write fails
        with request.urlopen(request.Request(url)) as response:
            content_length = response.headers.get("content-length")

            with open(output_path, "wb") as file:
                if content_length is None:
                    # size unknown: stream fixed-size blocks until EOF
                    for block in iter(lambda: response.read(fallback_block), b""):
                        file.write(block)
                else:
                    total = int(content_length)

                    # get the bytes to read for each chunk: (n_chunks - 1)
                    # equal reads plus one read for the remainder
                    if n_chunks < 2:
                        chunks = [total]
                    else:
                        size, remainder = divmod(total, n_chunks - 1)
                        chunks = [size] * (n_chunks - 1) + [remainder]

                    for chunk in tqdm(chunks, desc=basename, leave=False):
                        file.write(response.read(chunk))
filter_urls_by_date(urls, start_date, end_date=None, timezone='America/Los_Angeles')
¶
Filters a list of remote file URLs to a range of dates.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
urls |
list |
a list of remote files from `gedi.search_bounds()`. |
required |
start_date |
str |
the earliest date to include (in 'YYYY-MM-DD' format). |
required |
end_date |
str |
the latest date to include. Defaults to today if not set. |
None |
timezone |
str |
the name of the timezone in which the dates are interpreted. |
'America/Los_Angeles' |
Returns:
Type | Description |
---|---|
list |
a list of input files filtered to only include the date range provided. |
Source code in lazer/gedi.py
def filter_urls_by_date(
    urls: list,
    start_date: str,
    end_date: str = None,
    timezone: str = "America/Los_Angeles",
) -> list:
    """Filters a list of remote file URLs to a range of dates.

    The acquisition date of each file is read from the sixth "/"-separated
    component of its URL, which must look like 'YYYY.MM.DD'. Both ends of the
    range are inclusive and the comparison is made on calendar dates.

    Args:
        urls: a list of remote files from `gedi.search_bounds()`.
        start_date: the earliest date to include (in 'YYYY-MM-DD' format).
        end_date: the latest date to include (in 'YYYY-MM-DD' format).
            Defaults to today if not set.
        timezone: the IANA timezone name used to decide what "today" is when
            `end_date` is not set. It has no effect on explicit dates.

    Returns:
        a list of input files filtered to only include the date range provided.

    Raises:
        ValueError: if a date string or a URL date component is malformed.
        IndexError: if a URL has fewer than six "/"-separated components.
    """
    start = datetime.strptime(start_date, "%Y-%m-%d").date()

    if end_date is None:
        # "today" is the only value that depends on the timezone
        try:
            from zoneinfo import ZoneInfo
        except ImportError:
            # Python < 3.9 has no zoneinfo: fall back to the system's local date
            end = datetime.now().date()
        else:
            end = datetime.now(tz=ZoneInfo(timezone)).date()
    else:
        end = datetime.strptime(end_date, "%Y-%m-%d").date()

    def acquisition_date(url):
        # e.g. https://host/GEDI/GEDI02_A.001/2019.04.18/file.h5 -> 2019-04-18
        return datetime.strptime(url.split("/")[5], "%Y.%m.%d").date()

    # filter within the date range (inclusive on both ends)
    return [url for url in urls if start <= acquisition_date(url) <= end]
search_bounds(session, product, bbox, version=1)
¶
Searches for GEDI data that intersects with a bounding box.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
session |
Session |
the Earthdata login session (created by gedi.EarthdataSession()). |
required |
product |
str |
the GEDI product to download. Options currently include [GEDI01_B, GEDI02_A, GEDI02_B]. |
required |
bbox |
list |
the [xmin, ymin, xmax, ymax] bounding box in lat/lon. |
required |
version |
int |
the version of the data product. Options currently include [1, 2]. |
1 |
Returns:
Type | Description |
---|---|
list |
a list of remote files that intersect the bounding box (empty if none were found). |
Source code in lazer/gedi.py
def search_bounds(session: Session, product: str, bbox: list, version: int = 1) -> list:
    """Searches for GEDI data that intersects with a bounding box.

    Builds a query URL from DOWNLOAD_URL and the search parameters, fetches it
    through `session` and returns the "data" entry of the JSON response. When
    no files are returned, the "message" entry of the response is logged as a
    warning.

    Args:
        session: the Earthdata login session (created by gedi.EarthdataSession()).
        product: the GEDI product to download. Options currently include [GEDI01_B, GEDI02_A, GEDI02_B].
        bbox: the [xmin, ymin, xmax, ymax] bounding box in lat/lon.
        version: the version of the data product. Options currently include [1, 2].

    Returns:
        a list of the remote files found, or an empty list if there are none.
    """
    # reorder the bbox to [ymax, xmin, ymin, xmax], i.e. the upper-left corner
    # followed by the lower-right corner, each as (y, x)
    # NOTE(review): presumably the order the search service expects - confirm
    gedi_bbox = str([bbox[3], bbox[0], bbox[1], bbox[2]])

    # create the request url; the version is zero-padded to three digits
    parameters = {
        "product": product,
        "version": f"{version:03d}",
        "bbox": gedi_bbox,
        "output": "json",
    }
    url = "?".join([DOWNLOAD_URL, parse.urlencode(parameters)])

    # submit the request to list the files
    response_json = session.get(url).json()

    # a missing or null "data" entry is treated the same as an empty result
    file_list = response_json.get("data") or []
    if not file_list:
        _logger.warning(response_json.get("message"))

    return file_list