Skip to content

lazer.gedi

Tools for downloading and working with GEDI data.

EarthdataSession (Session)

Authentication guide: wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python

__init__(self, username=None, password=None) special

Creates a session for communicating with the NASA Earthdata API.

Create an account at urs.earthdata.nasa.gov/users/new. You can avoid passing your login credentials by setting the environment variables EARTHDATA_USER and EARTHDATA_PASS.

Parameters:

Name Type Description Default
username str

Earthdata username. If not set, the function will prompt you to enter.

None
password str

Earthdata password. If not set, the function will prompt you to enter.

None

Returns:

Type Description
Session

a requests session tracking activity to the Earthdata API.

Source code in lazer/gedi.py
def __init__(self, username: str = None, password: str = None) -> None:
    """Creates a session for communicating with the NASA Earthdata API.

    Create an account at https://urs.earthdata.nasa.gov/users/new. You can
        avoid passing your login credentials by setting the environment
        variables EARTHDATA_USER and EARTHDATA_PASS.

    Besides configuring this session, the constructor installs a global
        `urllib.request` opener (basic auth + cookies), so later calls to
        `request.urlopen()` anywhere in the process reuse these credentials.

    Args:
        username: Earthdata username. If not set, EARTHDATA_USER is used, and
            if that is also unset the function will prompt you to enter it.
        password: Earthdata password. If not set, EARTHDATA_PASS is used, and
            if that is also unset the function will prompt you to enter it.
    """
    super().__init__()

    # resolve credentials: explicit argument -> environment -> interactive prompt
    if username is None:
        username = os.getenv("EARTHDATA_USER")
    if username is None:
        username = getpass("Earthdata username:")

    if password is None:
        password = os.getenv("EARTHDATA_PASS")
    if password is None:
        password = getpass("Earthdata password:")

    self.auth = (username, password)

    # handle authentication via cookies
    cookie_jar = CookieJar()
    password_manager = request.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, self.AUTH_URL, username, password)

    self.opener = request.build_opener(
        request.HTTPBasicAuthHandler(password_manager),
        request.HTTPCookieProcessor(cookie_jar),
    )

    # process-wide side effect: request.urlopen() now uses this opener
    request.install_opener(self.opener)


def rebuild_auth(
    self, request: "requests.PreparedRequest", response: "requests.Response"
) -> None:
    """Rebuilds session authentication after a broken connection or redirect.

    This must be a method of the session class (not a function nested inside
        `__init__`) so that it overrides the redirect hook of the parent
        session. The "Authorization" header is dropped only when the redirect
        changes host and neither the original nor the new host is
        `self.AUTH_HOST`.

    Args:
        request: the request about to be sent to the redirect target.
        response: the response that triggered the redirect.

    Returns:
        None. The headers of `request` are modified in place.
    """
    # local import: the `request` parameter shadows the urllib module here
    from urllib.parse import urlparse

    headers = request.headers
    if "Authorization" not in headers:
        return

    original_host = urlparse(response.request.url).hostname
    redirect_host = urlparse(request.url).hostname

    crosses_hosts = original_host != redirect_host
    involves_auth_host = self.AUTH_HOST in (original_host, redirect_host)

    if crosses_hosts and not involves_auth_host:
        del headers["Authorization"]

download_urls(session, urls, outdir='.', n_chunks=1000)

Downloads GEDI HDF5 files to a local directory.

Parameters:

Name Type Description Default
session Session

the Earthdata login session (created by gedi.EarthdataSession()).

required
urls list

a list of remote files from gedi.search_bounds().

required
outdir str

the output directory to store results.

'.'
n_chunks int

the number of chunks to break the download into.

1000

Returns:

Type Description
None

None. Files are downloaded locally.

Source code in lazer/gedi.py
def download_urls(
    session: Session, urls: list, outdir: str = ".", n_chunks: int = 1000
) -> None:
    """Downloads GEDI HDF5 files to a local directory.

    Each file is fetched with `request.urlopen()`, so authentication relies on
        the globally installed urllib opener rather than on `session` itself.

    Args:
        session: the Earthdata login session (created by gedi.EarthdataSession()).
            Not used directly by this function.
        urls: a list of remote files from `gedi.search_bounds()`.
        outdir: the output directory to store results.
        n_chunks: the approximate number of chunks to break each download into.
            Values below 1 are treated as 1.

    Returns:
        None. Files are downloaded locally.
    """
    # create the output directory; exist_ok avoids the exists()/makedirs() race
    try:
        os.makedirs(outdir, exist_ok=True)
    except OSError:
        print(f"Creation of directory {outdir} failed")

    # guard against n_chunks <= 0 (the chunk size below divides by it)
    n_chunks = max(1, n_chunks)

    # read size used when the server does not report a content length
    fallback_chunk_size = 1024 * 1024

    for url in tqdm(urls, desc="URLs"):
        basename = os.path.basename(url)
        output_path = os.path.join(outdir, basename)

        # the context manager closes the HTTP response even if a write fails
        with request.urlopen(request.Request(url)) as response:
            content_length = response.headers.get("content-length")
            total = int(content_length) if content_length is not None else None

            if total:
                # ceiling division so that n_chunks reads cover the whole file
                chunk_size = max(1, -(-total // n_chunks))
            else:
                chunk_size = fallback_chunk_size

            with open(output_path, "wb") as file, tqdm(
                total=total, desc=basename, leave=False, unit="B", unit_scale=True
            ) as progress:
                # read until EOF rather than trusting the advertised length
                while True:
                    data = response.read(chunk_size)
                    if not data:
                        break
                    file.write(data)
                    progress.update(len(data))

filter_urls_by_date(urls, start_date, end_date=None, timezone='America/Los_Angeles')

Filters a list of remote file URLs to a range of dates.

Parameters:

Name Type Description Default
urls list

a list of remote files from gedi.search_bounds().

required
start_date str

the earliest date to include (in 'YYYY-MM-DD' format).

required
end_date str

the latest date to include. Defaults to today if not set.

None
timezone str

the timezone string to pass to datetime.tzinfo().

'America/Los_Angeles'

Returns:

Type Description
list

a list of input files filtered to only include the date range provided.

Source code in lazer/gedi.py
def filter_urls_by_date(
    urls: list,
    start_date: str,
    end_date: str = None,
    timezone: str = "America/Los_Angeles",
) -> list:
    """Filters a list of remote file URLs to a range of dates.

    The acquisition date is read from the sixth "/"-separated component of
        each URL, which is expected to look like 'YYYY.MM.DD'.

    Args:
        urls: a list of remote files from `gedi.search_bounds()`.
        start_date: the earliest date to include (in 'YYYY-MM-DD' format).
        end_date: the latest date to include (in 'YYYY-MM-DD' format).
            Defaults to today if not set.
        timezone: the IANA timezone name used to decide what "today" is when
            `end_date` is not set. If the zone cannot be resolved, the system
            local time is used instead.

    Returns:
        a list of input files filtered to only include the date range provided.
    """

    def to_date(text, separator):
        year, month, day = text.split(separator)
        return datetime(int(year), int(month), int(day)).date()

    # all comparisons are whole-day, so plain dates are sufficient
    start = to_date(start_date, "-")

    if end_date is None:
        try:
            from zoneinfo import ZoneInfo

            zone = ZoneInfo(timezone)
        except (ImportError, KeyError, ValueError):
            # zoneinfo missing (Python < 3.9) or unknown zone name:
            # tz=None makes datetime.now() use the system local time
            zone = None
        end = datetime.now(tz=zone).date()
    else:
        end = to_date(end_date, "-")

    # filter within the date range (both bounds inclusive)
    return [url for url in urls if start <= to_date(url.split("/")[5], ".") <= end]

search_bounds(session, product, bbox, version=1)

Searches for GEDI data that intersects with a bounding box.

Parameters:

Name Type Description Default
session Session

the Earthdata login session (created by gedi.EarthdataSession()).

required
product str

the GEDI product to download. Options currently include [GEDI01_B, GEDI02_A, GEDI02_B].

required
bbox list

the [xmin, ymin, xmax, ymax] bounding box in lat/lon.

required
version int

the version of the data product. Options currently include [1, 2].

1

Returns:

Type Description
list

a list of the remote files that intersect the bounding box (empty if none are found).

Source code in lazer/gedi.py
def search_bounds(session: Session, product: str, bbox: list, version: int = 1) -> list:
    """Searches for GEDI data that intersects with a bounding box.

    Args:
        session: the Earthdata login session (created by gedi.EarthdataSession()).
        product: the GEDI product to download. Options currently include [GEDI01_B, GEDI02_A, GEDI02_B].
        bbox: the [xmin, ymin, xmax, ymax] bounding box in lat/lon.
        version: the version of the data product. Options currently include [1, 2].

    Returns:
        a list of the remote files reported by the service. The list is empty
            (and the service message is logged as a warning) when nothing
            matches or the response carries no "data" entry.
    """

    # format the version string (zero-padded to three digits, e.g. 1 -> "001")
    version_str = f"{version:03d}"

    # reorder the bbox to [ymax, xmin, ymin, xmax], i.e. the upper-left corner
    # then the lower-right corner, each as (lat, lon)
    # NOTE(review): presumably the order the search service expects - confirm
    xmin, ymin, xmax, ymax = bbox[:4]
    gedi_bbox = str([ymax, xmin, ymin, xmax])

    # create the request url
    parameters = {
        "product": product,
        "version": version_str,
        "bbox": gedi_bbox,
        "output": "json",
    }
    url = "?".join([DOWNLOAD_URL, parse.urlencode(parameters)])

    # submit the request to list the files
    response = session.get(url)
    response_json = response.json()

    # a missing or null "data" entry is treated the same as an empty result
    file_list = response_json.get("data") or []

    if not file_list:
        # Logger.warn is a deprecated alias that was removed in Python 3.13
        _logger.warning(response_json.get("message"))

    return file_list
Back to top