Oct 22, 2020
Two case studies:
Initialize our packages:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
%matplotlib inline
import intake
import xarray as xr
import cartopy.crs as ccrs
import hvplot.xarray
import hvplot.pandas
import holoviews as hv
import geoviews as gv
from geoviews.tile_sources import EsriImagery
hv.extension('bokeh')
Create the intake data catalog so we can load our datasets:
url = 'https://raw.githubusercontent.com/pyviz-topics/EarthML/master/examples/catalog.yml'
cat = intake.open_catalog(url)
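We can check which data sources the catalog defines:
# List the data sources defined in the catalog
list(cat)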
band_info = pd.DataFrame([
(1, "Aerosol", " 0.43 - 0.45", 0.440, "30", "Coastal aerosol"),
(2, "Blue", " 0.45 - 0.51", 0.480, "30", "Blue"),
(3, "Green", " 0.53 - 0.59", 0.560, "30", "Green"),
(4, "Red", " 0.64 - 0.67", 0.655, "30", "Red"),
(5, "NIR", " 0.85 - 0.88", 0.865, "30", "Near Infrared (NIR)"),
(6, "SWIR1", " 1.57 - 1.65", 1.610, "30", "Shortwave Infrared (SWIR) 1"),
(7, "SWIR2", " 2.11 - 2.29", 2.200, "30", "Shortwave Infrared (SWIR) 2"),
(8, "Panc", " 0.50 - 0.68", 0.590, "15", "Panchromatic"),
(9, "Cirrus", " 1.36 - 1.38", 1.370, "30", "Cirrus"),
(10, "TIRS1", "10.60 - 11.19", 10.895, "100 * (30)", "Thermal Infrared (TIRS) 1"),
(11, "TIRS2", "11.50 - 12.51", 12.005, "100 * (30)", "Thermal Infrared (TIRS) 2")],
columns=['Band', 'Name', 'Wavelength Range (µm)', 'Nominal Wavelength (µm)', 'Resolution (m)', 'Description']).set_index(["Band"])
band_info
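With the band number as the index, we can look up any band's properties directly. For example, the nominal wavelength of the NIR band (which we'll need later for the temperature calculation):
# Look up a single property for a single band, e.g., NIR (band 5)
band_info.loc[5, "Nominal Wavelength (µm)"]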
We'll be downloading publicly available Landsat data from Google Cloud Storage
See: https://cloud.google.com/storage/docs/public-datasets/landsat
The relevant information is stored in our intake catalog:
From our catalog.yml file:
google_landsat_band:
  description: Landsat bands from Google Cloud Storage
  driver: rasterio
  parameters:
    path:
      description: landsat path
      type: int
    row:
      description: landsat row
      type: int
    product_id:
      description: landsat file id
      type: str
    band:
      description: band
      type: int
  args:
    urlpath: https://storage.googleapis.com/gcp-public-data-landsat/LC08/01/{{ '%03d' % path }}/{{ '%03d' % row }}/{{ product_id }}/{{ product_id }}_B{{ band }}.TIF
    chunks:
      band: 1
      x: 256
      y: 256
From the "urlpath" above, you can see we need "path", "row", and "product_id" variables to properly identify a Landsat image:
The path and row corresponding to the area over Philadelphia have already been selected using the USGS Earth Explorer. The same tool was used to find the product ID for the particular date of interest.
# Necessary variables
path = 14
row = 32
product_id = 'LC08_L1TP_014032_20160727_20170222_01_T1'
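As an optional sanity check, we can mirror the catalog's Jinja template with plain Python string formatting to see exactly which file will be requested for a given band:
# Render the urlpath template by hand for one band (band 4, as an example)
band = 4
print(
    "https://storage.googleapis.com/gcp-public-data-landsat/LC08/01/"
    f"{path:03d}/{row:03d}/{product_id}/{product_id}_B{band}.TIF"
)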
The function below returns a specific Landsat band as a dask array.
from random import random
from time import sleep

def get_band_with_exponential_backoff(path, row, product_id, band, maximum_backoff=32):
    """
    Load a Landsat band, retrying with exponential backoff.

    Google Cloud Storage recommends using exponential backoff
    when accessing the API:
    https://cloud.google.com/storage/docs/exponential-backoff
    """
    n = backoff = 0
    while backoff < maximum_backoff:
        try:
            return cat.google_landsat_band(
                path=path, row=row, product_id=product_id, band=band
            ).to_dask()
        except Exception:
            # Wait longer after each failed attempt, plus a random jitter
            backoff = min(2 ** n + random(), maximum_backoff)
            sleep(backoff)
            n += 1
    raise RuntimeError(f"Failed to load band {band} after repeated retries")
Loop over each band, load that band using the above function, and then concatenate the results together:
bands = [1, 2, 3, 4, 5, 6, 7, 9, 10, 11]
datasets = []
for band in bands:
da = get_band_with_exponential_backoff(
path=path, row=row, product_id=product_id, band=band
)
da = da.assign_coords(band=[band])
datasets.append(da)
ds = xr.concat(datasets, "band", compat="identical")
ds
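Because each band was opened with to_dask(), the concatenated array is backed by dask, and the full data hasn't been downloaded yet. We can inspect the chunk structure:
# The data is lazy: inspect the dask chunks before computing anything
ds.chunks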
The CRS of the data is EPSG:32618 (UTM Zone 18N).
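We can confirm this from the array's attributes (a quick check; the exact attribute name can vary across rasterio/xarray versions):
# Confirm the coordinate reference system from the data's attributes
ds.attrs.get("crs", ds.attrs)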
def load_google_landsat_metadata(path, row, product_id):
"""
Load Landsat metadata for path, row, product_id from Google Cloud Storage
"""
    def parse_type(x):
        try:
            # Convert numeric strings to Python numbers
            return eval(x)
        except Exception:
            # Otherwise, keep the raw string
            return x
baseurl = "https://storage.googleapis.com/gcp-public-data-landsat/LC08/01"
suffix = f"{path:03d}/{row:03d}/{product_id}/{product_id}_MTL.txt"
df = intake.open_csv(
urlpath=f"{baseurl}/{suffix}",
csv_kwargs={
"sep": "=",
"header": None,
"names": ["index", "value"],
"skiprows": 2,
"converters": {"index": (lambda x: x.strip()), "value": parse_type},
},
).read()
metadata = df.set_index("index")["value"]
return metadata
metadata = load_google_landsat_metadata(path, row, product_id)
metadata.head(n=10)
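Individual values can be looked up by their MTL field names, for example:
# Keys follow the Landsat MTL file conventions
metadata["CLOUD_COVER"], metadata["DATE_ACQUIRED"]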
We can trim our data to the city limits using the slice() function discussed last lecture.
# Load the GeoJSON from the URL
url = "http://data.phl.opendata.arcgis.com/datasets/405ec3da942d4e20869d4e1449a2be48_0.geojson"
city_limits = gpd.read_file(url)
# Convert to the right CRS for this data
city_limits = city_limits.to_crs(epsg=32618)
The y coordinates are stored in descending order, so the slice will need to be in descending order as well.
# Look up the order of xmin/xmax and ymin/ymax
city_limits.total_bounds?
# Get x/y range of city limits from "total_bounds"
xmin, ymin, xmax, ymax = city_limits.total_bounds
# Slice our xarray data
subset = ds.sel(
x=slice(xmin, xmax),
y=slice(ymax, ymin)
) # NOTE: y coordinate system is in descending order!
subset = subset.compute()
subset
# Original data was about 8000 x 8000 in size
ds.shape
# Sliced data is only about 1000 x 1000 in size!
subset.shape
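Slicing before calling compute() is what keeps this workflow tractable. A rough back-of-the-envelope estimate of the memory savings, assuming 10 bands and 8 bytes per pixel once the data is in floating point:
# Back-of-the-envelope memory estimate
full_gb = 10 * 8000 * 8000 * 8 / 1e9
subset_gb = 10 * 1000 * 1000 * 8 / 1e9
print(f"Full scene: ~{full_gb:.1f} GB; subset: ~{subset_gb:.2f} GB")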
# City limits plot
limits_plot = city_limits.hvplot.polygons(
line_color="white", alpha=0, line_alpha=1, crs=32618, geo=True
)
# Landsat plot
landsat_plot = subset.sel(band=3).hvplot.image(
x="x",
y="y",
rasterize=True,
cmap="viridis",
frame_width=500,
frame_height=500,
geo=True,
crs=32618,
)
# Combined
landsat_plot * limits_plot
Remember, NDVI = (NIR - Red) / (NIR + Red)
band_info.head()
NIR is band 5 and Red is band 4
# Select the bands we need
NIR = subset.sel(band=5)
RED = subset.sel(band=4)
# Calculate the NDVI
NDVI = (NIR - RED) / (NIR + RED)
NDVI = NDVI.where(NDVI < np.inf)
NDVI.head()
# NEW: Use datashader to plot the NDVI
p = NDVI.hvplot.image(
x="x",
y="y",
geo=True,
crs=32618,
datashade=True, # NEW: USE DATASHADER
project=True, # NEW: PROJECT THE DATA BEFORE PLOTTING
frame_height=500,
frame_width=500,
cmap="viridis",
)
# NEW: Use a transparent rasterized version of the plot for hover text
# No hover text available with datashaded images so we can use the rasterized version
p_hover = NDVI.hvplot(
x="x",
y="y",
crs=32618,
geo=True,
rasterize=True,
frame_height=500,
frame_width=500,
alpha=0, # SET ALPHA=0 TO MAKE THIS TRANSPARENT
colorbar=False,
)
# City limits
limits_plot = city_limits.hvplot.polygons(
geo=True, crs=32618, line_color="white", line_width=2, alpha=0, line_alpha=1
)
p * p_hover * limits_plot
Note the new datashade=True keyword in the plot above.
Next, we'll calculate the land surface temperature, using the rio_toa package to help convert to at-satellite brightness temperature.
from rio_toa import brightness_temp, toa_utils
def land_surface_temp(BT, w, NDVI):
    """Calculate the land surface temperature of Landsat 8

    temp = BT / (1 + w * (BT / p) * ln(e))

    BT = at-satellite brightness temperature (K)
    w = wavelength of emitted radiance (µm)

    where p = h * c / s (1.439e-2 m K)

    h = Planck's constant (J s)
    s = Boltzmann constant (J/K)
    c = velocity of light (m/s)
    """
    h = 6.626e-34
    s = 1.38e-23
    c = 2.998e8

    p = (h * c / s) * 1e6  # convert from m K to µm K

    # Proportion of vegetation, estimated from the normalized NDVI
    Pv = ((NDVI - NDVI.min()) / (NDVI.max() - NDVI.min())) ** 2

    # Emissivity
    e = 0.004 * Pv + 0.986

    return BT / (1 + w * (BT / p) * np.log(e))
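As a quick scalar sanity check of the formula (with hypothetical values: a 300 K brightness temperature at band 10's nominal wavelength and an emissivity near 0.99), the emissivity correction should shift the temperature by less than a degree:
# Sanity check with hypothetical scalar values
BT, w, e = 300.0, 10.895, 0.99
p = (6.626e-34 * 2.998e8 / 1.38e-23) * 1e6  # ~14,395 µm K
print(BT / (1 + w * (BT / p) * np.log(e)))  # ~300.7 K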
def land_surface_temp_for_band(band):
"""
Utility function to get land surface temperature for a specific band
"""
# params from general Landsat info
w = band_info.loc[band]["Nominal Wavelength (µm)"]
# params from specific Landsat data text file for these images
ML = metadata[f"RADIANCE_MULT_BAND_{band}"]
AL = metadata[f"RADIANCE_ADD_BAND_{band}"]
K1 = metadata[f"K1_CONSTANT_BAND_{band}"]
K2 = metadata[f"K2_CONSTANT_BAND_{band}"]
at_satellite_temp = brightness_temp.brightness_temp(
subset.sel(band=band).values, ML, AL, K1, K2
)
return land_surface_temp(at_satellite_temp, w, NDVI)
# temperature for band 10
band = 10
temp_10 = land_surface_temp_for_band(band).compute()
# Convert to Fahrenheit
temp_10_f = toa_utils.temp_rescale(temp_10, 'F')
# Save the numpy array as an xarray
band_10 = subset.sel(band=band).copy(data=temp_10_f)
# Get the land surface temp for band 11
band = 11
temp_11 = land_surface_temp_for_band(band).compute()
# Convert to Fahrenheit
temp_11_f = toa_utils.temp_rescale(temp_11, "F")
# Save as an xarray DataArray
band_11 = subset.sel(band=band).copy(data=temp_11_f)
# plot for band 10
plot_10 = band_10.hvplot.image(
x="x",
y="y",
rasterize=True,
cmap="fire_r",
crs=32618,
geo=True,
frame_height=400,
frame_width=400,
title="Band 10",
)
# plot for band 11
plot_11 = band_11.hvplot.image(
x="x",
y="y",
rasterize=True,
cmap="fire_r",
crs=32618,
geo=True,
frame_height=400,
frame_width=400,
title="Band 11",
)
plot_10 + plot_11
# Let's average the results of these two bands
mean_temp_f = (band_10 + band_11) / 2
# IMPORTANT: copy over the metadata
mean_temp_f.attrs = band_10.attrs
mean_temp_f
p = mean_temp_f.hvplot.image(
x="x",
y="y",
rasterize=True,
crs=32618,
geo=True,
frame_width=600,
frame_height=600,
cmap="rainbow",
alpha=0.5,
legend=False,
title="Mean Surface Temp (F)"
)
img = p * EsriImagery
img
Key insight: the color of a building's roof plays a very important role in urban heat islands.
We can use the .where() function in xarray to identify the heat islands across the city. The where() function takes a boolean array, where each pixel is True/False based on the selection criteria.
hot_spots = mean_temp_f.where(mean_temp_f > 90)
hot_spots_plot = hot_spots.hvplot.image(
x="x",
y="y",
cmap="fire",
crs=32618,
geo=True,
frame_height=500,
frame_width=500,
colorbar=False,
legend=False,
rasterize=True,
title="Mean Temp (F) > 90",
alpha=0.7
)
hot_spots_plot * EsriImagery
Let's select the areas cooler than 75 degrees as well...
cold_spots = mean_temp_f.where(mean_temp_f < 75)
cold_spots_plot = cold_spots.hvplot.image(
x="x",
y="y",
cmap="fire",
geo=True,
crs=32618,
frame_height=500,
frame_width=500,
colorbar=False,
legend=False,
rasterize=True,
title="Mean Temp (F) < 75",
alpha=0.5
)
cold_spots_plot * EsriImagery
In this section, we'll use zonal statistics to calculate the average temperatures for neighborhoods and parks, and compare this to the number of street trees in the city.
First, we'll use the rioxarray package to save our mean temperature data to a raster .tif file.
import rioxarray
# Save to a raster GeoTIFF file!
mean_temp_f.rio.to_raster("./mean_temp_f.tif")
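As an optional check, we can read the file back with rioxarray to verify the round trip:
# Read the GeoTIFF back in to verify it was written correctly
check = rioxarray.open_rasterio("./mean_temp_f.tif")
check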
Next, we'll load the parks dataset (./data/parks.geojson) and use the rasterstats package to calculate the zonal stats.
from rasterstats import zonal_stats
import rasterio as rio
# Open the GeoTIFF file using rasterio
f = rio.open('./mean_temp_f.tif')
f
# Load the parks
parks = gpd.read_file("./data/parks.geojson")
# Convert to the CRS from the mean temp file
CRS = f.crs.data
print(CRS)
parks = parks.to_crs(CRS['init'])
parks.head()
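Note: with more recent versions of geopandas, you can usually pass the rasterio CRS object to to_crs() directly, avoiding the 'init' dictionary lookup. A minimal alternative, assuming newer library versions:
# Alternative for newer geopandas versions: pass the CRS object directly
parks = parks.to_crs(f.crs)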
# Use zonal_stats to get the mean temperature values within each park
# FIRST ARGUMENT: vector polygons
# SECOND ARGUMENT: mean temperature raster data
park_stats = zonal_stats(parks, mean_temp_f.values, affine=f.transform, stats=['mean'])
# Store the mean temp back in the original parks dataframe
parks['mean_temp'] = [s['mean'] for s in park_stats]
parks.head()
parks['mean_temp'].values
# Create our figure and axes
fig, ax = plt.subplots(figsize=(8,6))
# Citywide mean temp
citywide_mean_temp = np.nanmean(mean_temp_f) # use nanmean() to skip NaN pixels automatically!
# Subtract out the citywide mean
# NOTE: the ".values" here will grab the numpy array from the pandas dataframe
park_temps = parks['mean_temp'].values - citywide_mean_temp
# Plot
ax.hist(park_temps, bins='auto')
# Add a vertical line at x=0
ax.axvline(x=0, lw=2, color='black')
# Format
ax.set_xlabel("Difference from Citywide Mean Temp (Degrees F)", fontsize=18)
ax.set_ylabel("Number of Parks", fontsize=18);
# Load the neighborhoods
url = "https://github.com/azavea/geo-data/raw/master/Neighborhoods_Philadelphia/Neighborhoods_Philadelphia.geojson"
hoods = gpd.read_file(url)
f.crs.data
# Convert to the CRS of the Landsat data
hoods = hoods.to_crs(f.crs.data["init"])
# Get the mean temp values within the neighborhoods
# FIRST ARGUMENT: vector polygons
# SECOND ARGUMENT: raster temp data
stats = zonal_stats(hoods, mean_temp_f.values, affine=f.transform, stats=['mean'])
hoods['mean_temp'] = [s['mean'] for s in stats]
hoods.head()
# Initialize the figure/axes
fig, ax = plt.subplots(figsize=(10,10))
# Make the choropleth map
hoods.plot(cmap='viridis', column='mean_temp', ax=ax)
# Format
ax.set_axis_off()
ax.set_aspect("equal")
import altair as alt
# Plot interactive map
# IMPORTANT: remember you need to convert the input data to EPSG=4326 for altair
meanT_chart = (
alt.Chart(hoods.to_crs(epsg=4326))
.mark_geoshape(stroke="#444444")
.properties(
width=400,
height=450,
title="Mean Neighborhood Temperatures in Philadelphia",
)
.encode(
tooltip=[
alt.Tooltip("mean_temp:Q", format=".1f", title="Mean Temp (℉)"),
alt.Tooltip("listname:N", title="Neighborhood"),
],
color=alt.Color(
"mean_temp:Q",
scale=alt.Scale(scheme="viridis"),
legend=alt.Legend(orient="bottom", title="Mean Temp (℉)"),
),
)
.configure(background="white")
)
meanT_chart
sjoin()
function from geopandas to join the tree dataset to the neighborhoods dataset and then groupby the neighborhoods to find the number of trees per neighborhood# Load the street trees inventory
url = "http://data.phl.opendata.arcgis.com/datasets/957f032f9c874327a1ad800abd887d17_0.geojson"
trees = gpd.read_file(url)
trees.head()
# Convert to the CRS of the Landsat data
trees = trees.to_crs(f.crs.data['init'])
# Add the neighborhood name to the trees dataset
trees = gpd.sjoin(trees, hoods, op='within')
trees.head()
# Calculate the number of trees per neighborhood
trees_per_hood = trees.groupby('listname').size().reset_index(name='trees_per_hood')
trees_per_hood.head()
# Merge the hood geometries back in
hoods = hoods.merge(trees_per_hood, on='listname')
# Initialize the figure
fig, ax = plt.subplots(figsize=(10,10))
# Plot
hoods.to_crs(epsg=3857).plot(cmap='viridis', column='trees_per_hood', ax=ax)
# Format
ax.set_axis_off()
ax.set_aspect("equal")
# plot map, remembering to convert to EPSG=4326 first
ntrees_chart = (
alt.Chart(hoods.to_crs(epsg=4326))
.mark_geoshape(stroke="#444444")
.properties(
width=400,
height=450,
title="The Distribution of Street Trees in Philadelphia",
)
.encode(
tooltip=[
alt.Tooltip(f"trees_per_hood:Q", format=".1f", title="# Trees"),
alt.Tooltip("listname:N", title="Neighborhood"),
],
color=alt.Color(
f"trees_per_hood:Q",
scale=alt.Scale(scheme="viridis"),
legend=alt.Legend(orient="bottom", title="# Trees"),
),
)
)
ntrees_chart
Use seaborn's kdeplot() function to plot the relationship between the number of trees per neighborhood and the mean temperature per neighborhood. You'll need to merge() the relevant data frames together based on the neighborhood name. What are the major trends? Do you see the relationship you expect?
import seaborn as sns
fig, ax = plt.subplots(figsize=(8,6))
# plot
sns.kdeplot(hoods['trees_per_hood'], hoods['mean_temp'], shade=True, ax=ax)
# format
ax.set_xlabel("Number of Trees per Neighborhood", fontsize=18)
ax.set_ylabel("Mean Temperature per Neighborhood", fontsize=18);