Land Cover Classification with Python

!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Requirement already satisfied: attrs in /usr/local/lib/python3.10/dist-packages (from rasterio) (24.2.0)
Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from rasterio) (2024.8.30)
Requirement already satisfied: click>=4.0 in /usr/local/lib/python3.10/dist-packages (from rasterio) (8.1.7)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Requirement already satisfied: numpy>=1.24 in /usr/local/lib/python3.10/dist-packages (from rasterio) (1.26.4)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Requirement already satisfied: pyparsing in /usr/local/lib/python3.10/dist-packages (from rasterio) (3.2.0)
Downloading rasterio-1.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 22.2/22.2 MB 56.2 MB/s eta 0:00:00
Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.3

import geopandas as gpd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import rasterio as rio
from rasterio.enums import Resampling
import json
import pandas as pd
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
from PIL import ImageColor
import skimage as ski
from skimage.exposure import rescale_intensity
import scipy
from rasterio.features import shapes

# Location of data
lc_dir = '/content/data/lc.json'
sample_dir = "/content/data/Sample_LC_v1.geojson"
palsar_dir = "https://storage.googleapis.com/gee-ramiqcom-bucket/youtube/lc_python/Palsar_Kerinci_2023_v1.tif"
landsat_dir = "https://storage.googleapis.com/gee-ramiqcom-bucket/youtube/lc_python/Landsat_Kerinci_2023_v1.tif"

# Load Land Cover Parameter
lc = json.load(open(lc_dir))
lc_df = pd.DataFrame(lc)
lc_df["values_normalize"] = lc_df.index + 1
lc_df["palette"] = "#" + lc_df["palette"]

# Mapping from old to new values
values = lc_df["values"].to_list()
values_norm = lc_df["values_normalize"].to_list()
palette = lc_df["palette"].to_list()
labels = lc_df["label"].to_list()
dict_values = {}
dict_label = {}
dict_palette = {}
dict_palette_hex = {}
for x in range(0, len(values)):
    dict_values[values[x]] = values_norm[x]
    dict_label[values_norm[x]] = labels[x]
    dict_palette[values_norm[x]] = ImageColor.getrgb(palette[x])
    dict_palette_hex[values_norm[x]] = palette[x]

# Create colormap from values and palette
cmap = ListedColormap(palette)

# Patches legend
patches = [
    mpatches.Patch(color=palette[i], label=labels[i]) for i in range(len(values))
]
legend = {
    "handles": patches,
    "bbox_to_anchor": (1.05, 1),
    "loc": 2,
    "borderaxespad": 0.0,
}

lc_df

	palette	values	label	values_normalize
0	#006400	2001	Primary dryland forest	1
1	#228B22	2002	Secondary dryland forest	2
2	#4B0082	2004	Primary mangrove forest	3
3	#808000	2005	Primary swamp forest	4
4	#32CD32	2006	Plantation forest	5
5	#90EE90	2007	Dry shrub	6
6	#FF4500	2010	Estate crop	7
7	#F08080	2012	Settlement	8
8	#D2B48C	2014	Bare ground	9
9	#ADFF2F	3000	Savanna and grasses	10
10	#87CEFA	5001	Open water	11
11	#6A5ACD	20041	Secondary mangrove forest	12
12	#6B8E23	20051	Secondary swamp forest	13
13	#66CDAA	20071	Wet shrub	14
14	#FFD700	20091	Pure dry agriculture	15
15	#FFA500	20092	Mixed dry agriculture	16
16	#008080	20093	Paddy field	17
17	#E6E6FA	20094	Fish pond/aquaculture	18
18	#B22222	20121	Port or harbour	19
19	#C71585	20122	Transmigration areas	20
20	#A0522D	20141	Mining	21
21	#7FFFD4	50011	Swamp	22

# Load sample
sample = gpd.read_file(sample_dir)
sample["value"] = sample["lc"].map(dict_values)
sample["label"] = sample["value"].map(dict_label)

# Plot sample
sample.plot(column="value", cmap=cmap, markersize=1)
plt.legend(**legend)

# Sample with extract
sample_extract = sample.copy()
coords = [
    (x, y) for x, y in zip(sample_extract["geometry"].x, sample_extract["geometry"].y)
]
print(sample_extract.shape)

(14029, 5)

# Load landsat image
landsat = rio.open(landsat_dir)
landsat_image = landsat.read() / 1e4

# False color composite
out_range = (0, 1)
red = rescale_intensity(landsat_image[4], in_range=(0.1, 0.4), out_range=out_range)
green = rescale_intensity(landsat_image[5], in_range=(0.05, 0.3), out_range=out_range)
blue = rescale_intensity(landsat_image[6], in_range=(0.025, 0.25), out_range=out_range)
arr_image = np.stack(
    [red, green, blue]
).T
composite = np.rot90(np.flip(arr_image, 1), 1)

# Plot landsat image
plt.imshow(composite)

# Extract raster value
landsat_extract = np.stack(
    [x for x in landsat.sample(coords)]
) / 1e4
sample_extract[["B1", "B2", "B3", "B4", "B5", "B6", "B7", "B9"]] = landsat_extract
sample_extract

	id	lc	geometry	value	label	B1	B2	B3	B4	B5	B6	B7	B9
0	0	2001	POINT (101.67155 -1.86099)	1	Primary dryland forest	0.0091	0.0136	0.0321	0.0205	0.2243	0.0994	0.0408	0.0019
1	1	2001	POINT (101.43871 -2.23694)	1	Primary dryland forest	0.0101	0.0159	0.0439	0.0292	0.3572	0.1479	0.0560	0.0027
2	2	2001	POINT (101.49341 -1.77152)	1	Primary dryland forest	0.0205	0.0223	0.0389	0.0273	0.2772	0.1287	0.0554	0.0033
3	3	2001	POINT (101.75132 -2.02161)	1	Primary dryland forest	0.0058	0.0133	0.0385	0.0247	0.2979	0.1405	0.0579	0.0027
4	4	2001	POINT (101.1242 -1.96125)	1	Primary dryland forest	0.0174	0.0209	0.0400	0.0252	0.3285	0.1337	0.0543	0.0024
...	...	...	...	...	...	...	...	...	...	...	...	...	...
14024	14024	50011	POINT (101.32983 -1.83297)	22	Swamp	0.0187	0.0235	0.0419	0.0425	0.1372	0.1336	0.0820	0.0026
14025	14025	50011	POINT (101.33306 -1.82461)	22	Swamp	0.0205	0.0240	0.0407	0.0496	0.1509	0.1315	0.0788	0.0027
14026	14026	50011	POINT (101.33064 -1.8362)	22	Swamp	0.0239	0.0278	0.0546	0.0443	0.2668	0.1430	0.0719	0.0023
14027	14027	50011	POINT (101.30207 -1.80063)	22	Swamp	0.0315	0.0407	0.0621	0.0588	0.2338	0.1386	0.0839	0.0031
14028	14028	50011	POINT (101.32363 -1.8362)	22	Swamp	0.0213	0.0251	0.0450	0.0389	0.1899	0.1339	0.0697	0.0021

14029 rows × 13 columns

# Load palsar image
palsar = rio.open(palsar_dir)
palsar_image = palsar.read(
    out_shape=(palsar.count, landsat_image.shape[1], landsat_image.shape[2]),
    resampling=Resampling.bilinear,
) / 1e3

# Plot palsar image
plt.imshow(palsar_image[0], cmap="gray", vmin=-15, vmax=-5)

# Extract raster value
palsar_extract = np.stack([x for x in palsar.sample(coords)]) / 1e3
sample_extract[["HH", "HV"]] = palsar_extract
sample_extract

	id	lc	geometry	value	label	B1	B2	B3	B4	B5	B6	B7	B9	HH	HV
0	0	2001	POINT (101.67155 -1.86099)	1	Primary dryland forest	0.0091	0.0136	0.0321	0.0205	0.2243	0.0994	0.0408	0.0019	-7.608	-11.281
1	1	2001	POINT (101.43871 -2.23694)	1	Primary dryland forest	0.0101	0.0159	0.0439	0.0292	0.3572	0.1479	0.0560	0.0027	-7.931	-11.993
2	2	2001	POINT (101.49341 -1.77152)	1	Primary dryland forest	0.0205	0.0223	0.0389	0.0273	0.2772	0.1287	0.0554	0.0033	-7.901	-11.999
3	3	2001	POINT (101.75132 -2.02161)	1	Primary dryland forest	0.0058	0.0133	0.0385	0.0247	0.2979	0.1405	0.0579	0.0027	-7.223	-11.135
4	4	2001	POINT (101.1242 -1.96125)	1	Primary dryland forest	0.0174	0.0209	0.0400	0.0252	0.3285	0.1337	0.0543	0.0024	-8.136	-12.624
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
14024	14024	50011	POINT (101.32983 -1.83297)	22	Swamp	0.0187	0.0235	0.0419	0.0425	0.1372	0.1336	0.0820	0.0026	-12.472	-19.294
14025	14025	50011	POINT (101.33306 -1.82461)	22	Swamp	0.0205	0.0240	0.0407	0.0496	0.1509	0.1315	0.0788	0.0027	-11.963	-19.554
14026	14026	50011	POINT (101.33064 -1.8362)	22	Swamp	0.0239	0.0278	0.0546	0.0443	0.2668	0.1430	0.0719	0.0023	-9.416	-15.761
14027	14027	50011	POINT (101.30207 -1.80063)	22	Swamp	0.0315	0.0407	0.0621	0.0588	0.2338	0.1386	0.0839	0.0031	-10.746	-18.832
14028	14028	50011	POINT (101.32363 -1.8362)	22	Swamp	0.0213	0.0251	0.0450	0.0389	0.1899	0.1339	0.0697	0.0021	-9.975	-15.855

14029 rows × 15 columns

# Split sample to train and test
seeds = 2
train, test = train_test_split(sample_extract, train_size=0.7, random_state=seeds)
print(f'Train size: {len(train)}\nTest size: {len(test)}')

Train size: 9820
Test size: 4209

# Make random forest model
predictors = ["B2", "B3", 'B4', 'B5', 'B6', 'B7', 'HH', 'HV']
model = RandomForestClassifier(100)
model.fit(
    train[predictors],
    train["value"]
)

RandomForestClassifier()

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

# Test model
test_apply = model.predict(test[predictors])

# Confusion matrix
cm = confusion_matrix(test['value'], test_apply)
display = ConfusionMatrixDisplay(cm)
display.plot()

# Report
report = classification_report(test['value'], test_apply)
print(report)

              precision    recall  f1-score   support

           1       0.53      0.74      0.62       298
           2       0.43      0.43      0.43       309
           5       0.53      0.75      0.62       322
           6       0.22      0.11      0.14       293
           7       0.51      0.61      0.56       280
           8       0.59      0.56      0.58       303
           9       0.42      0.35      0.38       321
          11       0.95      0.88      0.91       295
          14       0.57      0.61      0.59       303
          15       0.28      0.20      0.24       291
          16       0.28      0.22      0.25       289
          17       0.65      0.68      0.66       297
          20       0.70      0.74      0.72       308
          21       0.33      0.14      0.20         7
          22       0.82      0.84      0.83       293

    accuracy                           0.55      4209
   macro avg       0.52      0.53      0.51      4209
weighted avg       0.53      0.55      0.54      4209

# Load image
combine_image = np.concatenate((landsat_image, palsar_image), 0)
image_transpose = combine_image.T
transpose_shape = image_transpose.shape
table_image = pd.DataFrame(
    image_transpose.reshape(-1, transpose_shape[2]),
    columns=["B1", "B2", "B3", "B4", "B5", "B6", "B7", "B9", "HH", "HV"],
)
table_image

	B1	B2	B3	B4	B5	B6	B7	B9	HH	HV
0	0.0151	0.0165	0.0329	0.0213	0.2570	0.1103	0.0428	0.0023	-7.973	-12.063
1	0.0166	0.0184	0.0335	0.0215	0.2700	0.1119	0.0458	0.0025	-8.477	-12.280
2	0.0188	0.0198	0.0384	0.0216	0.3084	0.1288	0.0518	0.0026	-8.904	-12.676
3	0.0204	0.0221	0.0420	0.0241	0.3381	0.1348	0.0528	0.0025	-8.938	-13.058
4	0.0210	0.0231	0.0476	0.0274	0.3625	0.1518	0.0588	0.0024	-8.600	-13.174
...	...	...	...	...	...	...	...	...	...	...
7405687	0.0098	0.0139	0.0331	0.0209	0.2861	0.1183	0.0460	0.0026	-7.839	-11.874
7405688	0.0091	0.0137	0.0331	0.0202	0.2860	0.1191	0.0452	0.0027	-7.465	-11.422
7405689	0.0091	0.0136	0.0337	0.0208	0.2934	0.1197	0.0465	0.0027	-7.270	-11.149
7405690	0.0107	0.0146	0.0353	0.0207	0.2875	0.1156	0.0434	0.0027	-6.970	-11.184
7405691	0.0110	0.0146	0.0331	0.0205	0.2601	0.1142	0.0434	0.0026	-7.012	-11.148

7405692 rows × 10 columns

# Predict table image
prediction = model.predict(table_image[predictors])
prediction

array([1, 1, 1, ..., 1, 1, 1])

# Prediction to image again
prediction_image = np.rot90(np.flip(prediction.reshape(transpose_shape[0], transpose_shape[1]), 1), 1)

# Show to plot
plt.figure(figsize=(10, 10))
plt.imshow(prediction_image, cmap=cmap, interpolation="nearest")
plt.legend(**legend)

# Save image to geotiff
output = rio.open(
    "data/LULC.tif",
    "w",
    "COG",
    count=1,
    width=prediction_image.shape[1],
    height=prediction_image.shape[0],
    crs=landsat.crs,
    transform=landsat.transform,
    dtype="uint8",
    nodata=0,
    compress="lzw",
    resampling="mode",
    tiled=True,
)
output.write_colormap(1, dict_palette)
output.write(prediction_image, 1)
output.close()

# Image segmentation

# Do uniform filter to composite image
seed_image = composite
plt.figure(figsize=(5, 5))
plt.imshow(seed_image)

# Segmentation
segment = ski.segmentation.slic(
    seed_image, n_segments=10000, compactness=5, sigma=5
)
plt.figure(figsize=(5, 5))
plt.imshow(ski.segmentation.mark_boundaries(composite, segment, outline_color=(0, 255, 255)))

# Get the mode of each segment
segment_unique = np.unique(segment)
lc_segment = segment.copy()
for x in segment_unique:
    lc_segment[segment == x] = scipy.stats.mode(prediction_image[segment == x]).mode
lc_segment

array([[1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [7, 7, 7, ..., 1, 1, 1],
       [7, 7, 7, ..., 1, 1, 1],
       [7, 7, 7, ..., 1, 1, 1]])

# Show to plot the segmented LC
plt.figure(figsize=(10, 10))
plt.imshow(lc_segment, cmap=cmap, interpolation="nearest")
plt.legend(**legend)

# Save image to geotiff
output = rio.open(
    "data/LULC_Segment.tif",
    "w",
    "COG",
    count=1,
    width=lc_segment.shape[1],
    height=lc_segment.shape[0],
    crs=landsat.crs,
    transform=landsat.transform,
    dtype="uint8",
    nodata=0,
    compress="lzw",
    resampling="mode",
    tiled=True,
)
output.write_colormap(1, dict_palette)
output.write(lc_segment, 1)
output.close()

# Convert raster to shapefile
lc_vector = [{ "type": "Feature", "properties": { "lc": x[1] }, "geometry": x[0] } for x in shapes(lc_segment.astype('uint8'), transform=landsat.transform)]
lc_vector = json.dumps({
    "type": "FeatureCollection",
    "properties": {},
    "features": lc_vector
})

# Read as geodataframe
lc_df = gpd.read_file(lc_vector, driver='GeoJSON')

# Plot it
lc_df.plot(column="lc", cmap=cmap)
plt.legend(**legend)

# Add another column such as palette and label
lc_df["palette"] = lc_df["lc"].map(dict_palette_hex)
lc_df["label"] = lc_df["lc"].map(dict_label)

# Save the file
lc_df.to_file("data/LULC_Shapefile.shp")