Programming for beginners: A Beginner's Guide to heatmap in Pyplot

A heatmap represents individual data values as colors. Using a heatmap, we can clearly see which areas have high values and which areas have low values.

For example, consider the following sales data for three companies.

Company	Jan	Feb	Mar
A	1000	1100	1900
B	1500	1200	1400
C	1200	1100	800

The above data produces the heatmap below.

Using the above figure, we can clearly identify that the yellow box has the maximum value.

Find the below working application.

hello_world.py

import matplotlib.pyplot as plt
import pandas as pd

import pandas as pd

# Monthly sales in long format: position k of each list describes one record
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800],
}
df = pd.DataFrame(data)

# Reshape to a grid: one row per company, one column per month
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')
n_companies, n_months = heatmap_data.shape

# Draw the grid as an image, one colored cell per sales value
plt.imshow(heatmap_data, interpolation='nearest', cmap='viridis')

# Color bar so the colors can be read back as sales values
plt.colorbar()

# Title and axis names
plt.title('Sales Heatmap')
plt.xlabel('Month')
plt.ylabel('Company')

# One tick per cell, labelled with its month / company
plt.xticks(range(n_months), heatmap_data.columns)
plt.yticks(range(n_companies), heatmap_data.index)

# Show the finished figure
plt.show()

In this code, I created the sales data and created a dataframe ‘df’ from it. Then, I used the pivot() function to reshape the data into a suitable format for a heatmap.

Month     Feb   Jan   Mar
Company
A        1100  1000  1900
B        1200  1500  1400
C        1100  1200   800

The resulting heatmap_data DataFrame has one row per company and one column per month, with the sales figures as its values.

Finally, I used the pyplot.imshow() method to create the heatmap, specifying the 'viridis' colormap and the 'nearest' interpolation method.

‘plt.colorbar()’ method adds a color bar for reference.

Add values in the heatmap cells

By adding text annotations to the heatmap, we can display the actual values in its cells.

Example

# Add text annotations to display values.
# i walks the rows and j walks the columns of heatmap_data; the value at
# row i, column j is written at x=j, y=i, centered on that point.
for i in range(len(heatmap_data)):
    for j in range(len(heatmap_data.columns)):
        plt.text(j, i, heatmap_data.iloc[i, j], ha='center', va='center', color='black')

Find the below working application.

add_values_in_heatmap.py

import matplotlib.pyplot as plt
import pandas as pd

import pandas as pd

# Long-format sales records: position k of each list describes one sale
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800],
}
df = pd.DataFrame(data)

# Wide table for the heatmap: companies down the rows, months across the columns
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')
print(heatmap_data)

# Render the table as a grid of colored cells, plus a color bar for reference
plt.imshow(heatmap_data, interpolation='nearest', cmap='viridis')
plt.colorbar()

# Write each sales value in the middle of its cell (x = column, y = row)
for row, sales_row in enumerate(heatmap_data.to_numpy()):
    for col, sales in enumerate(sales_row):
        plt.text(col, row, sales, color='black', ha='center', va='center')

# One tick per cell, labelled with its month / company
n_companies, n_months = heatmap_data.shape
plt.xticks(range(n_months), heatmap_data.columns)
plt.yticks(range(n_companies), heatmap_data.index)

# Title and axis names
plt.title('Sales Heatmap')
plt.xlabel('Month')
plt.ylabel('Company')

# Show the finished figure
plt.show()

Output

As you can see in the above image, the column names appear in alphabetical order, but I want to keep them in calendar order: Jan, Feb, Mar, and so on.

We can preserve the column order by converting the column we pivot on to an ordered categorical data type.

# Desired left-to-right order of the month columns
month_order = ['Jan', 'Feb', 'Mar']

# Turn 'Month' into an ordered categorical whose categories follow month_order
df['Month'] = pd.Categorical(df['Month'], categories=month_order, ordered=True)

Find the below working application.

preserve_column_names.py

import matplotlib.pyplot as plt
import pandas as pd

import pandas as pd

# Sales records in long format (company, month, sales)
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800],
}
df = pd.DataFrame(data)

# Make 'Month' an ordered categorical so the pivoted columns follow
# month_order (Jan, Feb, Mar) rather than alphabetical order
month_order = ['Jan', 'Feb', 'Mar']
df['Month'] = pd.Categorical(df['Month'], categories=month_order, ordered=True)

# Companies become rows, months become columns
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')
print(heatmap_data)

# Colored grid plus its reference color bar
plt.imshow(heatmap_data, interpolation='nearest', cmap='viridis')
plt.colorbar()

# Print each sales value at the center of its cell (x = column, y = row)
for row, sales_row in enumerate(heatmap_data.to_numpy()):
    for col, sales in enumerate(sales_row):
        plt.text(col, row, sales, color='black', ha='center', va='center')

# Title and axis names
plt.title('Sales Heatmap')
plt.xlabel('Month')
plt.ylabel('Company')

# One tick per cell, labelled with its month / company
n_companies, n_months = heatmap_data.shape
plt.xticks(range(n_months), heatmap_data.columns)
plt.yticks(range(n_companies), heatmap_data.index)

# Show the finished figure
plt.show()

Output

Customize cell colors using a custom color map

We can customize the colors of heatmap cells, either by a built-in color map or using matplotlib.colors.LinearSegmentedColormap.

Using built-in color map 'coolwarm'

# Look up the built-in 'coolwarm' colormap by name.
# plt.get_cmap() is used because plt.cm.get_cmap() was removed in Matplotlib 3.9.
custom_colormap = plt.get_cmap('coolwarm')

# Pass the colormap object to imshow through the cmap parameter
plt.imshow(heatmap_data, cmap=custom_colormap, interpolation='nearest')

customize-using-built-in-color-map.py

import matplotlib.pyplot as plt
import pandas as pd

import pandas as pd

# Sample data in long format: position k of each list describes one sale
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Make 'Month' an ordered categorical so the pivoted columns follow
# month_order (Jan, Feb, Mar) rather than alphabetical order
month_order = ['Jan', 'Feb', 'Mar']
df['Month'] = pd.Categorical(df['Month'], categories=month_order, ordered=True)

# Pivot the data for heatmap: companies as rows, months as columns
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')
print(heatmap_data)

# Look up the built-in 'coolwarm' colormap by name.
# plt.get_cmap() is used because plt.cm.get_cmap() was removed in Matplotlib 3.9.
custom_colormap = plt.get_cmap('coolwarm')

# Create a heatmap using imshow
plt.imshow(heatmap_data, cmap=custom_colormap, interpolation='nearest')

# Add color bar for reference
plt.colorbar()

# Add text annotations: each value is written at the center of its cell
# (x = column position, y = row position)
for i, sales_row in enumerate(heatmap_data.to_numpy()):
    for j, sales in enumerate(sales_row):
        plt.text(j, i, sales, ha='center', va='center', color='black')

# Add labels and title
plt.xticks(range(len(heatmap_data.columns)), heatmap_data.columns)
plt.yticks(range(len(heatmap_data.index)), heatmap_data.index)
plt.xlabel('Month')
plt.ylabel('Company')
plt.title('Sales Heatmap')

# Display the heatmap
plt.show()

Output

We can also customize the colormap range using the vmin and vmax parameters, which lets us emphasize a specific range of values.

Example

# Look up the built-in 'coolwarm' colormap by name.
# plt.get_cmap() is used because plt.cm.get_cmap() was removed in Matplotlib 3.9.
custom_colormap = plt.get_cmap('coolwarm')

# vmin and vmax set the data values mapped to the two ends of the colormap
plt.imshow(data, cmap=custom_colormap, vmin=0.2, vmax=0.8)

specify_colormap_range.py

import matplotlib.pyplot as plt
import numpy as np

# 5x5 grid of random sample values
data = np.random.random((5, 5))

# Look up the built-in 'coolwarm' colormap by name.
# plt.get_cmap() is used because plt.cm.get_cmap() was removed in Matplotlib 3.9.
custom_colormap = plt.get_cmap('coolwarm')

# vmin and vmax set the data values mapped to the two ends of the colormap
plt.imshow(data, cmap=custom_colormap, vmin=0.2, vmax=0.8)

# Color bar, title and axis names
plt.colorbar()
plt.title('Modified Colormap Range')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')

# Display the figure
plt.show()

Output

Colormaps like 'coolwarm', 'RdBu', or 'seismic' are used to highlight positive and negative values differently.

Create custom color map using LinearSegmentedColormap

# Define custom colors and their positions in the colormap.
# Each entry is a (position, color name) pair.
custom_colors = [(0, 'white'), (0.2, 'purple'), (0.4, 'blue'), (0.6, 'green'), (1, 'yellow')]
# Build the colormap from that list; mcolors is matplotlib.colors
# (import matplotlib.colors as mcolors, as in the full listing)
custom_colormap = mcolors.LinearSegmentedColormap.from_list('custom', custom_colors)

# Create a heatmap using imshow
plt.imshow(heatmap_data, cmap=custom_colormap, interpolation='nearest')

Find the below working application.

custom_color_map.py

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

import pandas as pd

# Sales records in long format (company, month, sales)
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800],
}
df = pd.DataFrame(data)

# Make 'Month' an ordered categorical so the pivoted columns follow
# month_order (Jan, Feb, Mar) rather than alphabetical order
month_order = ['Jan', 'Feb', 'Mar']
df['Month'] = pd.Categorical(df['Month'], categories=month_order, ordered=True)

# Companies become rows, months become columns
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')
print(heatmap_data)

# Custom colormap built from (position, color name) pairs
custom_colors = [(0, 'white'), (0.2, 'purple'), (0.4, 'blue'), (0.6, 'green'), (1, 'yellow')]
custom_colormap = mcolors.LinearSegmentedColormap.from_list('custom', custom_colors)

# Colored grid using the custom colormap, plus its reference color bar
plt.imshow(heatmap_data, interpolation='nearest', cmap=custom_colormap)
plt.colorbar()

# Print each sales value at the center of its cell (x = column, y = row)
for row, sales_row in enumerate(heatmap_data.to_numpy()):
    for col, sales in enumerate(sales_row):
        plt.text(col, row, sales, color='black', ha='center', va='center')

# Title and axis names
plt.title('Sales Heatmap')
plt.xlabel('Month')
plt.ylabel('Company')

# One tick per cell, labelled with its month / company
n_companies, n_months = heatmap_data.shape
plt.xticks(range(n_months), heatmap_data.columns)
plt.yticks(range(n_companies), heatmap_data.index)

# Show the finished figure
plt.show()

Output

Previous Next Home

Programming for beginners

Wednesday, 11 October 2023

A Beginner's Guide to heatmap in Pyplot

No comments:

Post a Comment