Wednesday, 11 October 2023

A Beginner's Guide to heatmap in Pyplot

A heatmap represents individual data values as colors. Using a heat map, we can clearly see which areas have high values and which areas have low values.

 

For example, consider the following companies' sales data.

 

Company   Jan    Feb    Mar
A         1000   1100   1900
B         1500   1200   1400
C         1200   1100   800

 

 Plotting the above data generates the heat map below.

 


 

Using the above figure, we can clearly identify that the yellow box has the maximum value.

 

Find the below working application.

 

hello_world.py

"""Draw a basic heatmap of monthly sales per company with pyplot.imshow()."""

import matplotlib.pyplot as plt
import pandas as pd

# Sample data in "long" form: one row per (company, month) pair
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Pivot the data for heatmap: companies become rows, months become columns
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')

# Create a heatmap using imshow
plt.imshow(heatmap_data, cmap='viridis', interpolation='nearest')

# Add color bar for reference
plt.colorbar()

# Add labels and title; tick positions 0..n-1 are relabelled with the
# month and company names taken from the pivoted table
plt.xticks(range(len(heatmap_data.columns)), heatmap_data.columns)
plt.yticks(range(len(heatmap_data.index)), heatmap_data.index)
plt.xlabel('Month')
plt.ylabel('Company')
plt.title('Sales Heatmap')

# Display the heatmap
plt.show()

In this code, I created the sales data and created a dataframe ‘df’ from it. Then, I used the pivot() function to reshape the data into a suitable format for a heatmap.

 

Month     Feb   Jan   Mar

Company                 

A        1100  1000  1900

B        1200  1500  1400

C        1100  1200   800

 

The resulting heatmap_data DataFrame has one row per company and one column per month, with the sales values in the cells.

 

Finally, I used the pyplot.imshow() method to create the heatmap, specifying the 'viridis' colormap and the 'nearest' interpolation method.

 

‘plt.colorbar()’ method adds a color bar for reference.

 

Add values in the heatmap cells

By adding text annotations to the heatmap, we can display the actual values in the cells of the heatmap.

 

Example

# Add text annotations to display values.
# plt.text takes (x, y), so the column position j comes first and the row position i second.
for i, row in enumerate(heatmap_data.to_numpy()):
    for j, value in enumerate(row):
        plt.text(j, i, value, ha='center', va='center', color='black')

Find the below working application.

 

add_values_in_heatmap.py

"""Draw the sales heatmap and write each cell's value on top of it."""

import matplotlib.pyplot as plt
import pandas as pd

# Sample data in "long" form: one row per (company, month) pair
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Pivot the data for heatmap: companies become rows, months become columns
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')
print(heatmap_data)

# Create a heatmap using imshow
plt.imshow(heatmap_data, cmap='viridis', interpolation='nearest')

# Add color bar for reference
plt.colorbar()

# Add text annotations to display values.
# plt.text takes (x, y), so the column position j comes first and the row position i second.
for i, row in enumerate(heatmap_data.to_numpy()):
    for j, value in enumerate(row):
        plt.text(j, i, value, ha='center', va='center', color='black')

# Add labels and title
plt.xticks(range(len(heatmap_data.columns)), heatmap_data.columns)
plt.yticks(range(len(heatmap_data.index)), heatmap_data.index)
plt.xlabel('Month')
plt.ylabel('Company')
plt.title('Sales Heatmap')

# Display the heatmap
plt.show()

Output



As you can see in the above image, the column names appear in alphabetical order, but I want to keep them in calendar order: Jan, Feb, Mar, and so on.

 

We can preserve the column order by converting the column we pivot on to an ordered categorical data type.

 

# Desired left-to-right order of the month columns
month_order = ['Jan', 'Feb', 'Mar']

# Make 'Month' an ordered categorical that follows month_order
df['Month'] = pd.Categorical(
    df['Month'],
    categories=month_order,
    ordered=True,
)

 

Find the below working application.

 

preserve_column_names.py

"""Draw the annotated sales heatmap with the month columns in calendar order."""

import matplotlib.pyplot as plt
import pandas as pd

# Sample data in "long" form: one row per (company, month) pair
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Make 'Month' an ordered categorical so the pivoted columns follow
# month_order instead of being sorted alphabetically
month_order = ['Jan', 'Feb', 'Mar']
df['Month'] = pd.Categorical(df['Month'], categories=month_order, ordered=True)

# Pivot the data for heatmap: companies become rows, months become columns
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')
print(heatmap_data)

# Create a heatmap using imshow
plt.imshow(heatmap_data, cmap='viridis', interpolation='nearest')

# Add color bar for reference
plt.colorbar()

# Add text annotations to display values.
# plt.text takes (x, y), so the column position j comes first and the row position i second.
for i, row in enumerate(heatmap_data.to_numpy()):
    for j, value in enumerate(row):
        plt.text(j, i, value, ha='center', va='center', color='black')

# Add labels and title
plt.xticks(range(len(heatmap_data.columns)), heatmap_data.columns)
plt.yticks(range(len(heatmap_data.index)), heatmap_data.index)
plt.xlabel('Month')
plt.ylabel('Company')
plt.title('Sales Heatmap')

# Display the heatmap
plt.show()

Output

 


Customize cell colors using a custom color map

We can customize the colors of heatmap cells, either by a built-in color map or using matplotlib.colors.LinearSegmentedColormap.

 

Using built-in color map 'coolwarm'

# Look the built-in colormap up by name. plt.get_cmap() is used because the
# older plt.cm.get_cmap() was removed in Matplotlib 3.9.
custom_colormap = plt.get_cmap('coolwarm')

plt.imshow(heatmap_data, cmap=custom_colormap, interpolation='nearest')

 

customize-using-built-in-color-map.py

"""Draw the annotated sales heatmap using the built-in 'coolwarm' colormap."""

import matplotlib.pyplot as plt
import pandas as pd

# Sample data in "long" form: one row per (company, month) pair
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Make 'Month' an ordered categorical so the pivoted columns follow
# month_order instead of being sorted alphabetically
month_order = ['Jan', 'Feb', 'Mar']
df['Month'] = pd.Categorical(df['Month'], categories=month_order, ordered=True)

# Pivot the data for heatmap: companies become rows, months become columns
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')
print(heatmap_data)

# Look the built-in colormap up by name. plt.get_cmap() is used because the
# older plt.cm.get_cmap() was removed in Matplotlib 3.9.
custom_colormap = plt.get_cmap('coolwarm')

# Create a heatmap using imshow
plt.imshow(heatmap_data, cmap=custom_colormap, interpolation='nearest')

# Add color bar for reference
plt.colorbar()

# Add text annotations to display values.
# plt.text takes (x, y), so the column position j comes first and the row position i second.
for i, row in enumerate(heatmap_data.to_numpy()):
    for j, value in enumerate(row):
        plt.text(j, i, value, ha='center', va='center', color='black')

# Add labels and title
plt.xticks(range(len(heatmap_data.columns)), heatmap_data.columns)
plt.yticks(range(len(heatmap_data.index)), heatmap_data.index)
plt.xlabel('Month')
plt.ylabel('Company')
plt.title('Sales Heatmap')

# Display the heatmap
plt.show()

 Output


 

 

We can even customize the colormap range using the vmin and vmax parameters; these allow us to emphasize specific ranges of values.

 

Example

# Look the built-in colormap up by name. plt.get_cmap() is used because the
# older plt.cm.get_cmap() was removed in Matplotlib 3.9.
custom_colormap = plt.get_cmap('coolwarm')

# vmin and vmax set the data values mapped to the two ends of the colormap
plt.imshow(data, cmap=custom_colormap, vmin=0.2, vmax=0.8)

 

specify_colormap_range.py

"""Show a random 5x5 grid with the colormap range limited by vmin and vmax."""

import matplotlib.pyplot as plt
import numpy as np

# 5x5 grid of random values; unseeded, so the picture differs on every run
data = np.random.random((5, 5))

# Look the built-in colormap up by name. plt.get_cmap() is used because the
# older plt.cm.get_cmap() was removed in Matplotlib 3.9.
custom_colormap = plt.get_cmap('coolwarm')

# vmin and vmax set the data values mapped to the two ends of the colormap
plt.imshow(data, cmap=custom_colormap, vmin=0.2, vmax=0.8)

plt.colorbar()
plt.title('Modified Colormap Range')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')

plt.show()

Output



Diverging colormaps like 'coolwarm', 'RdBu', or 'seismic' are used to highlight positive and negative values differently.

 

Create custom color map using LinearSegmentedColormap

# Colour stops for the custom colormap, as (position, colour) pairs.
# Positions run from 0 (lowest value) to 1 (highest value).
custom_colors = [
    (0, 'white'),
    (0.2, 'purple'),
    (0.4, 'blue'),
    (0.6, 'green'),
    (1, 'yellow'),
]
# Build a colormap that blends smoothly between the stops
custom_colormap = mcolors.LinearSegmentedColormap.from_list('custom', custom_colors)

# Draw the heatmap with the custom colormap
plt.imshow(heatmap_data, cmap=custom_colormap, interpolation='nearest')

Find the below working application.

 

custom_color_map.py

"""Draw the annotated sales heatmap with a hand-built LinearSegmentedColormap."""

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import pandas as pd

# Sales figures in "long" form: one row per (company, month) pair
data = {
    'Company': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Month': ['Jan', 'Jan', 'Jan', 'Feb', 'Feb', 'Feb', 'Mar', 'Mar', 'Mar'],
    'Sales': [1000, 1500, 1200, 1100, 1200, 1100, 1900, 1400, 800],
}
df = pd.DataFrame(data)

# Make 'Month' an ordered categorical so the pivoted columns follow
# month_order instead of being sorted alphabetically
month_order = ['Jan', 'Feb', 'Mar']
df['Month'] = pd.Categorical(
    df['Month'],
    categories=month_order,
    ordered=True,
)

# Reshape to a grid: companies as rows, months as columns
heatmap_data = df.pivot(index='Company', columns='Month', values='Sales')
print(heatmap_data)

# Colour stops for the custom colormap, as (position, colour) pairs.
# Positions run from 0 (lowest value) to 1 (highest value).
custom_colors = [
    (0, 'white'),
    (0.2, 'purple'),
    (0.4, 'blue'),
    (0.6, 'green'),
    (1, 'yellow'),
]
custom_colormap = mcolors.LinearSegmentedColormap.from_list('custom', custom_colors)

# Draw the grid with the custom colormap and add a colour bar for reference
plt.imshow(heatmap_data, cmap=custom_colormap, interpolation='nearest')
plt.colorbar()

# Write each value in its cell.
# plt.text takes (x, y), so the column position j comes first and the row position i second.
for i, row in enumerate(heatmap_data.to_numpy()):
    for j, value in enumerate(row):
        plt.text(j, i, value, ha='center', va='center', color='black')

# Relabel the tick positions with the month and company names
plt.xticks(range(len(heatmap_data.columns)), heatmap_data.columns)
plt.yticks(range(len(heatmap_data.index)), heatmap_data.index)
plt.xlabel('Month')
plt.ylabel('Company')
plt.title('Sales Heatmap')

# Display the heatmap
plt.show()

Output




Previous                                                    Next                                                    Home

No comments:

Post a Comment