
Photo by Jack Anstey on Unsplash
En este artículo, repasaré primero algunas secciones para preparar el conocimiento de fondo para algunos lectores que son nuevos en Matplotlib:
- Comprender las dos diferentes interfaces de Matplotlib (¡Esto ha causado mucha confusión!).
- Comprender los elementos de una figura, para que puedan buscar fácilmente las APIs para resolver su problema.
- Echa un vistazo a algunos tipos comunes de gráficos para que los lectores tengan una mejor idea de cuándo/cómo usarlos.
- Aprende a aumentar la "dimensión" de tus gráficos.
- Aprende a hacer una partición de la figura usando GridSpec.
- Establecer un objetivo.
- Preparar las variables.
- Preparar la visualización.
import matplotlib.pyplot as plt
plt.figure()
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.title('Test figure')
plt.show()import matplotlib.pyplot as plt fig, ax = plt.subplots(figsize=(3,3)) ax.bar(x=['A','B','C'], height=[3.1,7,4.2], color='r') ax.set_xlabel(xlabel='X title', size=20) ax.set_ylabel(ylabel='Y title' , color='b', size=20) plt.show()
Veamos un ejemplo sencillo de cómo crear un gráfico de líneas con una interfaz orientada a objetos.
fig, ax = plt.subplots(figsize=(3,3))
ax.plot(['Alice','Bob','Catherine'], [4,6,3], color='r')
ax.set_xlabel('TITLE 1')
for tick in ax.get_xticklabels():
tick.set_rotation(45)
plt.show()# 2D kde plots
import numpy as np
import matplotlib.pyplot as plt
import seaborn as snsnp.random.seed(1)
numerical_1 = np.random.randn(100)
np.random.seed(2)
numerical_2 = np.random.randn(100)fig, ax = plt.subplots(figsize=(3,3))
sns.kdeplot(data=numerical_1,
data2= numerical_2,
ax=ax,
shade=True,
color="blue",
bw=1)
plt.show()# heat mapimport numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as snsdf = pd.DataFrame(dict(categorical_1=['apple', 'banana', 'grapes',
'apple', 'banana', 'grapes',
'apple', 'banana', 'grapes'],
categorical_2=['A','A','A','B','B','B','C','C','C'],
value=[10,2,5,7,3,15,1,6,8]))
pivot_table = df.pivot("categorical_1", "categorical_2", "value")# try printing out pivot_table to see what it looks like!fig, ax = plt.subplots(figsize=(5,5))sns.heatmap(data=pivot_table,
cmap=sns.color_palette("Blues"),
ax=ax)
plt.show()- Gráficas de superposición
fig, ax = plt.subplots(figsize=(4,4)) sns.lineplot(x=['A','B','C','D'], y=[4,2,5,3], color='r', ax=ax) sns.lineplot(x=['A','B','C','D'], y=[1,6,2,4], color='b', ax=ax) ax.legend(['alpha', 'beta'], facecolor='w') plt.show()
sns.set(style="white", rc={"lines.linewidth": 3})fig, ax1 = plt.subplots(figsize=(4,4))
ax2 = ax1.twinx()sns.barplot(x=['A','B','C','D'],
y=[100,200,135,98],
color='#004488',
ax=ax1)sns.lineplot(x=['A','B','C','D'],
y=[4,2,5,3],
color='r',
marker="o",
ax=ax2)
plt.show()
sns.set()import matplotlib.pyplot as pltcategorical_1 = ['A', 'B', 'C', 'D']
colors = ['green', 'red', 'blue', 'orange']
numerical = [[6, 9, 2, 7],
[6, 7, 3, 8],
[9, 11, 13, 15],
[3, 5, 9, 6]]number_groups = len(categorical_1)
bin_width = 1.0/(number_groups+1)fig, ax = plt.subplots(figsize=(6,6))for i in range(number_groups):
ax.bar(x=np.arange(len(categorical_1)) + i*bin_width,
height=numerical[i],
width=bin_width,
color=colors[i],
align='center')ax.set_xticks(np.arange(len(categorical_1)) + number_groups/(2*(number_groups+1)))# number_groups/(2*(number_groups+1)): offset of xticklabelax.set_xticklabels(categorical_1)
ax.legend(categorical_1, facecolor='w')plt.show()- Facet - mapear el conjunto de datos en múltiples ejes, y que difieren por una o dos variables categóricas. El lector puede encontrar un montón de ejemplos en https://seaborn.pydata.org/generated/seaborn.FacetGrid.html
- Color / Forma / Tamaño de los nodos en un gráfico de dispersión: El siguiente ejemplo de código tomado de la API de Seaborn Scatter Plot muestra cómo funciona. (https://seaborn.pydata.org/generated/seaborn.scatterplot.html)
import seaborn as snstips = sns.load_dataset("tips")
ax = sns.scatterplot(x="total_bill", y="tip",
hue="size", size="size",
sizes=(20, 200), hue_norm=(0, 7),
legend="full", data=tips)
plt.show()fig = plt.figure(figsize=(7,7))
gs = gridspec.GridSpec(nrows=3,
ncols=3,
figure=fig,
width_ratios= [1, 1, 1],
height_ratios=[1, 1, 1],
wspace=0.3,
hspace=0.3)ax1 = fig.add_subplot(gs[0, 0])
ax1.text(0.5, 0.5, 'ax1: gs[0, 0]', fontsize=12, fontweight="bold", va="center", ha="center") # adding text to ax1ax2 = fig.add_subplot(gs[0, 1:3])
ax2.text(0.5, 0.5, 'ax2: gs[0, 1:3]', fontsize=12, fontweight="bold", va="center", ha="center")ax3 = fig.add_subplot(gs[1:3, 0:2])
ax3.text(0.5, 0.5, 'ax3: gs[1:3, 0:2]', fontsize=12, fontweight="bold", va="center", ha="center")ax4 = fig.add_subplot(gs[1:3, 2])
ax4.text(0.5, 0.5, 'ax4: gs[1:3, 2]', fontsize=12, fontweight="bold", va="center", ha="center")plt.show()df = pd.read_csv('BlackFriday.csv', usecols = ['User_ID', 'Gender', 'Age', 'Purchase'])
df_gp_1 = df[['User_ID', 'Purchase']].groupby('User_ID').agg(np.mean).reset_index()
df_gp_2 = df[['User_ID', 'Gender', 'Age']].groupby('User_ID').agg(max).reset_index()
df_gp = pd.merge(df_gp_1, df_gp_2, on = ['User_ID'])freq = ((df_gp.Age.value_counts(normalize = True).reset_index().sort_values(by = 'index').Age)*100).tolist()
number_gp = 7
# freq = the percentage for each age group, and there’re 7 age groups.
def ax_settings(ax, var_name, x_min, x_max):
ax.set_xlim(x_min,x_max)
ax.set_yticks([])
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_edgecolor('#444444')
ax.spines['bottom'].set_linewidth(2)
ax.text(0.02, 0.05, var_name, fontsize=17, fontweight="bold", transform = ax.transAxes)
return None
# Manipulate each axes object in the left. Try to tune some parameters and you'll know how each command works.
fig = plt.figure(figsize=(12,7))
gs = gridspec.GridSpec(nrows=number_gp,
ncols=2,
figure=fig,
width_ratios= [3, 1],
height_ratios= [1]*number_gp,
wspace=0.2, hspace=0.05
)ax = [None]*(number_gp + 1)
features = ['0-17', '18-25', '26-35', '36-45', '46-50', '51-55', '55+']
# Create a figure, partition the figure into 7*2 boxes, set up an ax array to store axes objects, and create a list of age group names.
for i in range(number_gp):
ax[i] = fig.add_subplot(gs[i, 0])
ax_settings(ax[i], 'Age: ' + str(features[i]), -1000, 20000)
sns.kdeplot(data=df_gp[(df_gp.Gender == 'M') & (df_gp.Age == features[i])].Purchase,
ax=ax[i], shade=True, color="blue", bw=300, legend=False)
sns.kdeplot(data=df_gp[(df_gp.Gender == 'F') & (df_gp.Age == features[i])].Purchase,
ax=ax[i], shade=True, color="red", bw=300, legend=False)
if i < (number_gp - 1):
ax[i].set_xticks([])
# this 'for loop' is to create a bunch of axes objects, and link them to GridSpec boxes. Then, we manipulate them with sns.kdeplot() and ax_settings() we just defined.
ax[0].legend(['Male', 'Female'], facecolor='w')
# adding legends on the top axes object
ax[number_gp] = fig.add_subplot(gs[:, 1])
ax[number_gp].spines['right'].set_visible(False)
ax[number_gp].spines['top'].set_visible(False)ax[number_gp].barh(features, freq, color='#004c99', height=0.4)
ax[number_gp].set_xlim(0,100)
ax[number_gp].invert_yaxis()
ax[number_gp].text(1.09, -0.04, '(%)', fontsize=10, transform = ax[number_gp].transAxes)
ax[number_gp].tick_params(axis='y', labelsize = 14)
# manipulate the bar plot on the right. Try to comment out some of the commands to see what they actually do to the bar plot.
plt.show()