In my opinion, the smoothing function that they implement with python does not really compensate the loss of precision with the visual appealing.
Why not plotting the whole distribution, then?
I modified the violin plots source code that I found here HERE
And produced... let's called them blob plots:
Here is the code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import matplotlib as mpl | |
import matplotlib.pyplot as plt | |
import numpy as np | |
def blob_plot(ax,data,list_pos, extra=False): | |
lextra=[] | |
for d,p in zip(data,list_pos): | |
#this makes the histogram that constitutes the blob plot | |
m=min(d) | |
M=max(d) | |
nbins=20 | |
x = np.linspace(m,M,nbins) # support for histogram | |
his,bins = np.histogram(d, bins=nbins) | |
#scale the histogram to a proper size (you may have to fiddle with this), then place it at position 'pos' | |
scale_blob=2. | |
shift_his_plus_pos = [ p + h*scale_blob/float(len(d)) for h in his] | |
shift_his_minus_pos = [ p - h*scale_blob/float(len(d)) for h in his] | |
facecolor,alpha=color_alpha_blobs # assign color and transparency | |
#this is the matplotlib function that does the trick | |
ax.fill_betweenx(x,shift_his_minus_pos, shift_his_plus_pos, linewidth=0.0, facecolor= facecolor, alpha=alpha) | |
#calculates median or mean, if you want | |
if extra=='median': lextra.append( np.median(d) ) | |
elif extra=='mean': lextra.append( np.mean(d) ) | |
#and plots it | |
if extra != False: | |
color = 'orangered' | |
ax.plot(list_pos,lextra, color=color_mean_or_median,linestyle='-',marker='D',markersize=5, lw=1.5) | |
# MAKE UP SOME DATA | |
list_positions=[0,1,2,3,4] #this is their position on the x-axis | |
data_for_blobplot=[] # this will contain a list for each blob we want in the plot | |
extra='median' | |
for pos in list_positions: | |
if pos <= 2: some_data = np.random.gumbel(pos,1, 1000) # generate some data | |
else: some_data = np.random.normal(pos,1, 1000) # generate some other data | |
data_for_blobplot.append(some_data) # and append it to the list | |
# PLOTTING | |
color_alpha_blobs=('midnightblue',0.7) #this is the color of the blobplot | |
color_mean_or_median='orangered' #this is the colot of the median or mean, whatever you specify | |
fig = plt.figure() # makes a figure | |
ax = fig.add_subplot(111) # adds a single subplot | |
#this is the blobplotting function | |
blob_plot(ax, data_for_blobplot, list_pos=list_positions, extra=extra) #makes blob plots, if extra is not spcified, makes no median/mean | |
ax.set_xlim(-1., len(list_positions)) #to display all of them | |
ax.set_ylim(-2., 12.) | |
plt.title('Beautiful blob plots',fontsize=18) | |
plt.xlabel('$\mu$ (yup, that is Latex)', fontsize=14) | |
plt.ylabel('distribution of my variable', fontsize=14) | |
#################################################### | |
# So far this is all you need to make the blob plot | |
# Below is if you want a legend for the medain/mean | |
#################################################### | |
# draw temporary dot to use it for a legend, later set to invisible | |
h = ax.scatter( [1],[1], marker='D', color=color_mean_or_median, label='extra') | |
#draws the legend, with larger symbols | |
leg=plt.legend(loc= 'upper left',scatterpoints=1) | |
for legobj in leg.legendHandles: | |
legobj.set_linewidth(10.0) | |
h.set_visible(False) | |
plt.savefig('blobplot.png') # saving to png, alternatively you can write 'blobplot.pdf', or other formats | |
plt.show() #prints to screen your splendid blobplot |
If you have ideas to improve it let me know!
If you like the result, go say thanks to the violin plot maker (I did very little)