In [1]:
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
In [2]:
def listAttr(obj, search=None):
    """Return the public attribute names of ``obj``, optionally filtered.

    Parameters
    ----------
    obj : object
        Any object accepted by ``dir()``.
    search : str, optional
        Substring to look for. The match is case-insensitive: both the
        search text and each attribute name are lowercased before comparing.
        When omitted (or empty), every public name is returned.

    Returns
    -------
    list of str
        Names reported by ``dir(obj)`` that do not start with an underscore,
        in ``dir()`` order, restricted to those containing ``search`` when
        it is given.
    """
    public_names = [name for name in dir(obj) if not name.startswith("_")]
    if not search:
        return public_names

    needle = search.lower()
    # Lowercase the candidate too; comparing a lowercased needle against the
    # raw name would silently miss mixed-case attributes such as "FacetGrid".
    return [name for name in public_names if needle in name.lower()]
In [3]:
# List every public (non-underscore) name that dir() reports for the seaborn module.
listAttr(sns)
Out[3]:
In [4]:
# Narrow the listing to public seaborn names that contain "load_dataset".
listAttr(sns, "load_dataset")
Out[4]:
In [5]:
# Show the dataset names that seaborn reports through get_dataset_names().
# NOTE(review): presumably this needs network access to fetch the list — confirm before running offline.
sns.get_dataset_names()
Out[5]:
In [6]:
# Load the "tips" example dataset and preview only its first rows,
# rather than echoing the entire frame into the notebook output.
tips = sns.load_dataset('tips')
tips.head()
Out[6]:
In [7]:
# Apply seaborn's global plot settings with the color_codes option enabled;
# this affects every figure drawn in the cells that follow.
sns.set(color_codes=True)
In [8]:
# Baseline scatter plot: tip on the y-axis against total_bill on the x-axis.
ax = sns.scatterplot(
    data=tips,
    x='total_bill',
    y='tip',
)
In [9]:
# Switch seaborn to the 'ticks' style preset before drawing.
sns.set_style('ticks')
# Bar plot of tip against total_bill.
# NOTE(review): total_bill is used as a numeric variable elsewhere in this notebook;
# confirm that one bar per distinct bill value is really what is wanted here.
ax = sns.barplot(x="total_bill", y="tip", data=tips)
In [10]:
# Same bar plot as the preceding cell, redrawn on its own.
ax = sns.barplot(
    data=tips,
    x="total_bill",
    y="tip",
)
In [11]:
# Scatter plot of tip vs. total_bill, with point colour keyed to the day column.
ax = sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
)
In [12]:
# As above, but the marker style additionally encodes the time column.
ax = sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    style="time",
)
Use `lmplot()` to enhance a scatterplot with a fitted linear regression model (and its uncertainty):
In [13]:
# Scatter of tip vs. total_bill drawn through lmplot (regression overlay).
sns.lmplot(
    data=tips,
    x="total_bill",
    y="tip",
)
Out[13]:
In [14]:
# Same lmplot, split by the time column via hue.
sns.lmplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="time",
)
Out[14]:
In [15]:
# Same lmplot, split by the day column via hue.
sns.lmplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
)
Out[15]:
In [16]:
# Swarm plot of total_bill per day, coloured by the smoker column.
# The trailing semicolon keeps the returned object out of the cell output.
sns.catplot(
    data=tips,
    kind="swarm",
    x="day",
    y="total_bill",
    hue="smoker",
);
In [17]:
# Preview the rows that remain after dropping those whose size column equals 3;
# .head() keeps the output short instead of dumping the whole filtered frame.
tips.query("size != 3").head()
Out[17]:
In [18]:
# Swarm plot of total_bill by size, drawn from the rows where size is not 3.
sns.catplot(
    data=tips.query("size != 3"),
    kind="swarm",
    x="size",
    y="total_bill",
);
In [19]:
# Violin plot of total_bill per day, coloured by the smoker column.
sns.catplot(
    data=tips,
    kind="violin",
    x="day",
    y="total_bill",
    hue="smoker",
);
In [20]:
# Bar-kind catplot of total_bill per day, coloured by the smoker column.
sns.catplot(
    data=tips,
    kind="bar",
    x="day",
    y="total_bill",
    hue="smoker",
);
In [21]:
# Horizontal box plot of total_bill per day, coloured by time.
# The automatic legend is turned off so one titled "Meal" can be added by hand.
g = sns.catplot(
    data=tips,
    kind='box',
    x="total_bill",
    y="day",
    hue="time",
    legend=False,
)
g.add_legend(title="Meal")
Out[21]:
In [22]:
# Same box plot as before, then resized and relabelled.
g = sns.catplot(
    data=tips,
    kind='box',
    x="total_bill",
    y="day",
    hue="time",
    legend=False,
)
g.add_legend(title="Meal")
# Resize the underlying figure (width, height in inches).
g.fig.set_size_inches(10.5, 5.5)
# Label the x-axis and leave the y-axis label empty.
g.set_axis_labels("Total bill ($)", "")
Out[22]:
In [23]:
# "boxen" variant of the total_bill-per-day plot, sized through height/aspect.
g = sns.catplot(
    data=tips,
    kind="boxen",
    x="total_bill",
    y="day",
    hue="time",
    legend=False,
    height=3.5,
    aspect=1.5,
);
In [24]:
# Fully customised box plot of total_bill per day, coloured by time.
# The statements below are order-sensitive (axis limits are set before the
# spines are trimmed), so they are kept in their original sequence.
g = sns.catplot(x="total_bill", y="day", hue="time",
height=3.5, aspect=1.5,
kind="box", legend=False, data=tips);
# Add the legend manually so it can carry the title "Meal".
g.add_legend(title="Meal")
# Label the x-axis; the y-axis label is left empty.
g.set_axis_labels("Total bill ($)", "")
# Fix the x range and replace the y tick labels with full day names.
# NOTE(review): assumes the day categories are ordered Thur, Fri, Sat, Sun — confirm.
g.set(xlim=(0, 60), yticklabels=["Thursday", "Friday", "Saturday", "Sunday"])
# Remove spines, with trim=True.
g.despine(trim=True)
# Set the final figure size (width, height in inches).
g.fig.set_size_inches(6.5, 3.5)
# Add minor x ticks at the listed positions.
g.ax.set_xticks([5, 15, 25, 35, 45, 55], minor=True);
# Rotate the y tick labels by 30 degrees.
plt.setp(g.ax.get_yticklabels(), rotation=30);
In [25]:
# Distribution plot of the total_bill column with distplot's default settings.
# NOTE(review): distplot is reported as deprecated in newer seaborn releases —
# confirm the installed version and consider histplot/displot when upgrading.
sns.distplot(tips['total_bill'])
Out[25]:
To build a histogram, first "bin" (or "bucket") the range of values—that is, divide the entire range of values into a series of intervals—and then count how many values fall into each interval.
In [26]:
# total_bill split into 20 bins, with the KDE option switched off.
# NOTE(review): distplot is reported as deprecated in newer seaborn releases —
# confirm the installed version and consider histplot when upgrading.
sns.distplot(tips['total_bill'], bins=20, kde=False)
Out[26]:
In [27]:
# KDE (kernel density estimation) plots a smooth estimate of the shape of a distribution.
# kde=False switches that option off for this call.
# NOTE(review): this cell describes KDE yet disables it — confirm whether
# hist=False (KDE curve only) was the intended demonstration.
sns.distplot(tips['total_bill'], kde=False)
Out[27]:
In [28]:
# Distinct values found in the time column (bracket access, not attribute access).
tips["time"].unique()
Out[28]:
The plot below shows the relationship between five variables in the tips dataset. Three are numeric, and two are categorical. Two numeric variables (total_bill and tip) determine the position of each point on the axes, and the third (size) determines the size of each point. One categorical variable (time) splits the dataset onto two different axes (facets), and the other (smoker) determines the color and shape of each point.
In [29]:
# One facet per value of time; smoker drives both colour and marker style,
# and the size column drives the marker size.
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    col="time",
    hue="smoker",
    style="smoker",
    size="size",
)
Out[29]:
In [30]:
# Same variable mapping as the previous relplot, drawn with kind="line".
sns.relplot(
    data=tips,
    kind="line",
    x="total_bill",
    y="tip",
    col="time",
    hue="smoker",
    style="smoker",
    size="size",
)
Out[30]: