100字范文,内容丰富有趣,生活中的好帮手!
100字范文 > 数据科学和人工智能技术笔记 二十 数据可视化

数据科学和人工智能技术笔记 二十 数据可视化

时间:2021-08-06 00:29:05

相关推荐

数据科学和人工智能技术笔记 二十 数据可视化

二十、数据可视化

作者:Chris Albon

译者:飞龙

协议:CC BY-NC-SA 4.0

Matplotlib 中的双向条形图

%matplotlib inlineimport pandas as pdimport matplotlib.pyplot as pltimport numpy as np# 创建数据帧raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],'pre_score': [4, 24, 31, 2, 3],'mid_score': [25, 94, 57, 62, 70],'post_score': [5, 43, 23, 23, 51]}df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])df

# Back-to-back horizontal bar chart comparing two rows of `df`.
#
# Rows 1 and 2 are selected by position and column 0 is skipped, leaving the
# three score columns. `DataFrame.ix` has been removed from pandas, so the
# purely positional `.iloc` indexer is used instead. The row slice comes back
# with object dtype, hence the explicit cast to int.
x1 = df.iloc[1, 1:].astype(int)
x2 = df.iloc[2, 1:].astype(int)

# Tick labels for the three bars.
bar_labels = ['Pre Score', 'Mid Score', 'Post Score']

fig = plt.figure(figsize=(8, 6))

# One y position per score column.
y_pos = list(range(len(x1)))
plt.yticks(y_pos, bar_labels, fontsize=10)

# x1 is drawn to the right of zero in dark green.
plt.barh(y_pos, x1, align='center', alpha=0.4, color='#263F13')

# x2 is negated so its bars grow to the left of zero, in light green.
plt.barh(y_pos, -x2, align='center', alpha=0.4, color='#77A61D')

# Annotations and axis limits (10 units of padding on each side).
plt.xlabel('Tina\'s Score: Light Green. Molly\'s Score: Dark Green')
t = plt.title('Comparison of Molly and Tina\'s Score')
plt.ylim([-1, len(x1) + 0.1])
plt.xlim([-max(x2) - 10, max(x1) + 10])
plt.grid()
plt.show()

Matplotlib 中的条形图

%matplotlib inlineimport pandas as pdimport matplotlib.pyplot as pltimport numpy as np# 创建数据帧raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],'pre_score': [4, 24, 31, 2, 3],'mid_score': [25, 94, 57, 62, 70],'post_score': [5, 43, 23, 23, 51]}df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])df

# Bar chart of the mean score per test, with error bars of +/- 25 % of each
# mean.

# Mean of every score column, in plotting order.
mean_values = [
    df['pre_score'].mean(),
    df['mid_score'].mean(),
    df['post_score'].mean(),
]

# Error-bar half-height for each bar: a quarter of that bar's own mean.
# (Previously all three entries were derived from the pre_score mean.)
variance = [mean * 0.25 for mean in mean_values]

bar_labels = ['Pre Score', 'Mid Score', 'Post Score']

# x position of each bar.
x_pos = list(range(len(bar_labels)))

plt.bar(
    x_pos,
    mean_values,
    yerr=variance,
    align='center',
    color='#FFC222',
    alpha=0.5,
)

plt.grid()

# (mean, error) pair whose error bar reaches highest; leave 10 % headroom
# above it so the cap is not clipped.
max_y = max(zip(mean_values, variance), key=sum)
plt.ylim([0, (max_y[0] + max_y[1]) * 1.1])

# Axis labels and title.
plt.ylabel('Score')
plt.xticks(x_pos, bar_labels)
plt.title('Mean Scores For Each Test')

plt.show()

Seaborn 中的调色板

import pandas as pd%matplotlib inlineimport matplotlib.pyplot as pltimport seaborn as sns# 创建数据帧data = {'date': ['-05-01 18:47:05.069722', '-05-01 18:47:05.119994', '-05-02 18:47:05.178768', '-05-02 18:47:05.230071', '-05-02 18:47:05.230071', '-05-02 18:47:05.280592', '-05-03 18:47:05.332662', '-05-03 18:47:05.385109', '-05-04 18:47:05.436523', '-05-04 18:47:05.486877'], 'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41]}df = pd.DataFrame(data, columns = ['date', 'battle_deaths', 'deaths_regiment_1', 'deaths_regiment_2','deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5','deaths_regiment_6', 'deaths_regiment_7'])df = df.set_index(df.date)sns.palplot(sns.color_palette("deep", 10))

# Preview ten colours from each remaining named palette, in the same order
# as before; every call to palplot draws its own strip of swatches.
palette_names = (
    "muted", "bright", "dark", "colorblind", "Paired",
    "BuGn", "GnBu", "OrRd", "PuBu",
    "YlGn", "YlGnBu", "YlOrBr", "YlOrRd",
    "BrBG", "PiYG", "PRGn", "PuOr",
    "RdBu", "RdGy", "RdYlBu", "RdYlGn", "Spectral",
)
for palette_name in palette_names:
    sns.palplot(sns.color_palette(palette_name, 10))

# Define a custom six-colour palette from hex codes.
flatui = [
    "#9b59b6",
    "#3498db",
    "#95a5a6",
    "#e74c3c",
    "#34495e",
    "#2ecc71",
]

# Make it the active palette, then preview whatever palette is now current.
sns.set_palette(flatui)
current_palette = sns.color_palette()
sns.palplot(current_palette)

# Plot the mean deaths across the seven regiments at each observation, in a
# single explicit colour.
#
# `sns.tsplot` has been removed from seaborn, so the wide frame is reshaped
# to long form and drawn with `sns.lineplot`, which averages the repeated x
# values and shades a bootstrap confidence band around the mean.
regiment_columns = [
    'deaths_regiment_1', 'deaths_regiment_2', 'deaths_regiment_3',
    'deaths_regiment_4', 'deaths_regiment_5', 'deaths_regiment_6',
    'deaths_regiment_7',
]

# Drop the date index (it contains duplicates) and number the observations
# 0..n-1, which is the x axis tsplot used for unlabelled input.
wide = df[regiment_columns].reset_index(drop=True)
wide['observation'] = range(len(wide))
long_form = wide.melt(
    id_vars='observation', var_name='regiment', value_name='deaths'
)

# errorbar=('ci', 68) mirrors tsplot's default 68 % interval; this keyword
# needs seaborn >= 0.12 (older releases spell it ci=68).
sns.lineplot(
    data=long_form,
    x='observation',
    y='deaths',
    color="#34495e",
    errorbar=('ci', 68),
)

使用 Seaborn 和 pandas 创建时间序列绘图

import pandas as pd%matplotlib inlineimport matplotlib.pyplot as pltimport seaborn as snsdata = {'date': ['-05-01 18:47:05.069722', '-05-01 18:47:05.119994', '-05-02 18:47:05.178768', '-05-02 18:47:05.230071', '-05-02 18:47:05.230071', '-05-02 18:47:05.280592', '-05-03 18:47:05.332662', '-05-03 18:47:05.385109', '-05-04 18:47:05.436523', '-05-04 18:47:05.486877'], 'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41]}df = pd.DataFrame(data, columns = ['date', 'battle_deaths', 'deaths_regiment_1', 'deaths_regiment_2','deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5','deaths_regiment_6', 'deaths_regiment_7'])df = df.set_index(df.date)sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4,df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], color="indianred")# <matplotlib.axes._subplots.AxesSubplot at 0x1140be780>

# Time series drawn as mean markers with confidence-interval bars and no
# connecting line.
#
# `sns.tsplot(..., err_style="ci_bars", interpolate=False)` is no longer
# available because tsplot has been removed from seaborn; the equivalent is
# `sns.lineplot` on long-form data with err_style="bars" and an empty
# linestyle.
regiment_columns = [
    'deaths_regiment_1', 'deaths_regiment_2', 'deaths_regiment_3',
    'deaths_regiment_4', 'deaths_regiment_5', 'deaths_regiment_6',
    'deaths_regiment_7',
]

# Drop the date index (it contains duplicates) and number the observations.
wide = df[regiment_columns].reset_index(drop=True)
wide['observation'] = range(len(wide))
long_form = wide.melt(
    id_vars='observation', var_name='regiment', value_name='deaths'
)

# errorbar=('ci', 68) mirrors tsplot's default 68 % interval; this keyword
# needs seaborn >= 0.12 (older releases spell it ci=68).
sns.lineplot(
    data=long_form,
    x='observation',
    y='deaths',
    err_style="bars",
    errorbar=('ci', 68),
    marker="o",
    linestyle="",
)

使用 Seaborn 创建散点图

import pandas as pd%matplotlib inlineimport randomimport matplotlib.pyplot as pltimport seaborn as sns# 创建空数据帧df = pd.DataFrame()# 添加列df['x'] = random.sample(range(1, 1000), 5)df['y'] = random.sample(range(1, 1000), 5)df['z'] = [1,0,0,1,0]df['k'] = ['male','male','male','female','female']# 查看前几行数据df.head()

# Scatter plot of df['x'] against df['y'], coloured by df['z'].

# Plot styling.
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")

sns.lmplot(
    # x and y are passed by keyword: current seaborn releases no longer
    # accept them positionally.
    x='x',
    y='y',
    data=df,
    # Scatter only, no fitted regression line.
    fit_reg=False,
    # Colour the points by the value of z.
    hue="z",
    # Diamond markers. This must go through `markers`: a "marker" entry in
    # scatter_kws is overwritten by lmplot's own marker handling.
    markers="D",
    # Marker size.
    scatter_kws={"s": 100},
)

# NOTE(review): the title and axis labels below do not describe the x/y
# columns being plotted; kept unchanged — confirm the intended wording.
plt.title('Histogram of IQ')
plt.xlabel('Time')
plt.ylabel('Deaths')

Matplotlib 中的分组条形图

%matplotlib inlineimport pandas as pdimport matplotlib.pyplot as pltimport numpy as npraw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],'pre_score': [4, 24, 31, 2, 3],'mid_score': [25, 94, 57, 62, 70],'post_score': [5, 43, 23, 23, 51]}df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])df

# Grouped bar chart: for every student, three adjacent bars (pre, mid, post).

# Left edge of each student's group, and the width of a single bar.
pos = list(range(len(df['pre_score'])))
width = 0.25

fig, ax = plt.subplots(figsize=(10, 5))

# All bars use align='edge' so that `pos` really is the left edge of each
# group. The tick and axis-limit arithmetic below (group centre at
# p + 1.5 * width) assumes that; with the default centre alignment the tick
# labels sit half a bar to the right of the group centre.
#
# Each series is labelled with the test it represents so the legend can be
# built from the artists' own labels.
ax.bar(
    pos,
    df['pre_score'],
    width,
    align='edge',
    alpha=0.5,
    color='#EE3224',
    label='Pre Score',
)

# mid_score bars, shifted right by one bar width.
ax.bar(
    [p + width for p in pos],
    df['mid_score'],
    width,
    align='edge',
    alpha=0.5,
    color='#F78F1E',
    label='Mid Score',
)

# post_score bars, shifted right by two bar widths.
ax.bar(
    [p + width * 2 for p in pos],
    df['post_score'],
    width,
    align='edge',
    alpha=0.5,
    color='#FFC222',
    label='Post Score',
)

ax.set_ylabel('Score')
ax.set_title('Test Subject Scores')

# One tick per student, centred under the three-bar group.
ax.set_xticks([p + 1.5 * width for p in pos])
ax.set_xticklabels(df['first_name'])

# x range: one bar width of padding on the left, four widths past the last
# group's left edge. y range: up to the largest per-student total of the
# three scores.
plt.xlim(min(pos) - width, max(pos) + width * 4)
plt.ylim([0, max(df['pre_score'] + df['mid_score'] + df['post_score'])])

# Legend and grid, then render.
plt.legend(loc='upper left')
plt.grid()
plt.show()

Matplotlib 中的直方图

%matplotlib inlineimport pandas as pdimport matplotlib.pyplot as pltimport numpy as npimport math# 设置 ipython 的最大行数pd.set_option('display.max_row', 1000)# 将 ipython 的最大列宽设为 50pd.set_option('display.max_columns', 50)df = pd.read_csv('/s/52cb7kcflr8qm2u/5kings_battles_v1.csv?dl=1')df.head()

# Overlaid histograms of attacker and defender army sizes using bins of a
# fixed width.

# Keep only battles whose attacker_size is below 90000; the same row filter
# is applied to defender_size so both series cover the same battles.
data1 = df['attacker_size'][df['attacker_size'] < 90000]
data2 = df['defender_size'][df['attacker_size'] < 90000]

# Bin edges every 2000 troops, spanning the smallest to the largest value of
# BOTH series. Series.min()/max() skip missing values. One extra bin width is
# added to the stop value because np.arange excludes its end point, which
# would otherwise drop the largest observation.
bin_width = 2000
lowest = min(data1.min(), data2.min())
highest = max(data1.max(), data2.max())
bins = np.arange(lowest, highest + bin_width, bin_width)

# Attacker sizes.
plt.hist(data1, bins=bins, alpha=0.5, color='#EDD834', label='Attacker')

# Defender sizes.
plt.hist(data2, bins=bins, alpha=0.5, color='#887E43', label='Defender')

# Cap the y axis at 10 battles.
plt.ylim([0, 10])

# Title, labels and legend.
plt.title('Histogram of Attacker and Defender Size')
plt.xlabel('Number of troops')
plt.ylabel('Number of battles')
plt.legend(loc='upper right')

plt.show()

# Overlaid histograms of attacker and defender army sizes using ten
# equal-width bins.

# Keep only battles whose attacker_size is below 90000; the same row filter
# is applied to defender_size so both series cover the same battles.
data1 = df['attacker_size'][df['attacker_size'] < 90000]
data2 = df['defender_size'][df['attacker_size'] < 90000]

# Range covered by either series. The previous `min(data1 + data2)` took the
# minimum of the element-wise SUM of the two series, not of their combined
# values. Series.min()/max() also skip missing values.
lowest = min(data1.min(), data2.min())
highest = max(data1.max(), data2.max())

# Ten bins need eleven edges.
bins = np.linspace(lowest, highest, 11)

# Attacker sizes.
plt.hist(
    data1,
    bins=bins,
    alpha=0.5,
    color='#EDD834',
    label='Attacker',
)

# Defender sizes.
plt.hist(
    data2,
    bins=bins,
    alpha=0.5,
    color='#887E43',
    label='Defender',
)

# Cap the y axis at 10 battles.
plt.ylim([0, 10])

# Title, labels and legend.
plt.title('Histogram of Attacker and Defender Size')
plt.xlabel('Number of troops')
plt.ylabel('Number of battles')
plt.legend(loc='upper right')

plt.show()

从 pandas 数据帧生成 Matplotlib 散点图

%matplotlib inlineimport pandas as pdimport matplotlib.pyplot as pltimport numpy as npraw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'], 'female': [0, 1, 1, 0, 1],'age': [42, 52, 36, 24, 73], 'preTestScore': [4, 24, 31, 2, 3],'postTestScore': [25, 94, 57, 62, 70]}df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'female', 'preTestScore', 'postTestScore'])df

# Scatter of pre-test score (x) against post-test score (y); each marker's
# size is taken from that person's age.
plt.scatter(
    df.preTestScore,
    df.postTestScore,
    s=df.age,
)
# Notebook output: <matplotlib.collections.PathCollection at 0x10ca42b00>

# Scatter of pre-test score (x) against post-test score (y) with a fixed
# marker size of 300; the colour of each marker follows the `female` flag.
plt.scatter(
    df.preTestScore,
    df.postTestScore,
    s=300,
    c=df.female,
)
# Notebook output: <matplotlib.collections.PathCollection at 0x10cb90a90>

Matplotlib 的简单示例

# 让 Jupyter 加载 matplotlib # 并内联创建所有绘图(也就是在页面上)%matplotlib inlineimport matplotlib.pyplot as pyplotpyplot.plot([1.6, 2.7])# [<matplotlib.lines.Line2D at 0x10c4e7978>]

Matplotlib 中的饼图

%matplotlib inlineimport pandas as pdimport matplotlib.pyplot as pltraw_data = {'officer_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],'jan_arrests': [4, 24, 31, 2, 3],'feb_arrests': [25, 94, 57, 62, 70],'march_arrests': [5, 43, 23, 23, 51]}df = pd.DataFrame(raw_data, columns = ['officer_name', 'jan_arrests', 'feb_arrests', 'march_arrests'])df

# Add a column holding each officer's total arrests over the three months
# (row-wise sum of the monthly columns), then display the frame.
monthly_columns = ['jan_arrests', 'feb_arrests', 'march_arrests']
df['total_arrests'] = df[monthly_columns].sum(axis=1)
df

# Wedge colours (generated with iWantHue); more colours than wedges are
# listed, so only the first ones are used.
colors = [
    "#E13F29", "#D69A80", "#D63B59", "#AE5552",
    "#CB5C3B", "#EB8076", "#96624E",
]

# Pie chart of total arrests per officer.
plt.pie(
    df['total_arrests'],
    # Label each wedge with the officer's name.
    labels=df['officer_name'],
    colors=colors,
    # Flat look, no drop shadow.
    shadow=False,
    # Pull the fifth wedge out of the pie.
    explode=(0, 0, 0, 0, 0.15),
    # Start drawing at 90 degrees.
    startangle=90,
    # Print each share as a percentage with one decimal.
    autopct='%1.1f%%',
)

# Equal axis scaling keeps the pie circular.
plt.axis('equal')

# Tidy the layout and render.
plt.tight_layout()
plt.show()

Matplotlib 中的散点图

%matplotlib inlineimport pandas as pdimport matplotlib.pyplot as pltimport numpy as np# 展示 ipython 的最大行数pd.set_option('display.max_row', 1000)# 将 ipython 的最大列宽设为 50pd.set_option('display.max_columns', 50)df = pd.read_csv('/chrisalbon/war_of_the_five_kings_dataset/master/5kings_battles_v1.csv')df.head()

# Scatter plot of attacker size (x) against defender size (y), with one
# marker style per year.

plt.figure(figsize=(10, 8))

# (year, marker, colour) for each series; the three series differ only in
# these values, so they are drawn in a loop.
year_styles = [
    (298, 'x', 'b'),
    (299, 'o', 'r'),
    (300, '^', 'g'),
]
for year, marker, color in year_styles:
    in_year = df['year'] == year
    plt.scatter(
        # Attacker size on the x axis, defender size on the y axis.
        df['attacker_size'][in_year],
        df['defender_size'][in_year],
        marker=marker,
        color=color,
        alpha=0.7,
        s=124,
        label='Year {}'.format(year),
    )

plt.title('Battles Of The War Of The Five Kings')
plt.ylabel('Defender Size')
plt.xlabel('Attacker Size')
plt.legend(loc='upper right')

# Axis limits with 1000 troops of padding. Series.min()/max() are used
# instead of the builtins because they skip missing values; the builtin
# min/max can return NaN when the column contains any.
plt.xlim([df['attacker_size'].min() - 1000, df['attacker_size'].max() + 1000])
plt.ylim([df['defender_size'].min() - 1000, df['defender_size'].max() + 1000])

plt.show()

Matplotlib 中的栈式百分比条形图

%matplotlib inlineimport pandas as pdimport matplotlib.pyplot as pltraw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],'pre_score': [4, 24, 31, 2, 3],'mid_score': [25, 94, 57, 62, 70],'post_score': [5, 43, 23, 23, 51]}df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])df

# 100 % stacked bar chart: for each student, the share of the total score
# contributed by the pre, mid and post tests.

# Figure with a single subplot.
f, ax = plt.subplots(1, figsize=(10, 5))

# Bars are one unit wide so neighbouring bars touch.
bar_width = 1

# Left edge of every bar.
bar_l = [i for i in range(len(df['pre_score']))]

# Tick positions at the centre of each bar (left edge + half a width).
tick_pos = [i + (bar_width / 2) for i in bar_l]

# Total score per student.
totals = [i + j + k for i, j, k in zip(df['pre_score'], df['mid_score'], df['post_score'])]

# Each test's score as a percentage of that student's total.
pre_rel = [i / j * 100 for i, j in zip(df['pre_score'], totals)]
mid_rel = [i / j * 100 for i, j in zip(df['mid_score'], totals)]
post_rel = [i / j * 100 for i, j in zip(df['post_score'], totals)]

# All three layers use align='edge' so that `bar_l` really is the left edge
# of each bar. The tick and axis-limit arithmetic in this cell assumes that;
# with the default centre alignment the tick labels land on the boundary
# between bars instead of under their centres.

# Bottom layer: pre_score share.
ax.bar(
    bar_l,
    pre_rel,
    label='Pre Score',
    alpha=0.9,
    color='#019600',
    width=bar_width,
    align='edge',
    edgecolor='white',
)

# Middle layer: mid_score share, stacked on top of pre_rel.
ax.bar(
    bar_l,
    mid_rel,
    bottom=pre_rel,
    label='Mid Score',
    alpha=0.9,
    color='#3C5F5A',
    width=bar_width,
    align='edge',
    edgecolor='white',
)

# Top layer: post_score share, stacked on top of pre_rel + mid_rel.
ax.bar(
    bar_l,
    post_rel,
    bottom=[i + j for i, j in zip(pre_rel, mid_rel)],
    label='Post Score',
    alpha=0.9,
    color='#219AD8',
    width=bar_width,
    align='edge',
    edgecolor='white',
)

# Label each bar with the student's first name.
plt.xticks(tick_pos, df['first_name'])
ax.set_ylabel("Percentage")
ax.set_xlabel("")

# Axis limits: one bar width of padding around the ticks, and a 10-point
# margin below 0 % and above 100 %.
plt.xlim([min(tick_pos) - bar_width, max(tick_pos) + bar_width])
plt.ylim(-10, 110)

# Rotate the tick labels by 45 degrees.
plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')

plt.show()

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。