铁达尼号的灾难

are we going down or will we fly

Featured image
二分类问题 死或生

铁达尼号的灾难

下载数据

使用 pandas 查看

import pandas as pd #数据分析
import numpy as np #科学计算
from pandas import Series,DataFrame

# Location of the Kaggle Titanic training CSV on the author's machine.
train_csv_path = "E:/Kaggle/Titanic/Train.csv"

# Parse the CSV into a pandas DataFrame.
data_train = pd.read_csv(train_csv_path)

# Bare expression: in a notebook/REPL cell this echoes the DataFrame;
# when run as a plain script it has no visible effect.
data_train

得到 DataFrame 格式的数据 Snipaste_2020-04-11_17-42-38

其实和Excel看起来差不多 Snipaste_2020-04-11_17-43-18

数据字典:

######

绘制详细图表后再看
import matplotlib as mpl
# MultipleLocator lives in matplotlib.ticker; pyplot only re-exports it.
from matplotlib.ticker import MultipleLocator  # tick spacing helper
#y_major_locator=MultipleLocator(100)

import matplotlib.pyplot as plt

# Overview dashboard of the training data: a 2x3 grid with five panels
# (survival counts, class counts, age vs. survival, age density per class,
# embarkation counts). Reads the `data_train` DataFrame loaded earlier.

# Use the KaiTi font so the Chinese titles and labels below can be rendered.
mpl.rcParams['font.sans-serif'] = ['KaiTi']
mpl.rcParams['font.serif'] = ['KaiTi']
# Keep the minus sign '-' from being rendered as a box with this font.
mpl.rcParams['axes.unicode_minus'] = False

# Create exactly ONE figure and keep a handle to it. Creating a second
# figure afterwards would leave `fig` pointing at an empty canvas, so the
# later fig.tight_layout() would not touch the plotted panels and
# plt.show() would also pop up a blank window.
fig = plt.figure(figsize=(10, 10))
fig.set(alpha=0.2)  # alpha setting for the figure

# --- Row 0, col 0: how many passengers survived vs. did not ---------------
plt.subplot2grid((2, 3), (0, 0))  # place a small panel inside the big grid
data_train.Survived.value_counts().plot(kind='bar')  # bar chart
plt.title("获救情况 (1为获救)")
plt.ylabel("人数")

# --- Row 0, col 1: passenger count per ticket class -----------------------
plt.subplot2grid((2, 3), (0, 1))
data_train.Pclass.value_counts().plot(kind="bar")
plt.ylabel("人数")
plt.title("乘客等级分布")

# --- Row 0, col 2: age plotted against the survival flag ------------------
plt.subplot2grid((2, 3), (0, 2))
plt.scatter(data_train.Survived, data_train.Age)
plt.ylabel("年龄")  # y-axis label
# Pass the on/off flag positionally: the keyword was called `b` in older
# Matplotlib and `visible` in newer releases, so neither keyword is portable.
plt.grid(True, which='major', axis='y')
plt.title("按年龄看获救分布 (1为获救)")

# --- Row 1, cols 0-1: age density (KDE) for each ticket class -------------
plt.subplot2grid((2, 3), (1, 0), colspan=2)
data_train.Age[data_train.Pclass == 1].plot(kind='kde')
data_train.Age[data_train.Pclass == 2].plot(kind='kde')
data_train.Age[data_train.Pclass == 3].plot(kind='kde')
plt.xlabel("年龄")  # x-axis label
plt.ylabel("密度")
plt.title("各等级的乘客年龄分布")
# Legend entries follow the order in which the three curves were drawn.
plt.legend(('头等舱', '2等舱', '3等舱'), loc='best')
# NOTE(review): this changes the default DPI for figures created AFTER this
# line; the figure above already exists, so it is presumably unaffected.
# 3000 is very high for a 10x10 inch figure -- confirm this is intended.
plt.rcParams['figure.dpi'] = 3000


#ax=plt.gca()
#ax.yaxis.set_major_locator(y_major_locator)

# --- Row 1, col 2: passenger count per port of embarkation ----------------
plt.subplot2grid((2, 3), (1, 2))
data_train.Embarked.value_counts().plot(kind='bar')
plt.title("各登船口岸上船人数")
plt.ylabel("人数")

fig.tight_layout()  # keep the panels from overlapping each other
plt.show()

Snipaste_2020-04-12_05-14-39