Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

PYTHON: Hello, I need help creating this Python program. I have provided the requirements below.

ID: 3816924 • Letter: P

Question

PYTHON

Hello, I need help creating this Python program. I have provided the requirements below. Thank you in advance.

titanic.csv

PassengerId Survived Pclass Sex Age SibSp Parch Fare Embarked 1 0 3 male 22 1 0 7.25 S 2 1 1 female 38 1 0 71.2833 C 3 1 3 female 26 0 0 7.925 S 4 1 1 female 35 1 0 53.1 S 5 0 3 male 35 0 0 8.05 S 6 0 3 male 0 0 8.4583 Q 7 0 1 male 54 0 0 51.8625 S 8 0 3 male 2 3 1 21.075 S 9 1 3 female 27 0 2 11.1333 S 10 1 2 female 14 1 0 30.0708 C 11 1 3 female 4 1 1 16.7 S 12 1 1 female 58 0 0 26.55 S 13 0 3 male 20 0 0 8.05 S 14 0 3 male 39 1 5 31.275 S 15 0 3 female 14 0 0 7.8542 S 16 1 2 female 55 0 0 16 S 17 0 3 male 2 4 1 29.125 Q 18 1 2 male 0 0 13 S 19 0 3 female 31 1 0 18 S 20 1 3 female 0 0 7.225 C 21 0 2 male 35 0 0 26 S 22 1 2 male 34 0 0 13 S 23 1 3 female 15 0 0 8.0292 Q 24 1 1 male 28 0 0 35.5 S 25 0 3 female 8 3 1 21.075 S 26 1 3 female 38 1 5 31.3875 S 27 0 3 male 0 0 7.225 C 28 0 1 male 19 3 2 263 S 29 1 3 female 0 0 7.8792 Q 30 0 3 male 0 0 7.8958 S 31 0 1 male 40 0 0 27.7208 C 32 1 1 female 1 0 146.5208 C 33 1 3 female 0 0 7.75 Q 34 0 2 male 66 0 0 10.5 S 35 0 1 male 28 1 0 82.1708 C 36 0 1 male 42 1 0 52 S 37 1 3 male 0 0 7.2292 C 38 0 3 male 21 0 0 8.05 S 39 0 3 female 18 2 0 18 S 40 1 3 female 14 1 0 11.2417 C 41 0 3 female 40 1 0 9.475 S 42 0 2 female 27 1 0 21 S 43 0 3 male 0 0 7.8958 C 44 1 2 female 3 1 2 41.5792 C 45 1 3 female 19 0 0 7.8792 Q 46 0 3 male 0 0 8.05 S 47 0 3 male 1 0 15.5 Q 48 1 3 female 0 0 7.75 Q 49 0 3 male 2 0 21.6792 C

Explanation / Answer


import os
# Work from the folder that holds train.csv and test.csv; adjust for your machine.
# NOTE(review): the literal had lost its path separators ("C:UsersDesktop itanic");
# restored on the assumption that it pointed at Desktop\titanic -- confirm.
os.chdir(path=r"C:\Users\Desktop\titanic")


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier

try:
    from sklearn.cross_validation import KFold  # scikit-learn < 0.20
except ImportError:  # module was removed in scikit-learn 0.20
    from sklearn.model_selection import KFold

# Read the input files (train first, then test), forcing Age to float64 in both.
train, test = (
    pd.read_csv(csv_name, dtype={"Age": np.float64})
    for csv_name in ("train.csv", "test.csv")
)
# Both frames in one list so the same transformations can be looped over them.
combine = [train, test]
# Passenger ids of the test set, kept separately from the frame.
PassengerId = test['PassengerId']

# Finding substrings
def substrings_in_string(big_string, substrings):
    """Return the first entry of ``substrings`` that occurs in ``big_string``.

    Candidates are tested in the order given, so when one candidate is
    contained in another (e.g. 'Mr' inside 'Mrs') the longer one has to be
    listed first to be detected.

    Parameters:
        big_string: the string to search in.
        substrings: iterable of candidate substrings, in priority order.

    Returns:
        The first matching candidate, or ``np.nan`` when none matches.
    """
    return next(
        (substring for substring in substrings if substring in big_string),
        np.nan,
    )


# Mappings of parameters

# Title substrings searched for in passenger names; 'Mrs' is listed before
# 'Mr' because the search returns the first candidate that matches.
title_list = [
    'Mrs', 'Mr', 'Master', 'Miss', 'Major', 'Rev', 'Dr', 'Ms', 'Mlle',
    'Col', 'Capt', 'Mme', 'Countess', 'Don', 'Jonkheer',
]

# Deck letters searched for in cabin strings.
cabin_list = ['A', 'B', 'C', 'D', 'E', 'F', 'T', 'G', 'Unknown']

# Integer code for each title group.
title_mapping = dict(Mr=1, Miss=2, Mrs=3, Master=4, Rare=5)

# Fill missing test fares with the most common third-class fare.
fare_mode = test.loc[test['Pclass'] == 3, 'Fare'].mode()
test['Fare'] = test['Fare'].fillna(fare_mode[0])

# Fill missing embarkation ports with the mode among first-class passengers
# whose fare lies in (75, 85].
emb_mode = train.loc[
    (train['Pclass'] == 1) & (train['Fare'] <= 85) & (train['Fare'] > 75),
    'Embarked',
].mode()
train['Embarked'] = train['Embarked'].fillna(emb_mode[0])

# Integer codes for the categorical columns. Using Series.map with these
# tables replaces the original chained-indexing assignments
# (df["Sex"][mask] = ...), which pandas may apply to a temporary copy.
sex_mapping = {"male": 0, "female": 1}
deck_mapping = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "T": 8}
embarked_mapping = {"S": 0, "C": 1, "Q": 2}

# Feature engineering, applied in place to every frame in `combine`.
for df in combine:
    # Converting male and female groups to integer form
    df["Sex"] = df["Sex"].map(sex_mapping)

    # Create Title feature: first title substring found in the name,
    # uncommon titles grouped as 'Rare', then encoded (0 = no title found).
    df['Title'] = df['Name'].astype(str).map(
        lambda x: substrings_in_string(x, title_list))
    df['Title'] = df['Title'].replace(
        ['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev',
         'Sir', 'Jonkheer', 'Dona'],
        'Rare')
    df['Title'] = df['Title'].replace('Mlle', 'Miss')
    df['Title'] = df['Title'].replace('Ms', 'Miss')
    df['Title'] = df['Title'].replace('Mme', 'Mrs')
    df['Title'] = df['Title'].map(title_mapping)
    df['Title'] = df['Title'].fillna(0)

    # Create Deck feature: deck letter found in the cabin string, encoded
    # as an integer (0 = no deck letter found, e.g. missing cabin).
    df['Deck'] = df['Cabin'].astype(str).map(
        lambda x: substrings_in_string(x, cabin_list))
    df['Deck'] = df['Deck'].map(deck_mapping).fillna(0).astype(int)

    # Family size: siblings/spouses + parents/children + the passenger.
    df['Family_size'] = df['SibSp'] + df['Parch'] + 1

    # isAlone feature based off family size
    df['isAlone'] = 0
    df.loc[df['Family_size'] == 1, 'isAlone'] = 1

    # Convert the embarkation ports to integer form
    df["Embarked"] = df["Embarked"].map(embarked_mapping)

    # Impute missing ages with random integers drawn from
    # [mean - std, mean + std) of this frame's known ages.
    age_avg = df['Age'].mean()
    age_std = df['Age'].std()
    age_null_count = df['Age'].isnull().sum()
    # randint expects integer bounds, so truncate them explicitly.
    age_null_random_list = np.random.randint(
        int(age_avg - age_std), int(age_avg + age_std), size=age_null_count)
    df.loc[df['Age'].isnull(), 'Age'] = age_null_random_list

    # Bin Age into five ordinal groups. The statements must run in this
    # order: codes already written (0-4) are all <= 16, so the later
    # conditions (> 16, > 32, ...) never re-match them.
    df.loc[df['Age'] <= 16, 'Age'] = 0
    df.loc[(df['Age'] > 16) & (df['Age'] <= 32), 'Age'] = 1
    df.loc[(df['Age'] > 32) & (df['Age'] <= 48), 'Age'] = 2
    df.loc[(df['Age'] > 48) & (df['Age'] <= 64), 'Age'] = 3
    df.loc[df['Age'] > 64, 'Age'] = 4


# Creating the feature set: remove the columns that are not fed to the model.
excl = ['PassengerId', 'Ticket', 'Cabin', 'Name', 'SibSp', 'Parch']
train = train.drop(columns=excl)
test = test.drop(columns=excl)

# Checking correlation between the remaining training columns.
corr = train.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(
    corr,
    annot=True,
    square=True,
    vmax=1,
)
plt.title('feature correlations')


# Setting parameters for ensembling using k-fold
ntrain = train.shape[0]
ntest = test.shape[0]
seed = 10
nfolds = 5

# sklearn.cross_validation (KFold(n, n_folds=...)) was removed in
# scikit-learn 0.20. Its replacement takes n_splits and produces the index
# pairs through .split(). They are materialised as a list so that `kf` can
# still be iterated directly as (train_index, test_index) pairs.
# random_state is not passed: shuffling is off, so the folds are the same
# contiguous blocks as before and the seed had no effect on them.
from sklearn.model_selection import KFold
kf = list(KFold(n_splits=nfolds).split(np.arange(ntrain)))


# Sklearn class building for model training
class SklearnHandler(object):
    """Wrapper that builds an estimator from a class plus parameters.

    Parameters:
        clf: estimator class (not an instance); called as ``clf(**params)``.
        seed: value stored under ``random_state`` in the parameters.
        params: optional dict of keyword arguments for ``clf``. It is copied,
            so the caller's dict is left unchanged.
    """

    def __init__(self, clf, seed=0, params=None):
        # Copy: the original wrote 'random_state' into the caller's dict and
        # failed with a TypeError when params was left at its None default.
        params = dict(params or {})
        params['random_state'] = seed
        self.clf = clf(**params)

    def train(self, x_train, y_train):
        """Fit the wrapped estimator; returns None."""
        self.clf.fit(x_train, y_train)

    def predict(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        return self.clf.predict(x)

    def fit(self, x, y):
        """Fit the wrapped estimator and return whatever its ``fit`` returns."""
        return self.clf.fit(x, y)

    def feature_importances(self, x, y):
        """Fit on ``(x, y)`` and return the fitted ``feature_importances_``."""
        return self.clf.fit(x, y).feature_importances_

# Function to get out-of-fold predictions for k-fold
def get_oof(clf, x_train, y_train, x_test, folds=None):
    """Compute out-of-fold predictions for the training set and averaged
    predictions for the test set.

    For every fold the model is trained on that fold's training rows, then
    used to predict the fold's held-out rows and the whole test set.

    Parameters:
        clf: object exposing ``train(x, y)`` and ``predict(x)``.
        x_train: 2-D array of training features.
        y_train: 1-D array of training targets.
        x_test: 2-D array of test features.
        folds: optional iterable of ``(train_index, test_index)`` pairs;
            defaults to the module-level ``kf``.

    Returns:
        Tuple ``(oof_train, oof_test)`` of column vectors with shapes
        ``(len(x_train), 1)`` and ``(len(x_test), 1)``; ``oof_test`` is the
        mean of the per-fold test predictions.
    """
    if folds is None:
        folds = kf
    folds = list(folds)

    # Sizes come from the arguments rather than from module-level globals.
    n_train = x_train.shape[0]
    n_test = x_test.shape[0]
    oof_train = np.zeros((n_train,))
    oof_test_skf = np.empty((len(folds), n_test))

    for i, (train_index, test_index) in enumerate(folds):
        clf.train(x_train[train_index], y_train[train_index])
        oof_train[test_index] = clf.predict(x_train[test_index])
        oof_test_skf[i, :] = clf.predict(x_test)

    oof_test = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)
  
  
  
# Random Forest parameters
rf_params = {
    'n_jobs': -1,
    'n_estimators': 1000,
    # Must stay False: the same estimator is re-fitted once per fold, and
    # with warm_start=True and an unchanged n_estimators a repeated fit()
    # adds no trees, so every later fold would reuse the first fold's forest.
    'warm_start': False,
    'max_depth': 6,
    'min_samples_leaf': 2,
    'max_features': 'sqrt',
    'verbose': 0,
}

  
  
# Create models
rf = SklearnHandler(clf=RandomForestClassifier, seed=seed, params=rf_params)

# Create arrays for the models
# .values replaces Series.ravel(), which recent pandas versions deprecate.
y_train = train['Survived'].values
train = train.drop(['Survived'], axis=1)
x_train = train.values
x_test = test.values

# Out-of-fold train and test predictions
rf_oof_train, rf_oof_test = get_oof(rf, x_train, y_train, x_test)  # Random Forest

# Feature importance for RF (fits the model on the full training set)
rf_feature = rf.feature_importances(x_train, y_train)

# Create a dataframe pairing each feature name with its importance
cols = train.columns.values
feature_dataframe = pd.DataFrame({
    'features': cols,
    'Random Forest feature importances': rf_feature,
})

# Feature importance plot
plt.figure(figsize=(12, 8))
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally.
sns.barplot(
    x=feature_dataframe['features'],
    y=feature_dataframe['Random Forest feature importances'],
)

# Row-wise mean of the importance columns. numeric_only=True keeps the string
# 'features' column out of the mean, which would otherwise raise on recent pandas.
feature_dataframe['mean'] = feature_dataframe.mean(axis=1, numeric_only=True)
feature_dataframe