# Create a Function and Prepare Test Data in R

When we build the model, the test data will need the same set of columns as the training data, and we will need to apply to the test data all the same transformations that we applied to the training data.

### Kept Columns

> str_c("'",paste(colnames(data_train),collapse="','"),"'")
# Columns kept for modelling, in the order they are selected from the
# transformed data: engineered/dummy feature names plus the target loan_status.
keep_columns <- c(
  # loan term
  "term_60_months",
  # sub-grade dummies
  "sub_gradeA2", "sub_gradeA3", "sub_gradeA4", "sub_gradeA5",
  "sub_gradeB1", "sub_gradeB2", "sub_gradeB3", "sub_gradeB4", "sub_gradeB5",
  "sub_gradeC1", "sub_gradeC2", "sub_gradeC3", "sub_gradeC4", "sub_gradeC5",
  "sub_gradeD1", "sub_gradeD2", "sub_gradeD3", "sub_gradeD4", "sub_gradeD5",
  "sub_gradeE1", "sub_gradeE2", "sub_gradeE3", "sub_gradeE4", "sub_gradeE5",
  "sub_gradeF1", "sub_gradeF2", "sub_gradeF3", "sub_gradeF4", "sub_gradeF5",
  "sub_gradeG1", "sub_gradeG2", "sub_gradeG3", "sub_gradeG4", "sub_gradeG5",
  # employment, home ownership, verification status
  "emp_lengthn_a",
  "home_ownershipRENT",
  "verification_statusSource_Verified", "verification_statusVerified",
  # loan purpose dummies
  "purposecredit_card", "purposedebt_consolidation",
  "purposehome_improvement", "purposemajor_purchase", "purposemedical",
  "purposemoving", "purposeother", "purposesmall_business",
  "purposevacation",
  # numeric credit features
  "dti", "delinq_2yrs", "earliest_cr_line", "mths_since_last_delinq",
  "application_typeJoint_App",
  "max_bal_bc", "all_util", "inq_fi", "total_cu_tl", "avg_cur_bal",
  "bc_open_to_buy", "mort_acc", "mths_since_recent_bc", "num_actv_bc_tl",
  "num_bc_tl", "num_tl_90g_dpd_24m", "percent_bc_gt_75",
  # state indicators
  "is_ny", "is_pa", "is_nj", "is_co", "is_ga",
  # target
  "loan_status"
)


### Create Function

# Apply the feature-engineering steps to a raw loan data set and return a
# data frame restricted to the modelling columns.
#
# Arguments:
#   dt                       Raw loan data frame. The body reads loan_status,
#                            int_rate, revol_util, issue_d, earliest_cr_line,
#                            addr_state, annual_inc, revol_bal, avg_cur_bal,
#                            bc_open_to_buy, funded_amnt and purpose.
#   use_kept_column          Character vector of columns to keep in the result.
#   use_median_impute_model  Fitted model handed to predict() for imputation.
#   use_dummy_model          Fitted model handed to predict() to build the
#                            dummy variables.
#   use_trans_model          Accepted for interface compatibility; not used by
#                            the body (see the NOTE at the scaling step).
#
# Returns:
#   dt reduced to use_kept_column, with loan_status as a factor with levels
#   "Default" and "Fully.Paid".
#
# Relies on the global all_purpose (the recognised purpose values) and on
# str_detect/str_c/str_replace_all, parse_date_time and preProcess being
# attached by the caller.
applyFeatureTransformations <- function(dt,
                                        use_kept_column = keep_columns,
                                        use_median_impute_model = median_impute_model,
                                        use_dummy_model = dummy_model,
                                        use_trans_model = trans_model) {
  # consolidate loan status: every status that does not contain "Paid"
  # is treated as "Default"
  dt$loan_status <- ifelse(str_detect(dt$loan_status, "Paid"),
                           dt$loan_status, "Default")

  # parse int_rate ("12.5%" -> 12.5)
  dt$int_rate <- as.numeric(gsub(pattern = "%", replacement = "", x = dt$int_rate))

  # impute median, using the model passed in rather than the global one
  dt <- predict(use_median_impute_model, dt)

  # parse revol_util from its own column (it was previously read from int_rate)
  dt$revol_util <- as.numeric(gsub(pattern = "%", replacement = "", x = dt$revol_util))

  # replace earliest_cr_line by the number of days between it and issue_d;
  # "01" is prepended before parsing both values as day-month-year
  issue_date <- parse_date_time(str_c("01", dt$issue_d), "dmy")
  first_credit_date <- parse_date_time(str_c("01", dt$earliest_cr_line), "dmy")
  dt$earliest_cr_line <- as.numeric(issue_date - first_credit_date, units = "days")

  # 1/0 indicator column is_<state> for each of these states
  indicator_states <- c("NY", "PA", "NJ", "OH", "FL", "CO", "GA", "VA", "AZ", "CA")
  for (state in indicator_states) {
    dt[[paste0("is_", tolower(state))]] <- ifelse(dt$addr_state == state, 1, 0)
  }

  # transform transactions: express amounts relative to the funded amount
  dt$annual_inc <- dt$annual_inc / dt$funded_amnt
  dt$revol_bal <- dt$revol_bal / dt$funded_amnt
  dt$avg_cur_bal <- dt$avg_cur_bal / dt$funded_amnt
  dt$bc_open_to_buy <- dt$bc_open_to_buy / dt$funded_amnt

  # if purpose falling outside of recognized values, map it to "other"
  dt$purpose <- ifelse(dt$purpose %in% all_purpose, dt$purpose, "other")

  # create dummy variables; loan_status is set aside and re-attached because
  # the data frame is rebuilt from the dummy model's output
  loan_status <- dt$loan_status
  dt <- as.data.frame(predict(use_dummy_model, dt))
  dt$loan_status <- loan_status

  # center, scale data
  # NOTE(review): centering/scaling is re-fitted on dt itself and
  # use_trans_model is never applied -- confirm whether the transformation
  # fitted on the training data should be used here instead.
  trans_model_test <- preProcess(dt, method = c("center", "scale"))
  dt <- predict(trans_model_test, dt)

  # replace spaces, "<" and "/" in column names with "_"
  colnames(dt) <- str_replace_all(colnames(dt), "[ </]", "_")

  # remove all unused features
  dt <- dt[use_kept_column]

  # set loan with status 'Fully Paid' as a positive sample; any value other
  # than the two factor levels becomes NA
  dt$loan_status <- ifelse(dt$loan_status == "Fully Paid",
                           "Fully.Paid", dt$loan_status)
  dt$loan_status <- factor(dt$loan_status, levels = c("Default", "Fully.Paid"))

  return(dt)
}


### Prepare Test Data

We will now take our test data and apply our data transformations to it.

# Display the test data set (a bare object name auto-prints at the console).
data_test


Before we apply the transformations, we will create some variables with some data from the test dataset which we will use later for evaluating the investment.

# Later used to evaluate the investment. These columns are copied now because
# data_test is subsequently overwritten by its transformed version.
data_test_grade <- data_test$grade
data_test_funded_amnt <- data_test$funded_amnt
data_test_total_pymnt <- data_test$total_pymnt


We will now apply the data transformations using the function we created.

# Overwrite the raw test set with its transformed version; every other
# argument of applyFeatureTransformations keeps its default.
data_test <- applyFeatureTransformations(dt = data_test)


