MRDT.calc.GM <- function(directory,ages,birth.year){ ################################################################################################################# # Author: Daniela Kuruczova # # Purpose: Nonlinear fitting of Gompertz-Makeham model and calculating mortality rate doubling time for # # data downloaded from http://www.mortality.org/ # # Arguments: directory = directory where downloaded data files are stored in format "Mx_1x1_Country name.txt" # # ages = sequence of ages that will be used for the cohort # # birth year = birth year of the cohort # # Output: data frame with colums: "country": Country name # # "MRDT.M": MRDT for males # # "MRDT.F": MRDT for females # # "p.B": p-value for the difference between male and female MRDT # # "M.A","M.B","M.C": Gompertz-Makeham model coefficients for males # # "F.A","F.B","F.C": Gompertz-Makeham model coefficients for females # # "miss.years": years with unavailable data # # Example of use: MRDT.calc.GM("~/Mortality/Data/",30:60,1954) # # # # 2018/11/14 Initial version (DK) # # 2019/06/27 Constrains on Makeham term added (DK) # ################################################################################################################# #Years to extract from data years <- birth.year+ages #Get file names from directory file.names <- dir(directory, pattern =".txt") results <- data.frame() #Loop through all files in directory for(i in 1:length(file.names)){ #Read data file dt <- read.table(paste(directory,file.names[i],sep=''),header=T,skip=2,na.strings='.') #Replace character + in Age dt$Age <- as.integer(gsub("+", "", as.character(dt$Age),fixed=T)) #Get country name from filename country.name <- substr(file.names[i], 8, regexpr('.txt', file.names[i])[1] - 1) country <- country.name print(country) #Transform data for further use require(reshape2) dt.new <- melt(dt,id.vars=c('Age','Year')) names(dt.new)[3] <- "Gender" #Replace zeroes with NA as we do not want to use them dt.new$value[dt.new$value==0 & !is.na(dt.new$value)] <- NA #Filter out data for specified cohort dt.to.use <- dt.new[dt.new$Age %in% ages & dt.new$Year %in% years & dt.new$Year-dt$Age==birth.year & dt.new$Gender != 'Total',] dt.to.use$gen <- as.numeric(dt.to.use$Gender)-1 #Model calculation model <- try(nls(value~ (C+c*gen) + (A+a*gen)*exp((B+b*gen)*Age), data=dt.to.use, start = list( A = 0.0009, a= 0, B = 0.085, b=0, C=0.0005, c=0),algorithm="port",lower=c(-Inf,-Inf,-Inf,-Inf,0,0),control = list(maxiter = 500))) if(inherits(model, "try-error")){ #If model fails write NA F.A <- NA F.B <- NA F.C <- NA M.A <- NA M.B <- NA M.C <- NA MRDT.M <- NA MRDT.F <- NA p.B <- NA p.C1 <- NA p.C2 <- NA }else{ sum <- summary(model) coef <- coef(model) F.A <- coef[1] F.B <- coef[3] F.C <- coef[5] M.A <- coef[1] + coef[2] M.B <- coef[3] + coef[4] M.C <- coef[5] + coef[6] MRDT.M <- log(2)/(M.B) MRDT.F <- log(2)/(F.B) p.B <- sum$coefficients[4,4] p.C1 <- sum$coefficients[5,4] p.C2 <- sum$coefficients[6,4] } #Missing data for years: actual.years <- min(dt.to.use$Year):max(dt.to.use$Year) missing.years <- years[!(years %in% actual.years)] miss.years <- paste(missing.years,collapse=', ') vysl <- data.frame(country,MRDT.M,MRDT.F,p.B,p.C1,p.C2, M.A,M.B,M.C,F.A,F.B,F.C,miss.years) results <- rbind(results,vysl) } return(results) } res <- data.frame(MRDT.calc.GM("C:/Users/kuruczova/ownCloud/Praca/Mortality/Selected/",30:64,1950)) res <- rbind(res,MRDT.calc.GM("C:/Users/kuruczova/ownCloud/Praca/Mortality/Selected/",30:63,1951)) res <- rbind(res,MRDT.calc.GM("C:/Users/kuruczova/ownCloud/Praca/Mortality/Selected/",30:62,1952)) res <- rbind(res,MRDT.calc.GM("C:/Users/kuruczova/ownCloud/Praca/Mortality/Selected/",30:61,1953)) res <- rbind(res,MRDT.calc.GM("C:/Users/kuruczova/ownCloud/Praca/Mortality/Selected/",30:60,1954)) require(xlsx) wb<-createWorkbook(type="xlsx") sheet <- createSheet(wb, sheetName = 'Vyslekdy') addDataFrame(res, sheet, startRow=1, startColumn=1,col.names=T) saveWorkbook(wb,"C:/Users/kuruczova/ownCloud/Praca/Mortality/vysl_GM_new.xlsx")