MRDT.calc.G <- function(directory,ages,birth.year){ ################################################################################################################# # Author: Daniela Kuruczova # # Purpose: Nonlinear fitting of Gompertz model and calculating mortality rate doubling time for # # data downloaded from http://www.mortality.org/ # # Arguments: directory = directory where downloaded data files are stored in format "Mx_1x1_Country name.txt" # # ages = sequence of ages that will be used for the cohort # # birth year = birth year of the cohort # # Output: data frame with colums: "country": Country name # # "MRDT.M": MRDT for males # # "MRDT.F": MRDT for females # # "p.B": p-value for the difference between male and female MRDT # # "M.A","M.B": Gomperz model coefficients for males # # "F.A","F.B": Gomperz model coefficients for females # # "miss.years": years with unavailable data # # Example of use: MRDT.calc.G("~/Mortality/Data/",30:60,1954) # # # # 2018/11/14 Initial version (DK) # ################################################################################################################# #Years to extract from data years <- birth.year+ages #Get file names from directory file.names <- dir(directory, pattern =".txt") results <- data.frame() #Loop through all files in directory for(i in 1:length(file.names)){ #Read data file dt <- read.table(paste(directory,file.names[i],sep=''),header=T,skip=2,na.strings='.') #Replace character + in Age dt$Age <- as.integer(gsub("+", "", as.character(dt$Age),fixed=T)) #Get country name from filename country.name <- substr(file.names[i], 8, regexpr('.txt', file.names[i])[1] - 1) country <- country.name print(country) #Transform data for further use require(reshape2) dt.new <- melt(dt,id.vars=c('Age','Year')) names(dt.new)[3] <- "Gender" #Replace zeroes with NA as we do not want to use them dt.new$value[dt.new$value==0 & !is.na(dt.new$value)] <- NA #Filter out data for specified cohort dt.to.use <- dt.new[dt.new$Age %in% ages & dt.new$Year %in% years & dt.new$Year-dt$Age==birth.year & dt.new$Gender != 'Total',] dt.to.use$gen <- as.numeric(dt.to.use$Gender)-1 #Model calculation model <- try(nls(value~ (A+a*gen)*exp((B+b*gen)*Age), data=dt.to.use, start = list( A = 0.0009, a= 0, B = 0.085, b=0),control = list(maxiter = 500))) if(inherits(model, "try-error")){ #If model fails write NA F.A <- NA F.B <- NA M.A <- NA M.B <- NA MRDT.M <- NA MRDT.F <- NA p.B <- NA }else{ sum <- summary(model) coef <- coef(model) F.A <- coef[1] F.B <- coef[3] M.A <- coef[1] + coef[2] M.B <- coef[3] + coef[4] MRDT.M <- log(2)/(M.B) MRDT.F <- log(2)/(F.B) p.B <- sum$coefficients[4,4] } #Missing data for years: actual.years <- min(dt.to.use$Year):max(dt.to.use$Year) missing.years <- years[!(years %in% actual.years)] miss.years <- paste(missing.years,collapse=', ') vysl <- data.frame(country,MRDT.M,MRDT.F,p.B, M.A,M.B,F.A,F.B,miss.years) results <- rbind(results,vysl) } return(results) }