library(MASS) # Generating Data for Animal Model, # Estimation of Genetic Parameters by Gibbs Sampling # Solving MME by Iteration on Data # Simple pedigree for demonstration purposes anim = c("G","F","H","K","L","M","P") sire = c("A","A","B","B","C","C","C") dam = c("D","E","D","F","E","F","G") cbind(anim,sire,dam) sl = levels(as.factor(sire)) dl = levels(as.factor(dam)) al = levels(as.factor(anim)) cl = c(sl,dl,al) cl aid = levels(as.factor(cl)) aid nanim = length(aid) nanim k = match(aid,anim) sid = sire[k] sid did = dam[k] pedc = data.frame(aid,sid,did) pedc # Number animals - for loopings aaid = match(aid,aid) ssid = match(sid,aid) ddid = match(did,aid) ped = data.frame(aaid,ssid,ddid) ped bii = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2) # Simulation of data for an animal model # Model: CG, Age, b(Marker), animal # Assign contemporary groups to animals - randomly here using 'sample' ncg = c(1:3) cg = sample(ncg,nanim,replace=TRUE) cg # OR ncg = c(34, 15, 29) cg = sample(ncg,nanim,replace=TRUE) cg # OR cgpr = c(.2, .3, .5) cg = sample(ncg,nanim,replace=TRUE,prob=cgpr) cg # CG is a random factor with variance = 100 cgsd = 10 cgfx = rnorm(length(ncg),0,cgsd) cgfx mean(cgfx) var(cgfx) # Assign age groups to animals - randomly here nage = c(1:4) age = sample(nage,nanim,replace=TRUE) age agefx = c(0, 5, 10, 20) # Given, would be constant for each replicate # Marker genotypes - for nanim animals - inherited frq = c(0.5, 0.5) # Two alleles assumed, frequency of each = 0.5 geno = c(1, 2) mrki = rep(0,nanim) # array for first allele mrkj = mrki # array for second allele gtyp = rep(0,nanim) # will contain genotype - 1=11, 2=12, 3=22 # Making marker alleles inherited from parent to offspring for(i in 1:nanim){ kis = ssid[i] kid = ddid[i] if(is.na(kis)){ mrki[i] = sample(geno,1,prob=frq) mrkj[i] = sample(geno,1,prob=frq) g = mrki[i]*10 + mrkj[i] gtyp[i] = 2 if(g == 11)gtyp[i]=1 if(g == 22)gtyp[i]=3 } else { ai = mrki[kis] if(runif(1)>0.5)ai = mrkj[kis] aj = mrki[kid] if(runif(1)>0.5)aj = mrkj[kid] mrki[i] = ai mrkj[i] = aj g = ai*10 + aj gtyp[i] = 2 if(g == 11)gtyp[i]=1 if(g == 22)gtyp[i]=3 } } gtyp # Effects of the marker genotypes - given mrkfx = c(8, 0, -8) bslope = -1.6 # Assign animal additive genetic values - random effects gsd = sqrt(300) tbv = rnorm(nanim,0,gsd) tbv for(i in 1:nanim){ kis = ssid[i] kid = ddid[i] dii = sqrt(1/bii[i]) if(!is.na(kis)){ pa = 0.5*(tbv[kis]+tbv[kid])+dii*tbv[i] tbv[i] = pa } } tbv # Note difference from first tbv # Now put together pieces into observations # Residual effects rsd = sqrt(700) resfx = rnorm(nanim,0,rsd) # How it works cg cgfx cgfx[cg] yyy = cgfx[cg] + agefx[age] + bslope*gtyp + tbv + resfx yyy obs = round(yyy) obs mean(obs) var(obs) obs[is.na(ssid)] = NA obs # make a data from of all info bats = data.frame(aaid,ssid,ddid,bii,cg,age,gtyp,obs) bats # Constructing and Solving Mixed Model Equations by Iteration on Data # Preliminary preparations # Set up some arrays before iteration b2 = bats[ !is.na(obs), ] b2 # data.frame only for animals with data cgfac = factor(b2$cg) # cg indicator array agefac = factor(b2$age) # age indicator array anfac = factor(b2$aaid) # animal indicator array cov = b2$gtyp # marker genotype covariates y = b2$obs # observation vector cgfac agefac anfac cov y # Set up a coded pedigree list, and order it # Coded list is not needed to solve MME, but it is # necessary for proper Gibbs sampling nped = 0 aaa = rep(0,3*nanim) sss = aaa ddd = aaa cods = aaa nanimp = nanim + 1 ssid[is.na(ssid)] = nanimp ddid[is.na(ddid)] = nanimp for(i in 1:nanim){ ka = aaid[i] ks = ssid[i] kd = ddid[i] nped = nped + 1 cods[nped]=0 aaa[nped] = ka sss[nped] = ks ddd[nped] = kd if(ks < nanimp){ nped = nped + 1 cods[nped]=1 aaa[nped] = ks sss[nped] = ka ddd[nped] = kd } if(kd < nanimp){ nped = nped + 1 cods[nped]=2 aaa[nped] = kd sss[nped] = ka ddd[nped] = ks } } aaa = aaa[1:nped] cods = cods[1:nped] sss = sss[1:nped] ddd = ddd[1:nped] kord = order(aaa) # Sorting the coded pedigree data - in kord aaa[kord] zz = rep(0,nanim) # diagonals of Z'Z zz k = b2$aaid zz[k] = 1 zz adiag = rep(0,nanim+1) # diagonals of A-inverse bii = c(bii,1) # bii values for animals # initialize solutions sage = tapply(y,agefac,mean) # simple means for age solutions sage scg = tapply(y,cgfac,mean) # simple means for CG - forced to zero sum scg = scg - mean(scg) scg xx = t(as.matrix(cov)) %*% as.matrix(cov) # covariate matrix X'X xx = ginv(xx) # generalized inverse of X'X covt = t(as.matrix(cov)) # covariate row vector bhat = xx %*% covt %*% as.matrix(y) # initial estimate bhat sanm = rep(0,nanim+1) # Initial animal solutions set to 0 ratcg = 700/100 # Variance ratio for CG ratan = 700/300 # Variance ratio for Animal Add Genetic pricg = ratcg # prior value for Bayesian methods prian = ratan # prior value for Bayesian methods pridf = nanim # prior degrees of belief # Set up diagonals of A-inverse for(i in 1:nanim){ kis = ssid[i] kid = ddid[i] d = bii[i] x = 0.25*d adiag[i] = adiag[i] + d adiag[kis] = adiag[kis] + x adiag[kid] = adiag[kid] + x } adiag # Begin Iterations ccc = 100 zold = c(scg, sage, bhat, sanm) length(zold) source("animalMME.R") # Gibbs sampling part to estimate variances source("gibbs1.R") # After all samples are done, plot convergence plot(VRh2,type="l", col="red", lty="solid", ylim=c(0,1)) nl = length(VRh2) VRh2 = VRh2[2001:nl] mean(VRh2) var(VRh2)