# Copy 'haplotypep' to 'haplotype-sort-del#.hmp', removing every '#'
# character and replacing every '--' with 'NN' on the way through.
#
# Both files are managed by `with`, so the handles are closed even if an
# error interrupts the loop. The output file is opened first (and therefore
# truncated first), as in the original script.
with open('haplotype-sort-del#.hmp', 'w') as result, \
        open('haplotypep', 'r') as hamp:
    for line in hamp:
        line = line.replace('#', '')      # delete '#'
        line = line.replace('--', 'NN')   # '--' replaced as 'NN'
        # `line` still ends with its own newline, so write it unchanged.
        result.write(line)
def haplotypeChromoSub(haplotype, chromesome_dict, haplotypeSub):
    """Append a chromosome-renamed copy of a haplotype file to haplotypeSub.

    The input file is scanned several times and the output is written in
    three sections:

    1. the first header line (a line starting with 'rs#' or 'rs<TAB>');
    2. for each key of ``chromesome_dict`` in sorted order, every data line
       whose third whitespace-separated field equals that key, with the
       field replaced by ``chromesome_dict[key]`` and the fields re-joined
       with tabs;
    3. every line containing the text 'scaffold', unchanged.

    Args:
        haplotype: path of the input haplotype file.
        chromesome_dict: mapping from the chromosome name found in the file
            to the replacement name (both strings).
        haplotypeSub: path of the output file; opened in append mode, so
            existing content is kept.
    """
    def is_header(text):
        # Header detection shared by the first two passes.
        return text.startswith('rs#') or text.startswith('rs\t')

    # `with` guarantees the output handle is closed even on error.
    with open(haplotypeSub, 'a') as result:
        # print the head of haplotype (first header line only)
        with open(haplotype) as fd:
            for line in fd:
                if is_header(line):
                    result.write(line)
                    break

        # sub the chromosome names, one key at a time in sorted order
        for chromosome in sorted(chromesome_dict):
            with open(haplotype) as fd:
                for line in fd:
                    if is_header(line):
                        # The original used a bare `next`, which is a no-op
                        # expression in Python; `continue` skips the header.
                        continue
                    line_list = line.split()
                    # Guard against blank/short lines that have no third field.
                    if len(line_list) > 2 and line_list[2] == chromosome:
                        line_list[2] = chromesome_dict[chromosome]
                        # split() dropped the newline, so add it back.
                        result.write('\t'.join(line_list) + '\n')

        # print the scaffolds
        with open(haplotype) as fd:
            for line in fd:
                if 'scaffold' in line:
                    result.write(line)
基本流程:kinship 计算使用 VanRaden 方法;聚类算法默认为 average;group kinship type 默认为 Mean。
1 2 3 4 5 6 7
# Step 1: Set data directory and import files
# The trait table is read with its first row as column names; the genotype
# file is read with header = FALSE, so its first row stays in the data.
myY <- read.table("mdp_traits.txt", header = TRUE)
myG <- read.table("mdp_genotype_test.hmp.txt", header = FALSE)

# Step 2: Run GAPIT
myGAPIT <- GAPIT(
  Y = myY,
  G = myG,
  PCA.total = 3
)
增加参数
1 2 3 4 5 6 7 8 9 10 11
# Step 1: Set data directory and import files
# The trait table is read with its first row as column names; the genotype
# file is read with header = FALSE, so its first row stays in the data.
myY <- read.table("mdp_traits.txt", header = TRUE)
myG <- read.table("mdp_genotype_test.hmp.txt", header = FALSE)

# Step 2: Run GAPIT
# Same call as the basic run, with several clustering methods and group
# kinship types supplied plus a group.from / group.to / group.by range.
myGAPIT <- GAPIT(
  Y = myY,
  G = myG,
  PCA.total = 3,
  kinship.cluster = c("average", "complete", "ward"),
  kinship.group = c("Mean", "Max"),
  group.from = 200,
  group.to = 1000000,
  group.by = 10
)
导入 Kinship 矩阵、协变量(Covariates)和 Q 值
1 2 3 4 5 6 7 8
# Step 1: Set data directory and import files
# Traits and covariates are read with a header row; the genotype and
# kinship files are read with header = FALSE.
myY <- read.table("mdp_traits.txt", header = TRUE)
myG <- read.table("mdp_genotype_test.hmp.txt", header = FALSE)
myKI <- read.table("KSN.txt", header = FALSE)
myCV <- read.table(
  "Copy of Q_First_Three_Principal_Components.txt",
  header = TRUE
)

# Step 2: Run GAPIT
# The kinship matrix (KI) and covariates (CV) are passed in directly;
# PCA.total is not set in this call.
myGAPIT <- GAPIT(
  Y = myY,
  G = myG,
  KI = myKI,
  CV = myCV
)
基因组预测
这个分析用不到基因型数据,所以使用SNP.test=FALSE这个参数
1 2 3 4 5
# Step 1: Set data directory and import files
# Only traits and a kinship matrix are loaded; no genotype file is read.
myY <- read.table("mdp_traits.txt", header = TRUE)
myKI <- read.table("KSN.txt", header = FALSE)

# Step 2: Run GAPIT
# No G argument is passed, and SNP.test = FALSE is set for this run.
myGAPIT <- GAPIT(
  Y = myY,
  KI = myKI,
  PCA.total = 3,
  SNP.test = FALSE
)
多基因型数据导入分析
1 2 3 4 5
# Step 1: Set data directory and import files
myY <- read.table("mdp_traits.txt", header = TRUE)

# Step 2: Run GAPIT
# Genotypes are not loaded into R here; the call is given a file name
# prefix, an extension, an index range (file.from to file.to) and a
# directory instead.
# NOTE(review): presumably GAPIT builds names such as
# "mdp_genotype_chr1.hmp.txt" from these pieces; confirm in the GAPIT manual.
myGAPIT <- GAPIT(
  Y = myY,
  PCA.total = 3,
  file.G = "mdp_genotype_chr",
  file.Ext.G = "hmp.txt",
  file.from = 1,
  file.to = 10,
  file.path = "C:\\myGAPIT\\"
)
# Step 1: Set data directory and import files
myY <- read.table("mdp_traits.txt", header = TRUE)

# Step 2: Run GAPIT
# Genotypes are referenced by file name: file.GD / file.GM give the name
# prefixes, file.Ext.GD / file.Ext.GM the extensions, and file.from /
# file.to the index range.
# The argument is SNP.fraction (no space); "SNP .fraction" does not parse.
myGAPIT <- GAPIT(
  Y = myY,
  PCA.total = 3,
  file.GD = "mdp_numeric",
  file.GM = "mdp_SNP_information",
  file.Ext.GD = "txt",
  file.Ext.GM = "txt",
  file.from = 1,
  file.to = 3,
  SNP.fraction = 0.6
)
# Step 1: Set data directory and import files
myY <- read.table("mdp_traits.txt", header = TRUE)

# Step 2: Run GAPIT
# Same file-based genotype input as the SNP.fraction example, with
# file.fragment = 128 added.
# NOTE(review): file.fragment presumably sets how many SNPs are read per
# chunk; confirm in the GAPIT manual.
# The argument is SNP.fraction (no space); "SNP .fraction" does not parse.
myGAPIT <- GAPIT(
  Y = myY,
  PCA.total = 3,
  file.GD = "mdp_numeric",
  file.GM = "mdp_SNP_information",
  file.Ext.GD = "txt",
  file.Ext.GM = "txt",
  file.from = 1,
  file.to = 3,
  SNP.fraction = 0.6,
  file.fragment = 128
)
Model selection
1 2 3 4 5
# Step 1: Import files
# The trait table is read with its first row as column names; the genotype
# file is read with header = FALSE, so its first row stays in the data.
myY <- read.table("mdp_traits.txt", header = TRUE)
myG <- read.table("mdp_genotype_test.hmp.txt", header = FALSE)

# Step 2: Run GAPIT
# Model.selection = TRUE is the only change from the basic run.
myGAPIT <- GAPIT(
  Y = myY,
  G = myG,
  PCA.total = 3,
  Model.selection = TRUE
)
模型选择
1 2 3 4 5 6 7 8 9 10 11 12
# Step 1: Set data directory and import files
myCV <- read.table(
  "Copy of Q_First_Three_Principal_Components.txt",
  header = TRUE
)
myY <- read.table("mdp_traits.txt", header = TRUE)
myG <- read.table("mdp_genotype_test.hmp.txt", header = FALSE)

# Step 2: Run GAPIT
# Only the first two columns of the trait table are passed as Y.
# "sangwich" is the spelling used in the original call; keep it as is
# (NOTE(review): confirm the argument names against the GAPIT manual).
# The original call ended in "LD=0.1, )"; the trailing comma passed an
# extra empty argument and has been dropped.
myGAPIT_SUPER <- GAPIT(
  Y = myY[, c(1, 2)],
  G = myG,
  # KI = myKI,
  CV = myCV,
  # PCA.total = 3,
  sangwich.top = "MLM",      # options are GLM, MLM, CMLM, FaST and SUPER
  sangwich.bottom = "SUPER", # options are GLM, MLM, CMLM, FaST and SUPER
  LD = 0.1
)