Commit 2d36059b authored by Kosmas Hench's avatar Kosmas Hench

include 2.2.9 selscan scripts

parent a4fd034b
nig
pue
uni
nigbel
puebel
unibel
nighon
puehon
unihon
nigboc
pueboc
uniboc
\ No newline at end of file
getIHH12 <- function(searchLG,xr){
np <- read.csv(paste('./iHH12/',searchLG,'/iHH12.nig.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='nig')
nu <- read.csv(paste('./iHH12/',searchLG,'/iHH12.pue.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='pue')
pu <- read.csv(paste('./iHH12/',searchLG,'/iHH12.uni.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='uni')
data_iHH12 <- rbind(np,nu,pu) %>% mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(iHH[12])')
npB <- read.csv(paste('./iHH12/',searchLG,'/iHH12.nigbel.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='nig',group='bel')
nuB <- read.csv(paste('./iHH12/',searchLG,'/iHH12.puebel.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='pue',group='bel')
puB <- read.csv(paste('./iHH12/',searchLG,'/iHH12.unibel.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='uni',group='bel')
npH <- read.csv(paste('./iHH12/',searchLG,'/iHH12.puehon.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='nig',group='hon')
nuH <- read.csv(paste('./iHH12/',searchLG,'/iHH12.puehon.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='pue',group='hon')
puH <- read.csv(paste('./iHH12/',searchLG,'/iHH12.unihon.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='uni',group='hon')
npP <- read.csv(paste('./iHH12/',searchLG,'/iHH12.nigboc.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='nig',group='pan')
nuP <- read.csv(paste('./iHH12/',searchLG,'/iHH12.pueboc.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='pue',group='pan')
puP <- read.csv(paste('./iHH12/',searchLG,'/iHH12.uniboc.selscan-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGihh12) %>% mutate(run='uni',group='pan')
data_iHH12_pw <- rbind(npB,nuB,puB,npH,nuH,puH,npP,nuP,puP) %>%
mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(ind~iHH[12])')
return(list(data_iHH12_pw=data_iHH12_pw,data_iHH12=data_iHH12))
}
\ No newline at end of file
getPI<- function(searchLG,xr){
np <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.nig.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig')
nu <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.pue.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='pue')
pu <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.uni.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='uni')
data_pi <- rbind(np,nu,pu) %>% mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(pi)')
npB <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.nigbel.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig',group='bel')
nuB <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.puebel.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='pue',group='bel')
puB <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.unibel.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='uni',group='bel')
npH <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.nighon.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig',group='hon')
nuH <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.puehon.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='pue',group='hon')
puH <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.unihon.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='uni',group='hon')
npP <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.nigboc.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig',group='pan')
nuP <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.pueboc.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='pue',group='pan')
puP <- read.csv(gzfile(paste('./pi/',searchLG,'/dxy.uniboc.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='uni',group='pan')
data_pi_pw <- rbind(npB,nuB,puB,npH,nuH,puH,npP,nuP,puP) %>%
mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(ind~pi)')
return(list(data_pi_pw=data_pi_pw,data_pi=data_pi))
}
\ No newline at end of file
getPIpw<- function(searchLG,xr){
np <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.pue-nig.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig-pue')
nu <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.nig-uni.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig-uni')
pu <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.uni-pue.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='pue-uni')
data_pi <- rbind(np,nu,pu) %>% mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(pi~pw)')
npB <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.puebel-nigbel.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig-pue',group='bel')
nuB <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.nigbel-unibel.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig-uni',group='bel')
puB <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.unibel-puebel.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='pue-uni',group='bel')
npH <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.puehon-nighon.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig-pue',group='hon')
nuH <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.nighon-unihon.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig-uni',group='hon')
puH <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.unihon-puehon.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='pue-uni',group='hon')
npP <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.pueboc-nigboc.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig-pue',group='pan')
nuP <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.nigboc-uniboc.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='nig-uni',group='pan')
puP <- read.csv(gzfile(paste('./pi_pw/',searchLG,'/dxy.uniboc-pueboc.',searchLG,'-10kb-1kb.windowed.pi.gz',sep='')),sep='\t') %>%
select(BIN_START,BIN_END,PI) %>% mutate(run='pue-uni',group='pan')
data_pi_pw <- rbind(npB,nuB,puB,npH,nuH,puH,npP,nuP,puP) %>%
mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(pw~pi~pw)')
return(list(data_piPW_pw=data_pi_pw,data_piPW=data_pi))
}
\ No newline at end of file
getXPEHH <- function(searchLG,xr){
np <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.pue-nig-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='nig-pue')
nu <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.nig-uni-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='nig-uni')
pu <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.pue-uni-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='pue-uni')
data_xpEHH <- rbind(np,nu,pu) %>% mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(xpEHH)')
npB <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.puebel-nigbel-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='nig-pue',group='bel')
nuB <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.nigbel-unibel-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='nig-uni',group='bel')
puB <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.puebel-unibel-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='pue-uni',group='bel')
npH <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.puehon-nighon-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='nig-pue',group='hon')
nuH <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.nighon-unihon-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='nig-uni',group='hon')
puH <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.puehon-unihon-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='pue-uni',group='hon')
npP <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.pueboc-nigboc-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='nig-pue',group='pan')
nuP <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.nigboc-uniboc-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='nig-uni',group='pan')
puP <- read.csv(paste('./xpEHH/',searchLG,'/xpEHH.pueboc-uniboc-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,AVGxxEHH) %>% mutate(run='pue-uni',group='pan')
data_xpEHH_pw <- rbind(npB,nuB,puB,npH,nuH,puH,npP,nuP,puP) %>%
mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(pw~xpEHH)')
return(list(data_xpEHH_pw=data_xpEHH_pw,data_xpEHH=data_xpEHH))
}
\ No newline at end of file
create_K_plot <- function(searchLG,gfffile,xr,searchgene,secondary_genes,searchsnp,muskID){
source('../../0_data/0_scripts/F3.functions.R');
source('../../0_data/0_scripts/F3.getFSTs.R')
source('../../0_data/0_scripts/F3.getGxP.R')
source('../../0_data/0_scripts/F3.getDXY.R')
source('../../0_data/0_scripts/S11.getPI.R')
source('../../0_data/0_scripts/S11.getPIpw.R')
source('../../0_data/0_scripts/S11.getIHH12.R')
source('../../0_data/0_scripts/S11.getXPEHH.R')
highclr <- '#3bb33b'
theme_set(theme_minimal(base_size = 6))
if(xr[1]>xr[2]){print('error: ill conditioned x-range')}
wdth <- .3
# get gene models
df_list <- get_anno_df_single_line(searchLG=searchLG,gfffile=gfffile,xrange=xr,
genes_of_interest = searchgene,genes_of_sec_interest=secondary_genes,anno_rown=3)
# get fst values
fst_list <- getFSTS(searchLG,xr,searchsnp,highclr)
# data_fst_pw<-fst_list$data_fst_pw;
global_fst<-fst_list$global_fst;
data_fst<-fst_list$data_fst
# get pfst values
pfst_list <- getGxP(searchLG,xr)
# data_pfst_pw<-pfst_list$data_pfst_pw;
data_pfst<-pfst_list$data_pfst
# get dxy values
dxy_list <- getDXY(searchLG,xr)
#data_dxy_pw<-dxy_list$data_dxy_pw;
data_dxy<-dxy_list$data_dxy
clr <- c('#fb8620','#1b519c','#d93327')
annoclr <- c('lightgray',highclr,rgb(.3,.3,.3))[1:3]
df_list[[1]] <- df_list[[1]] %>% mutate(label=paste("italic(",tolower(Parentgenename),")",sep='') )
LW <- .3;lS <- 9;tS <- 6
plotSET <- theme(rect = element_blank(),
text=element_text(size=tS,color='black'),
panel.grid.major = element_line(colour = rgb(.92,.92,.92)),
panel.grid.minor = element_line(colour = rgb(.95,.95,.95)),
plot.background = element_blank(),
plot.margin = unit(c(3,7,3,3),'pt'),
panel.background = element_blank(),#element_rect(colour = 'gray',fill=NULL),
legend.position = 'none',#c(.8,.3),
legend.direction = 'horizontal',
axis.title.y = element_blank(),
axis.title.x = element_blank(),#element_text(size=13,face='bold',color='black'),
strip.text.y = element_text(size=lS,color='black'),
axis.text.x = element_blank(),#element_text(size=11,color='black'),
strip.placement = "outside",
panel.border = element_blank()
)
p11 <- ggplot()+coord_cartesian(xlim=xr/1000)+
geom_rect(data=df_list[[2]],aes(xmin=ps/1000,xmax=pe/1000,ymin=yl-(wdth/2),ymax=yl+(wdth/2),
fill=as.factor(clr),col=as.factor(clr),group=Parent),lwd=.1)+
geom_segment(data=(df_list[[1]]%>%filter(strand%in%c('+','-'))),
aes(x=ps/1000,xend=pe/1000,y=yl,yend=yl,group=Parent),lwd=.1,
arrow=arrow(length=unit(2,"pt"),type='closed'),color='black')+
geom_segment(data=(df_list[[1]]%>%filter(!strand%in%c('+','-'))),
aes(x=ps/1000,xend=pe/1000,y=yl,yend=yl,group=Parent),lwd=.2,color='black')+
geom_text(data=(df_list[[1]]%>%filter(Parentgenename%in%c(searchgene,secondary_genes))),
aes(x=labelx/1000,label=label,y=yl-.5),size=1.8,parse=T)+
facet_grid(window~.,scales='free_y',
switch = 'y',labeller = label_parsed,as.table = T)+
# color settings
scale_color_manual(values=annoclr,breaks=c("x","y","z"),guide=F)+
scale_fill_manual(values=annoclr,guide=F)+
scale_x_continuous(name=paste(searchLG,' (',muskID,', kb)'),expand=c(0,0),position = 'top')+
scale_y_continuous(breaks = seq(0,.75,length.out = 4))+
theme(rect = element_blank(),
text=element_text(size=tS,color='black'),
panel.grid.major = element_blank(),#element_line(colour = rgb(.92,.92,.92)),
panel.grid.minor = element_blank(),#element_line(colour = rgb(.95,.95,.95)),
plot.background = element_blank(),
panel.background = element_blank(),#element_rect(colour = 'gray',fill=NULL),
# plot.margin = unit(c(3,3,3,3),'pt'),
legend.position = c(.8,.3),
legend.direction = 'horizontal',
axis.title.y = element_blank(),
axis.title.x = element_text(size=lS,face='bold',color='black'),
strip.text.y = element_text(size=lS,color='black'),
axis.text.x = element_text(size=tS,color='black'),
strip.placement = "outside",
panel.border = element_blank()
);
if(muskID=='A'){
p11$layers[[4]]$data$label <- c("italic(sox10)","italic(rnaseh2a)")
} else if(muskID=='B'){
p11$layers[[4]]$data$label <- c("italic(casz1^3)","italic(casz1^2)","italic(casz1^1)")
} else if(muskID=='C'){
p11$layers[[4]]$data$label <- c('italic(hoxc10a)',"italic(hoxc11a)","italic(hoxc12a)","italic(hoxc13a)","italic(calcoco1)")
}else if(muskID=='E'){
p11$layers[[4]]$data$label <- c("italic(polr1d)","italic(ednrb)")
} else if(muskID=='F'){
p11$layers[[4]]$data$label <- c("italic(foxd3)","italic(alg6)","italic(efcab7)")
} else if(muskID=='D'){
p11$layers[[4]]$data$label <- c("italic(hcfc1)","italic(hcfc1[2])","italic(hcfc1[1])",'italic(paste(sws2a,"\u03B2"))',
'italic(paste(sws2a,"\u03B1"))',"italic(sw2b)","italic(lws)",
"italic(gnl3l)","italic(tfe3)","italic(mdfic2)","italic(cxxc1[3])",
"italic(cxxc1[1])",'italic(mbd1)','italic(ccdc120)')
}else if(muskID=='G'){
p11$layers[[4]]$data$label <- c("italic(lgals3bpb)","italic(mpnd)","italic(sh3gl1)","italic(rorb)")
}else if(muskID=='H'){
p11$layers[[4]]$data$label <- c("italic(alg2)","italic(sec61b)","italic(nr4a3)",
"italic(stx17)","italic(erp44)","italic(invs)")
}
p12 <- ggplot()+coord_cartesian(xlim=xr)+
geom_point(data=global_fst,aes(x=POS,y=WEIR_AND_COCKERHAM_FST),col=global_fst$clr,size=.2)+
geom_point(data=global_fst,aes(x=POS,y=WEIR_AND_COCKERHAM_FST),col=global_fst$clr2,size=.3)+
# add pairwise fsts as lines (10 kb / 1kb)
geom_line(data=(data_fst %>% filter(POS > xr[1],POS<xr[2]))
,aes(x=POS,y=WEIGHTED_FST,col=run),lwd=LW)+
# geom_line(data=(data_fst_pw %>% filter(POS > xr[1],POS<xr[2]))
# ,aes(x=POS,y=WEIGHTED_FST,col=run,linetype=group),lwd=1)+
scale_color_manual(values=c(clr,annoclr),breaks=c("nig-pue","nig-uni","pue-uni"),guide=F)+
scale_fill_manual(values=annoclr,guide=F)+
facet_grid(window~.,scales='free_y',
switch = 'y',labeller = label_parsed,as.table = T)+
scale_x_continuous(name=searchLG,expand=c(0,0),position = 'top')+
scale_y_continuous(breaks = seq(0,.75,length.out = 4))+
scale_linetype(name='location',label=c('Belize','Honduras','Panama'))+
guides(linetype= guide_legend(override.aes = list(color = 'black')))+plotSET
p13 <- ggplot()+coord_cartesian(xlim=xr)+
geom_line(data=(data_pfst %>% filter(POS > xr[1],POS<xr[2]))
,aes(x=POS,y=avgp_wald,col=run),lwd=LW)+
# geom_line(data=(data_pfst_pw %>% filter(POS > xr[1],POS<xr[2]))
# ,aes(x=POS,y=avgp_wald,col=run,linetype=group),lwd=1)+
scale_color_manual(values=c(clr,annoclr),breaks=c("nig-pue","nig-uni","pue-uni","x","y","z"),guide=F)+
facet_grid(window~.,scales='free_y',
switch = 'y',labeller = label_parsed,as.table = T)+
scale_x_continuous(name=searchLG,expand=c(0,0),position = 'top')+
scale_y_continuous(breaks = seq(0,9,length.out = 4))+
scale_linetype(name='location',label=c('Belize','Honduras','Panama'))+
guides(linetype= guide_legend(override.aes = list(color = 'black')))+plotSET
p14 <- ggplot()+coord_cartesian(xlim=xr)+
geom_line(data=(data_dxy %>% filter(POS > xr[1],POS<xr[2]))
,aes(x=POS,y=dxy,col=run),lwd=LW)+
# geom_line(data=(data_dxy_pw %>% filter(POS > xr[1],POS<xr[2]))
# ,aes(x=POS,y=dxy,col=run,linetype=group),lwd=1)+
scale_color_manual(values=c(clr,annoclr),breaks=c("nig-pue","nig-uni","pue-uni","x","y","z"),guide=F)+
facet_grid(window~.,scales='free_y',
switch = 'y',labeller = label_parsed,as.table = T)+
scale_x_continuous(name=searchLG,expand=c(0,0),position = 'top')+
scale_y_continuous(breaks = seq(0,.01,length.out = 5))+
scale_linetype(name='location',label=c('Belize','Honduras','Panama'))+
guides(linetype= guide_legend(override.aes = list(color = 'black')))+plotSET
p2 <- plot_grid(p11,p12,p13,p14,
ncol = 1,align = 'v',axis = 'r',rel_heights = c(1.3,rep(1,3)))
return(p2)}
\ No newline at end of file
#!/usr/bin/bash
#usage: selscanVcf <vcfname prefix: test for test.vcf.gz> <chrom name:LGxx> <poptag> <pop-file-path> <threadnr: int>
RED='\033[0;31m'
OR='\033[0;34m'
NC='\033[0m' # No Color
mkdir -p sel_tmp;
mkdir -p selscan_out;
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}vcf 2 map${NC}) ---")
vcftools --gzvcf $WORK/2_output/07_phased_variants/$1.vcf.gz --chr $2 --keep $4 --plink --out sel_tmp/$1.$2.$3;
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}vcf filtering and transformation${NC}) ---")
vcftools --gzvcf $WORK/2_output/07_phased_variants/$1.vcf.gz --chr $2 --keep $4 --recode --stdout | \
awk -v "OFS=\t" ' {if (substr($1,1,1) != "#" ) {$4=0; $5=1} ;print $0}' | \
grep -v '#' | \
sed 's/LG0//g; s/LG//g' | \
gzip -c > sel_tmp/$1.$2.$3.selscan.vcf.gz ;
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}map transformation${NC}) ---")
cut -f 2 sel_tmp/$1.$2.$3.map | \
sed 's/:/\t/g'| \
awk '{print $1"\t"$2"\t"$2"\t"$2}' | \
sed 's/LG0//g; s/LG//g' > sel_tmp/$1.$2.$3.selscan.map ;
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}run selscan${NC}) ---")
selscan --ihh12 --vcf sel_tmp/$1.$2.$3.selscan.vcf.gz --map sel_tmp/$1.$2.$3.selscan.map --out selscan_out/$1.$2.$3.selscan --threads $5
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}done${NC}) ---")
#!/usr/bin/bash
#usage: selscanVcf <vcfname prefix: test for test.vcf.gz> <chrom name:LGxx> <poptag> <pop-file-path> <pop-file-path2> <threadnr: int>
RED='\033[0;31m'
OR='\033[0;34m'
NC='\033[0m' # No Color
mkdir -p sel_tmp;
mkdir -p selscan_out;
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}vcf 2 map${NC}) ---")
vcftools --gzvcf $WORK/2_output/07_phased_variants/$1.vcf.gz --chr $2 --keep $4 --keep $5 --plink --out sel_tmp/$1.$2.$3.xpEHH;
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}vcf filtering and transformation${NC}) ---")
vcftools --gzvcf $WORK/2_output/07_phased_variants/$1.vcf.gz --chr $2 --keep $4 --recode --stdout | \
awk -v "OFS=\t" ' {if (substr($1,1,1) != "#" ) {$4=0; $5=1} ;print $0}' | \
grep -v '#' | \
sed 's/LG0//g; s/LG//g' | \
gzip -c > sel_tmp/$1.$2.$3.pop1.xpEHH.selscan.vcf.gz ;
vcftools --gzvcf $WORK/2_output/07_phased_variants/$1.vcf.gz --chr $2 --keep $5 --recode --stdout | \
awk -v "OFS=\t" ' {if (substr($1,1,1) != "#" ) {$4=0; $5=1} ;print $0}' | \
grep -v '#' | \
sed 's/LG0//g; s/LG//g' | \
gzip -c > sel_tmp/$1.$2.$3.pop2.xpEHH.selscan.vcf.gz \
;
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}map transformation${NC}) ---")
cut -f 2 sel_tmp/$1.$2.$3.xpEHH.map | \
sed 's/:/\t/g'| \
awk '{print $1"\t"$2"\t"$2"\t"$2}' | \
sed 's/LG0//g; s/LG//g' > sel_tmp/$1.$2.$3.xpEHH.selscan.map ;
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}run selscan${NC}) ---")
selscan --xpehh --vcf sel_tmp/$1.$2.$3.pop1.xpEHH.selscan.vcf.gz --vcf-ref sel_tmp/$1.$2.$3.pop2.xpEHH.selscan.vcf.gz --map sel_tmp/$1.$2.$3.xpEHH.selscan.map --out selscan_out/$1.$2.$3.xpEHH.selscan --threads $6
(>&2 echo -e "--- ${RED} selscanVcf ${NC} -- (${OR}done${NC}) ---")
#PBS -l elapstim_req=00:30:00
#PBS -l memsz_job=30gb
#PBS -b 1
#PBS -l cpunum_job=8
#PBS -N fXXnameXXXXjXX
#PBS -q clexpress
#PBS -o 2.2.9.1.iHH_XXnameXX_XXlgXX.stdout
#PBS -e 2.2.9.1.iHH_XXnameXX_XXlgXX.stderr
cd $WORK/2_output/08_popGen
mkdir -p 09_iHH12
cd 09_iHH12
$WORK/clean_methods/0_data/0_scripts/selscanVcf_iHH12 6_phased_mac2 XXlgXX XXnameXX $WORK/0_data/0_resources/vcfpops/XXname2XX 8
#PBS -l elapstim_req=00:30:00
#PBS -l memsz_job=30gb
#PBS -b 1
#PBS -l cpunum_job=8
#PBS -N eXXnameXXXXjXX
#PBS -q clexpress
#PBS -o 2.2.9.2.xpEHH_XXnameXX_XXlgXX.stdout
#PBS -e 2.2.9.2.xpEHH_XXnameXX_XXlgXX.stderr
cd $WORK/2_output/08_popGen
mkdir -p 10_xpEHH
cd 10_xpEHH
$WORK/clean_methods/0_data/0_scripts/selscanVcf_xpehh 6_phased_mac2 XXlgXX XXnameXX $WORK/0_data/0_resources/vcfpops/XXname2XX $WORK/0_data/0_resources/vcfpops/XXname3XX 8
#!/bin/bash
# starter script to rewrite and submit the 2.2.9.1.iHH_temp.sh script once per species pair
cd $WORK/2_reSeq-scripts/2-2_popGen
mkdir -p 08_iHH
for k in {1..12};do
POP1=$(awk -v k=$k 'NR==k{print $1}' $WORK/0_data/0_resources/iHH.config );
echo $POP1;
for j in {01..24};do
LG="LG"$j;
echo "-- "$LG" ---";
sed "s/XXnameXX/$POP1/g; s/XXname2XX/vcftools\_$POP1.pop/g; s/XXlgXX/$LG/g; s/XXjXX/$j/g" 00_templates/2.2.9.1.iHH_temp.sh > 08_iHH/2.2.9.1.$POP1.$LG.iHH.sh
cd 08_iHH
qsub 2.2.9.1.$POP1.$LG.iHH.sh
cd ..
done
done
#!/bin/bash
# starter script to rewrite and submit the 2.2.9.1.iHH_temp.sh script once per species pair
cd $WORK/2_reSeq-scripts/2-2_popGen
mkdir -p 09_xpEHH
for k in {1..12};do
POP1=$(awk -v k=$k 'NR==k{print $1}' $WORK/0_data/0_resources/fstA.config);
POP2=$(awk -v k=$k 'NR==k{print $2}' $WORK/0_data/0_resources/fstA.config);
echo $POP1 "--" $POP2;
for j in {01..24};do
LG="LG"$j;
echo "-- "$LG" ---";
sed "s/XXnameXX/$POP1-$POP2/g; s/XXname2XX/vcftools\_$POP1.pop/g; s/XXname3XX/vcftools\_$POP2.pop/g; s/XXlgXX/$LG/g; s/XXjXX/$j/g" 00_templates/2.2.9.2.xpEHH_temp.sh > 09_xpEHH/2.2.9.2.$POP1-$POP2.$LG.xpEHH.sh
cd 09_xpEHH
qsub 2.2.9.2.$POP1-$POP2.$LG.xpEHH.sh
cd ..
done
done
#PBS -l elapstim_req=00:20:00
#PBS -l memsz_job=80gb
#PBS -b 1
#PBS -l cpunum_job=1
#PBS -N iHH12merge
#PBS -q clexpress
#PBS -o 2.2.9.3.merge_iHH12.stdout
#PBS -e 2.2.9.3.merge_iHH12.stderr
cd $WORK/2_output/08_popGen/09_iHH12
mkdir -p selscan_merge_10kb
WIN=10000;
STEP=1000;
for k in nig pue uni nigbel puebel unibel nighon puehon unihon nigboc pueboc uniboc; do
echo $k
for j in {01..24};do
LG="LG"$j;
echo $LG;
cut -f 2,3,4 selscan_out/no_mac01.$LG.$k.selscan.ihh12.out | \
awk -v lg=$LG -v "OFS=\t" '{print lg,$1,$2,$3}' | \
awk -v OFS="\t" -v w=$WIN -v s=$STEP -v r=$j 'BEGIN{window=w;slide=s;g=0;OL=int(window/slide);}
{if(NR==1 && r==01){print "CHROM","BIN_START","BIN_END","nSNPs","AVGPOS","BINrank","BINnr","SNPdens","AVG"$4 ;}}
{if(NR>1){g=int(($2-1)/slide)+1;{for (i=0;i<=OL-1;i++){if(g-i >0){A[g-i]+=$2; B[g-i]++;C[g-i]+=$4;G[g-i]=g-i;H[g-i]=$1;}}}};}
END{for(i in A){print H[i],(G[i]-1)*slide,(G[i]-1)*slide+window,B[i],A[i]/B[i],G[i],k+1,B[i]/window,C[i]/B[i];k++}}' >> selscan_merge_10kb/$k.selscan.ihh12.10k.1k.smooth.out
done
done
#PBS -l elapstim_req=00:20:00
#PBS -l memsz_job=80gb
#PBS -b 1
#PBS -l cpunum_job=1
#PBS -N iHH12merge
#PBS -q clexpress
#PBS -o 2.2.9.4.merge_xpEHH.stdout
#PBS -e 2.2.9.4.merge_xpEHH.stderr
cd $WORK/2_output/08_popGen/10_xpEHH
mkdir -p selscan_merge_10kb
WIN=10000;
STEP=1000;
for k in pue-nig pue-uni nig-uni puebel-nigbel puebel-unibel nigbel-unibel puehon-nighon puehon-unihon nighon-unihon pueboc-nigboc pueboc-uniboc nigboc-uniboc; do
echo $k
for j in {01..24};do
LG="LG"$j;
echo $LG;
cut -f 2,4,8 selscan_out/no_mac01.$LG.$k.xpEHH.selscan.xpehh.out | awk -v lg=$LG -v "OFS=\t" '{print lg,$1,$2,$3}' | $AWK -v OFS="\t" -v w=$WIN -v s=$STEP -v r=$j 'BEGIN{window=w;slide=s;g=0;OL=int(window/slide);}
{if(NR==1 && r==01){print "CHROM","BIN_START","BIN_END","nSNPs","AVGPOS","BINrank","BINnr","SNPdens","AVGxxEHH" ;}}
{if(NR>1){g=int(($2-1)/slide)+1;{for (i=0;i<=OL-1;i++){if(g-i >0){A[g-i]+=$2; B[g-i]++;C[g-i]+=$4;G[g-i]=g-i;H[g-i]=$1;}}}};}
END{for(i in A){print H[i],(G[i]-1)*slide,(G[i]-1)*slide+window,B[i],A[i]/B[i],G[i],k+1,B[i]/window,C[i]/B[i];k++}}' >> selscan_merge_10kb/$k.selscan.xpEHH.10k.1k.smooth.out
done
done
......@@ -25,7 +25,8 @@ rm Rplots.pdf
```
## Details of F2.R
In the following, the individual steps of the R script are documented. Is an executable R script that depends on a variety of image manipulation and data managing packages.
In the following, the individual steps of the R script are documented.
Is an executable R script that depends on a variety of image manipulation and data managing packages.
```{r head,results='hide',message=FALSE}
library(grid)
......
......@@ -372,7 +372,7 @@ Then, the base plot is created and stored in `p1`.
scale_fill_gradientn(name=expression(bar(r^2)),colours=clr,
values=rescale(c(1,.08,.03,.015,.01,0)),
limits=c(0,1),guide = 'legend',breaks=c(0,.005,.01,.02,.03,.1,1))+
# plotting layout theme
# plot layout theme
theme_void()+
theme(legend.position = c(.7,.75),legend.direction = 'vertical')
```
......
......@@ -38,5 +38,4 @@ F5 <- ggdraw(pGr8)+
draw_grob(nuGrob, 0.7, 0.37, 0.28, 0.1)+
draw_grob(puGrob, 0.7, .04, 0.28, 0.1)
#ggsave(plot = F5,filename = '../output/F5.pdf',width = 91.5,height = 145,units = 'mm',device = cairo_pdf)
ggsave(plot = F5,filename = '../output/F5.png',width = 91.5,height = 145,units = 'mm',dpi = 150)
\ No newline at end of file
......@@ -63,16 +63,14 @@ data2 <- merge(merge(bp2,cM2,by='Loci'),seqStart,by='LG') %>% arrange(LG,bin_bp)
mutate(Parent="Parent 2")
dataMERGE <- rbind(data1,data2)
strt <- seqStart%>% filter(grepl('LG',LG))
levels(dataMERGE$LG)
dataMERGE$LG <- drop.levels(dataMERGE$LG)
dataMERGE %>% str
lgclr <- rgb(.9,.9,.9)
annoclr <- rgb(.4,.4,.4)
secclr <- rgb(.75,.75,.75)
secclrLAB <- rgb(.4,.4,.4)
secclrLAB <- rgb(.45,.45,.45)
secScale <- 10
YL1 <- "Recombination density (cM/Mb)"
YL2 <- "Cummulative recombination (cM)"
......@@ -81,7 +79,7 @@ S08 <- ggplot()+
geom_rect(inherit.aes = F,data=karyo,aes(xmin=GSTART,xmax=GEND,
ymin=-Inf,ymax=Inf,fill=GROUP))+
geom_line(data=dataMERGE,aes(x=POS,y=`cM`/secScale),col=secclr)+
geom_smooth(data=dataMERGE,aes(x=POS,y=`abs_cM`,col=LG,fill=LG),size=1,method = 'loess',span=1.2,se=T)+
geom_smooth(data=dataMERGE,aes(x=POS,y=`abs_cM`,col=LG,fill=LG),size=1,method = 'loess',span=0.7,se=T)+
facet_grid(Parent~.,switch = "y")+
scale_fill_manual(values = c(NA,lgclr,alpha(clr_LGs,.3)),guide=F)+
scale_color_manual(values=clr_LGs)+
......@@ -107,4 +105,4 @@ S08 <- ggplot()+
strip.background = element_blank(),
strip.placement = "outside")
ggsave(plot = S08,filename = '../output/S08.pdf',width = 14,height = 5)
ggsave(plot = S08,filename = '../output/S08.pdf',width = 14,height = 5,device = cairo_pdf)
#!/usr/bin/env Rscript
library(grid)
library(gridSVG)
library(grImport2)
library(grConvert)
library(tidyverse)
library(cowplot)
library(hrbrthemes)
karyo <- read.csv('../../0_data/0_resources/F2.karyo.txt',sep='\t') %>%
mutate(GSTART=lag(cumsum(END),n = 1,default = 0),
GEND=GSTART+END,GROUP=rep(letters[1:2],12)) %>%
select(CHROM,GSTART,GEND,GROUP)
# global -------------
bh <- read.csv('../../2_output/08_popGen/05_fst/bel-hon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>% mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='BH',RUN='BH');
bp <- read.csv('../../2_output/08_popGen/05_fst/bel-hon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>% mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='BP',RUN='BP');
hp <- read.csv('../../2_output/08_popGen/05_fst/hon-boc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>% mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='HP',RUN='HP');
# NIG -------------
bhN <- read.csv('../../2_output/08_popGen/05_fst/nigbel-nighon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>% mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='BH',RUN='BHN');
bpN <- read.csv('../../2_output/08_popGen/05_fst/nigbel-nigboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>% mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='BP',RUN='BPN');
hpN <- read.csv('../../2_output/08_popGen/05_fst/nighon-nigboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>% mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='HP',RUN='HPN');
# PUE -------------
bhP <- read.csv('../../2_output/08_popGen/05_fst/puebel-puehon.50kb.5kb.windowed.weir.fst',sep='\t') %>%