Commit 0ccac08b authored by Kosmas Hench's avatar Kosmas Hench

update documentation of figure 1 - 3

parent d7507191
START END CHROM GSTART
0 25890264 LG01
0 31702000 LG02
0 23122023 LG03
0 25000711 LG04
0 20912025 LG05
0 24210077 LG06
0 24100755 LG07
0 27000498 LG08
0 27186781 LG09
0 21876389 LG10
0 21320898 LG11
0 25506241 LG12
0 26810734 LG13
0 22338278 LG14
0 21633613 LG15
0 24734982 LG16
0 28429713 LG17
0 19773282 LG18
0 27051319 LG19
0 24942031 LG20
0 20142184 LG21
0 13699534 LG22
0 17761902 LG23
0 14503443 LG24
get_anno_df_single_line <- function(searchLG,gfffile,
xrange,
genes_of_interest,
genes_of_sec_interest,
anno_rown=3){
gff_filter <- list(seqid=searchLG)
data <- as.data.frame(readGFF(gfffile,filter=gff_filter)) %>% mutate(Parent=as.character(Parent))#%>%
#rowwise() %>% mutate(Parent=ifelse(length(Parent)==0,ID,Parent))
mRNAs <- data %>% filter(type=='mRNA',end>xrange[1],start<xrange[2]) %>%
ungroup() %>% mutate(yl=row_number()%%anno_rown+2) %>% rowwise()%>%
mutate(checkStart =ifelse(start<xrange[1],-Inf,start),
checkEnd =ifelse(end>xrange[2],Inf,end),
ps=ifelse(strand=='-',checkEnd,checkStart),
pe=ifelse(strand=='-',checkStart,checkEnd),
labelx=mean(c(sort(c(xrange[1],ps))[2],
sort(c(xrange[2],pe))[1])),
window='bold(Gene)',
clr=ifelse(Parentgenename %in% genes_of_interest,"y",
ifelse(Parentgenename %in% genes_of_sec_interest,"z","x"))) %>%
select(-Parent);
names(mRNAs)[names(mRNAs)=='ID'] <- 'Parent'
exons <- data %>% filter(type=='exon',end>xrange[1],start<xrange[2]) %>%
merge(.,mRNAs %>% select(Parent,yl,clr),by='Parent',all.x=T) %>%
mutate(ps=ifelse(strand=='-',end,start),
pe=ifelse(strand=='-',start,end),
window='bold(Gene)')
return(list(mRNAs,exons))}
getDXY<- function(searchLG,xr){
np <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.pue-nig.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='nig-pue')
nu <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.nig-uni.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='nig-uni')
pu <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.pue-uni.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='pue-uni')
data_dxy <- rbind(np,nu,pu) %>% mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(d[XY])')
npB <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.puebel-nigbel.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='nig-pue',group='bel')
nuB <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.nigbel-unibel.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='nig-uni',group='bel')
puB <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.puebel-unibel.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='pue-uni',group='bel')
npH <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.puehon-nighon.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='nig-pue',group='hon')
nuH <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.nighon-unihon.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='nig-uni',group='hon')
puH <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.puehon-unihon.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='pue-uni',group='hon')
npP <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.pueboc-nigboc.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='nig-pue',group='pan')
nuP <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.nigboc-uniboc.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='nig-uni',group='pan')
puP <- read.csv(gzfile(paste('../../2_output/08_popGen/01_dxy/10kb/',searchLG,'/dxy.pueboc-uniboc.',searchLG,'.10kb-1kb.txt.gz',sep=''))) %>%
select(BIN_START,BIN_END,dxy) %>% mutate(run='pue-uni',group='pan')
data_dxy_pw <- rbind(npB,nuB,puB,npH,nuH,puH,npP,nuP,puP) %>%
mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(pw~d[XY])')
return(list(data_dxy_pw=data_dxy_pw,data_dxy=data_dxy))
}
\ No newline at end of file
getFSTS <- function(searchLG,xr,searchsnp,highclr){
np <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/pue-nig.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='nig-pue')
nu <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/nig-uni.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='nig-uni')
pu <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/pue-uni.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='pue-uni')
data_fst <- rbind(np,nu,pu) %>% mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(F[ST])')
npB <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/puebel-nigbel.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='nig-pue',group='bel')
nuB <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/nigbel-unibel.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='nig-uni',group='bel')
puB <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/puebel-unibel.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='pue-uni',group='bel')
npH <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/puehon-nighon.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='nig-pue',group='hon')
nuH <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/nighon-unihon.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='nig-uni',group='hon')
puH <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/puehon-unihon.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='pue-uni',group='hon')
npP <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/pueboc-nigboc.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='nig-pue',group='pan')
nuP <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/nigboc-uniboc.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='nig-uni',group='pan')
puP <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/pueboc-uniboc.10kb.1kb.windowed.',searchLG,'.fst',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,WEIGHTED_FST) %>% mutate(run='pue-uni',group='pan')
data_fst_pw <- rbind(npB,nuB,puB,npH,nuH,puH,npP,nuP,puP) %>%
mutate(POS=(BIN_START+BIN_END)/2,window='bolditalic(pw~F[ST])')
global_fst <- read.csv(paste('../../2_output/08_popGen/05_fst/10kb/',searchLG,'/global.',searchLG,'.fst',sep=''),sep='\t') %>%
filter(CHROM==searchLG,POS > xr[1],POS<xr[2]) %>%
select(POS,WEIR_AND_COCKERHAM_FST) %>%
mutate(window='bolditalic(F[ST])',clr=ifelse(POS%in%searchsnp,NA,'lightgray'),
clr2=ifelse(POS%in%searchsnp,highclr,NA))
return(list(data_fst_pw=data_fst_pw,global_fst=global_fst,data_fst=data_fst))
}
\ No newline at end of file
getGxP <- function(searchLG,xr){
np <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.pue-nig-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='nig-pue')
nu <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.nig-uni-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='nig-uni')
pu <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.pue-uni-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='pue-uni')
data_pfst <- rbind(np,nu,pu) %>% mutate(POS=(BIN_START+BIN_END)/2,window='bold(-log[10] (bolditalic(p)))')
npB <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.puebel-nigbel-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='nig-pue',group='bel')
nuB <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.nigbel-unibel-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='nig-uni',group='bel')
puB <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.puebel-unibel-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='pue-uni',group='bel')
npH <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.puehon-nighon-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='nig-pue',group='hon')
nuH <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.nighon-unihon-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='nig-uni',group='hon')
puH <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.puehon-unihon-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='pue-uni',group='hon')
npP <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.pueboc-nigboc-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='nig-pue',group='pan')
nuP <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.nigboc-uniboc-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='nig-uni',group='pan')
puP <- read.csv(paste('../../2_output/08_popGen/06_GxP/smoothed/10kb/',searchLG,'/gxp.pueboc-uniboc-',searchLG,'-10kb-1kb.txt',sep=''),sep='\t') %>%
select(BIN_START,BIN_END,avgp_wald) %>% mutate(run='pue-uni',group='pan')
data_pfst_pw <- rbind(npB,nuB,puB,npH,nuH,puH,npP,nuP,puP) %>%
mutate(POS=(BIN_START+BIN_END)/2,window='bold(pw~-log[10] (bolditalic(p)))')
return(list(data_pfst_pw=data_pfst_pw,data_pfst=data_pfst))
}
\ No newline at end of file
create_K_plot <- function(searchLG,gfffile,xr,searchgene,secondary_genes,searchsnp,muskID){
source('../../0_data/0_scripts/F3.functions.R');
source('../../0_data/0_scripts/F3.getFSTs.R')
source('../../0_data/0_scripts/F3.getGxP.R')
source('../../0_data/0_scripts/F3.getDXY.R')
highclr <- '#3bb33b'
theme_set(theme_minimal(base_size = 6))
if(xr[1]>xr[2]){print('error: ill conditioned x-range')}
wdth <- .3
# get gene models
df_list <- get_anno_df_single_line(searchLG=searchLG,gfffile=gfffile,xrange=xr,
genes_of_interest = searchgene,genes_of_sec_interest=secondary_genes,anno_rown=3)
# get fst values
fst_list <- getFSTS(searchLG,xr,searchsnp,highclr)
# data_fst_pw<-fst_list$data_fst_pw;
global_fst<-fst_list$global_fst;
data_fst<-fst_list$data_fst
# get pfst values
pfst_list <- getGxP(searchLG,xr)
# data_pfst_pw<-pfst_list$data_pfst_pw;
data_pfst<-pfst_list$data_pfst
# get dxy values
dxy_list <- getDXY(searchLG,xr)
#data_dxy_pw<-dxy_list$data_dxy_pw;
data_dxy<-dxy_list$data_dxy
clr <- c('#fb8620','#1b519c','#d93327')
annoclr <- c('lightgray',highclr,rgb(.3,.3,.3))[1:3]
df_list[[1]] <- df_list[[1]] %>% mutate(label=paste("italic(",tolower(Parentgenename),")",sep='') )
LW <- .3;lS <- 9;tS <- 6
plotSET <- theme(rect = element_blank(),
text=element_text(size=tS,color='black'),
panel.grid.major = element_line(colour = rgb(.92,.92,.92)),
panel.grid.minor = element_line(colour = rgb(.95,.95,.95)),
plot.background = element_blank(),
plot.margin = unit(c(3,7,3,3),'pt'),
panel.background = element_blank(),#element_rect(colour = 'gray',fill=NULL),
legend.position = 'none',#c(.8,.3),
legend.direction = 'horizontal',
axis.title.y = element_blank(),
axis.title.x = element_blank(),#element_text(size=13,face='bold',color='black'),
strip.text.y = element_text(size=lS,color='black'),
axis.text.x = element_blank(),#element_text(size=11,color='black'),
strip.placement = "outside",
panel.border = element_blank()
)
p11 <- ggplot()+coord_cartesian(xlim=xr/1000)+
geom_rect(data=df_list[[2]],aes(xmin=ps/1000,xmax=pe/1000,ymin=yl-(wdth/2),ymax=yl+(wdth/2),
fill=as.factor(clr),col=as.factor(clr),group=Parent),lwd=.1)+
geom_segment(data=(df_list[[1]]%>%filter(strand%in%c('+','-'))),
aes(x=ps/1000,xend=pe/1000,y=yl,yend=yl,group=Parent),lwd=.1,
arrow=arrow(length=unit(2,"pt"),type='closed'),color='black')+
geom_segment(data=(df_list[[1]]%>%filter(!strand%in%c('+','-'))),
aes(x=ps/1000,xend=pe/1000,y=yl,yend=yl,group=Parent),lwd=.2,color='black')+
geom_text(data=(df_list[[1]]%>%filter(Parentgenename%in%c(searchgene,secondary_genes))),
aes(x=labelx/1000,label=label,y=yl-.5),size=1.8,parse=T)+
facet_grid(window~.,scales='free_y',
switch = 'y',labeller = label_parsed,as.table = T)+
# color settings
scale_color_manual(values=annoclr,breaks=c("x","y","z"),guide=F)+
scale_fill_manual(values=annoclr,guide=F)+
scale_x_continuous(name=paste(searchLG,' (',muskID,', kb)'),expand=c(0,0),position = 'top')+
scale_y_continuous(breaks = seq(0,.75,length.out = 4))+
theme(rect = element_blank(),
text=element_text(size=tS,color='black'),
panel.grid.major = element_blank(),#element_line(colour = rgb(.92,.92,.92)),
panel.grid.minor = element_blank(),#element_line(colour = rgb(.95,.95,.95)),
plot.background = element_blank(),
panel.background = element_blank(),#element_rect(colour = 'gray',fill=NULL),
# plot.margin = unit(c(3,3,3,3),'pt'),
legend.position = c(.8,.3),
legend.direction = 'horizontal',
axis.title.y = element_blank(),
axis.title.x = element_text(size=lS,face='bold',color='black'),
strip.text.y = element_text(size=lS,color='black'),
axis.text.x = element_text(size=tS,color='black'),
strip.placement = "outside",
panel.border = element_blank()
);
if(muskID=='A'){
p11$layers[[4]]$data$label <- c("italic(sox10)","italic(rnaseh2a)")
} else if(muskID=='B'){
p11$layers[[4]]$data$label <- c("italic(casz1^3)","italic(casz1^2)","italic(casz1^1)")
} else if(muskID=='C'){
p11$layers[[4]]$data$label <- c('italic(hoxc10a)',"italic(hoxc11a)","italic(hoxc12a)","italic(hoxc13a)","italic(calcoco1)")
}else if(muskID=='E'){
p11$layers[[4]]$data$label <- c("italic(polr1d)","italic(ednrb)")
} else if(muskID=='F'){
p11$layers[[4]]$data$label <- c("italic(foxd3)","italic(alg6)","italic(efcab7)")
} else if(muskID=='D'){
p11$layers[[4]]$data$label <- c("italic(hcfc1)","italic(hcfc1[2])","italic(hcfc1[1])",'italic(paste(sws2a,"\u03B2"))',
'italic(paste(sws2a,"\u03B1"))',"italic(sw2b)","italic(lws)",
"italic(gnl3l)","italic(tfe3)","italic(mdfic2)","italic(cxxc1[3])",
"italic(cxxc1[1])",'italic(mbd1)','italic(ccdc120)')
}else if(muskID=='G'){
p11$layers[[4]]$data$label <- c("italic(lgals3bpb)","italic(mpnd)","italic(sh3gl1)","italic(rorb)")
}else if(muskID=='H'){
p11$layers[[4]]$data$label <- c("italic(alg2)","italic(sec61b)","italic(nr4a3)",
"italic(stx17)","italic(erp44)","italic(invs)")
}
p12 <- ggplot()+coord_cartesian(xlim=xr)+
geom_point(data=global_fst,aes(x=POS,y=WEIR_AND_COCKERHAM_FST),col=global_fst$clr,size=.2)+
geom_point(data=global_fst,aes(x=POS,y=WEIR_AND_COCKERHAM_FST),col=global_fst$clr2,size=.3)+
# add pairwise fsts as lines (10 kb / 1kb)
geom_line(data=(data_fst %>% filter(POS > xr[1],POS<xr[2]))
,aes(x=POS,y=WEIGHTED_FST,col=run),lwd=LW)+
# geom_line(data=(data_fst_pw %>% filter(POS > xr[1],POS<xr[2]))
# ,aes(x=POS,y=WEIGHTED_FST,col=run,linetype=group),lwd=1)+
scale_color_manual(values=c(clr,annoclr),breaks=c("nig-pue","nig-uni","pue-uni"),guide=F)+
scale_fill_manual(values=annoclr,guide=F)+
facet_grid(window~.,scales='free_y',
switch = 'y',labeller = label_parsed,as.table = T)+
scale_x_continuous(name=searchLG,expand=c(0,0),position = 'top')+
scale_y_continuous(breaks = seq(0,.75,length.out = 4))+
scale_linetype(name='location',label=c('Belize','Honduras','Panama'))+
guides(linetype= guide_legend(override.aes = list(color = 'black')))+plotSET
p13 <- ggplot()+coord_cartesian(xlim=xr)+
geom_line(data=(data_pfst %>% filter(POS > xr[1],POS<xr[2]))
,aes(x=POS,y=avgp_wald,col=run),lwd=LW)+
# geom_line(data=(data_pfst_pw %>% filter(POS > xr[1],POS<xr[2]))
# ,aes(x=POS,y=avgp_wald,col=run,linetype=group),lwd=1)+
scale_color_manual(values=c(clr,annoclr),breaks=c("nig-pue","nig-uni","pue-uni","x","y","z"),guide=F)+
facet_grid(window~.,scales='free_y',
switch = 'y',labeller = label_parsed,as.table = T)+
scale_x_continuous(name=searchLG,expand=c(0,0),position = 'top')+
scale_y_continuous(breaks = seq(0,9,length.out = 4))+
scale_linetype(name='location',label=c('Belize','Honduras','Panama'))+
guides(linetype= guide_legend(override.aes = list(color = 'black')))+plotSET
p14 <- ggplot()+coord_cartesian(xlim=xr)+
geom_line(data=(data_dxy %>% filter(POS > xr[1],POS<xr[2]))
,aes(x=POS,y=dxy,col=run),lwd=LW)+
# geom_line(data=(data_dxy_pw %>% filter(POS > xr[1],POS<xr[2]))
# ,aes(x=POS,y=dxy,col=run,linetype=group),lwd=1)+
scale_color_manual(values=c(clr,annoclr),breaks=c("nig-pue","nig-uni","pue-uni","x","y","z"),guide=F)+
facet_grid(window~.,scales='free_y',
switch = 'y',labeller = label_parsed,as.table = T)+
scale_x_continuous(name=searchLG,expand=c(0,0),position = 'top')+
scale_y_continuous(breaks = seq(0,.01,length.out = 5))+
scale_linetype(name='location',label=c('Belize','Honduras','Panama'))+
guides(linetype= guide_legend(override.aes = list(color = 'black')))+plotSET
p2 <- plot_grid(p11,p12,p13,p14,
ncol = 1,align = 'v',axis = 'r',rel_heights = c(1.3,rep(1,3)))
return(p2)}
\ No newline at end of file
......@@ -23,5 +23,5 @@ for k in {01..24};do
grep -v nan $j.LG$k.tmp | awk -v OFS="\t" -v w=$WIN -v s=$STEP -v r=$k 'BEGIN{window=w;slide=s;g=0;OL=int(window/slide);}
{if(NR==1 && r==01){print "CHROM","BIN_START","BIN_END","nSNPs","a vgPOS","BINrank","BINnr","SNPdens","avg"$4 ,"avg"$5,"avg"$6;}}
{if(NR>1){g=int(($2-1)/slide)+1;{for (i=0;i<=OL-1;i++){if(g-i >0){A[g-i]+=$2; B[g-i]++;C[g-i]+=$4;D[g-i]+=$5;E[g-i]+=$6;G[g-i]=g-i;H[g-i]=$1;}}}};}
END{for(i in A){print H[i],(G[i]-1)*slide,(G[i]-1)*slide+window,B[i],A[i]/B[i],G[i],k+1,B[i]/window,C[i]/B[i],D[i]/B[i],E[i]/B[i];k++}}' >> smoothed/50kb/$j.50k.5k.smooth
END{for(i in A){print H[i],(G[i]-1)*slide,(G[i]-1)*slide+window,B[i],A[i]/B[i],G[i],k+1,B[i]/window,C[i]/B[i],D[i]/B[i],E[i]/B[i];k++}}' >> smoothed/50kb/$j-gemma.lm.50k.5k.smooth
done
......@@ -12,6 +12,7 @@ NC='\033[0m';
j=$1
for k in {01..24};do
q="LG"$k;
echo -e "--- ${RED}$j${NC} - $k ---"
head -n 1 $j.out | cut -f 2,3,8,12,13,14 > $j.LG$k.tmp
......@@ -23,5 +24,5 @@ for k in {01..24};do
grep -v nan $j.LG$k.tmp | awk -v OFS="\t" -v w=$WIN -v s=$STEP -v r=$k 'BEGIN{window=w;slide=s;g=0;OL=int(window/slide);}
{if(NR==1 && r==01){print "CHROM","BIN_START","BIN_END","nSNPs","a vgPOS","BINrank","BINnr","SNPdens","avg"$4 ,"avg"$5,"avg"$6;}}
{if(NR>1){g=int(($2-1)/slide)+1;{for (i=0;i<=OL-1;i++){if(g-i >0){A[g-i]+=$2; B[g-i]++;C[g-i]+=$4;D[g-i]+=$5;E[g-i]+=$6;G[g-i]=g-i;H[g-i]=$1;}}}};}
END{for(i in A){print H[i],(G[i]-1)*slide,(G[i]-1)*slide+window,B[i],A[i]/B[i],G[i],k+1,B[i]/window,C[i]/B[i],D[i]/B[i],E[i]/B[i];k++}}' >> smoothed/10kb/$j.10k.1k.smooth
END{for(i in A){print H[i],(G[i]-1)*slide,(G[i]-1)*slide+window,B[i],A[i]/B[i],G[i],k+1,B[i]/window,C[i]/B[i],D[i]/B[i],E[i]/B[i];k++}}' >> smoothed/10kb/$q/gxp.$j-$q-10kb-1kb.txt
done
......@@ -10,7 +10,8 @@
cd $WORK/1-output/09_gff_from_IKMB
mkdir -p subsets
perl -anle 'BEGIN{my $gene_id; my $gene_name} if (/\s+gene\s+/) { /ID=(.+?);/; $gene_id = $1; /Name=(.+?);/; $gene_name = $1} print $_, ";Parentgene=$gene_id;Parentgenename=$gene_name"' HP.annotation.gff > HP.annotation.named.gff
sed 's/HYPUNI/HP/g; s/OPSB_1/SWS2abeta/g; s/OPSB_2/SWS2aalpha/g; s/OPSB_0/SWS2b/g; s/OPSR/LWS/g;' HP.annotation.gff | \
perl -anle 'BEGIN{my $gene_id; my $gene_name} if (/\s+gene\s+/) { /ID=(.+?);/; $gene_id = $1; /Name=(.+?)$/; $gene_name = $1} print $_, ";Parentgene=$gene_id;Parentgenename=$gene_name"' > HP.annotation.named.gff
for k in {01..24};
do echo $k;
......
......@@ -3,19 +3,19 @@ output: html_document
editor_options:
chunk_output_type: console
---
# F1
# Figure 1
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = './F1')
knitr::opts_knit$set(root.dir = './F_scripts')
```
## Summary
This is the accessory documentation of Figure 1.
The Figure can be recreated by using runing the **R** script F1.R:
The Figure can be recreated by runing the **R** script F1.R:
```sh
cd $WORK/3_figures/F1
cd $WORK/3_figures/F_scripts
Rscript --vanilla F1.R
rm Rplots.pdf
......
This diff is collapsed.
This diff is collapsed.
---
output: html_document
editor_options:
chunk_output_type: console
---
# Figure 4
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = './F_scripts')
```
## Summary
This is the accessory documentation of Figure 4.
The Figure can be recreated by runing the **R** script F4.R:
```sh
cd $WORK/3_figures/F_scripts
Rscript --vanilla F4.R
rm Rplots.pdf
```
\ No newline at end of file
---
output: html_document
editor_options:
chunk_output_type: console
---
# Figure 5
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = './F_scripts')
```
## Summary
This is the accessory documentation of Figure 5.
The Figure can be recreated by runing the **R** script F5.R:
```sh
cd $WORK/3_figures/F_scripts
Rscript --vanilla F5.R
rm Rplots.pdf
```
\ No newline at end of file
#!/usr/bin/env Rscript
library(grid)
library(gridSVG)
library(grImport2)
library(grConvert)
library(tidyverse)
library(cowplot)
library(hrbrthemes)
karyo <- read.csv('../../0_data/0_resources/F2.karyo.txt',sep='\t') %>%
mutate(GSTART=lag(cumsum(END),n = 1,default = 0),
GEND=GSTART+END,GROUP=rep(letters[1:2],12)) %>%
select(CHROM,GSTART,GEND,GROUP)
# global -------------
pn <- read.csv('../../2_output/08_popGen/05_fst/pue-nig.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PN');
pu <- read.csv('../../2_output/08_popGen/05_fst/pue-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PU');
nu <- read.csv('../../2_output/08_popGen/05_fst/nig-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NU');
# PAN -------------
ppnp <- read.csv('../../2_output/08_popGen/05_fst/pueboc-nigboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PPNP');
ppup <- read.csv('../../2_output/08_popGen/05_fst/pueboc-uniboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PPUP');
npup <- read.csv('../../2_output/08_popGen/05_fst/nigboc-uniboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NPUP');
# Bel -------------
pbnb <- read.csv('../../2_output/08_popGen/05_fst/puebel-nigbel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PBNB');
pbub <- read.csv('../../2_output/08_popGen/05_fst/puebel-unibel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PBUB');
nbub <- read.csv('../../2_output/08_popGen/05_fst/nigbel-unibel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NBUB');
# Hon -------------
phnh <- read.csv('../../2_output/08_popGen/05_fst/puehon-nighon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PHNH');
phuh <- read.csv('../../2_output/08_popGen/05_fst/puehon-unihon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PHUH');
nhuh <- read.csv('../../2_output/08_popGen/05_fst/nighon-unihon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NHUH');
# ------------------------------------------
data <- rbind(pn,pu,nu,ppnp,ppup,npup,pbnb,pbub,nbub,phnh,phuh,nhuh)
data$RUN <- factor(as.character(data$RUN),
levels=c('PN','NU','PU','PPNP','NPUP','PPUP','PBNB','NBUB','PBUB','PHNH','NHUH','PHUH'))
data$COL <- factor(as.character(data$COL),levels=c('PN','PU','NU'))
gwFST <- read.csv('../../2_output/08_popGen/05_fst/genome_wide_weighted_mean_fst.txt',sep='\t')
gwFST$RUN <- factor(gwFST$run,levels=levels(data$RUN))
threshs <- data %>% group_by(RUN) %>% summarise(thresh=quantile(WEIGHTED_FST,.9998))
data2 <- data %>% filter(RUN %in% levels(data$RUN)[1:3]) %>% merge(.,threshs,by='RUN',all.x=T) %>%
mutate(OUTL = (WEIGHTED_FST>thresh)) %>% filter(OUTL) %>% group_by(RUN) %>%
mutate(CHECK=cumsum(1-(BIN_START-lag(BIN_START,default = 0)==5000)),ID=paste(RUN,'-',CHECK,sep='')) %>%
ungroup() %>% group_by(ID) %>%
summarise(CHROM=CHROM[1],
xmin = min(BIN_START+GSTART),
xmax=max(BIN_END+GSTART),
xmean=mean(GPOS),
LGmin=min(BIN_START),
LGmax=min(BIN_END),
LGmean=min(POS),
RUN=RUN[1],COL=COL[1]) %>%
ungroup() %>% mutate(muskS = letters[as.numeric(as.factor(xmin))],
musk=LETTERS[c(1,3,4,4,1,2,2,3)])
musks <- data2 %>% group_by(musk) %>% summarise(MUSKmin=min(xmin),
MUSKmax=max(xmax),
MUSKminLG=min(LGmin),
MUSKmaxLG=max(LGmax)) %>%
merge(data2,.,by='musk') %>% mutate(muskX = ifelse(musk %in% c("A","B"),
MUSKmin-1e+07,MUSKmax+1e+07))
clr <- c('#fb8620','#d93327','#1b519c')
annoclr <- rgb(.4,.4,.4)
lgclr <- rgb(.9,.9,.9)
yl <- expression(italic('F'[ST]))
secScale <- 20
p1 <- ggplot()+
geom_rect(inherit.aes = F,data=karyo,aes(xmin=GSTART,xmax=GEND,
ymin=-Inf,ymax=Inf,fill=GROUP))+
geom_vline(data=data2,aes(xintercept=xmean),col=annoclr,lwd=.2)+
geom_point(data=data,aes(x=GPOS,y=WEIGHTED_FST,col=COL),size=.01)+
geom_text(data=musks,aes(x=muskX,y=.65,label=musk))+
geom_vline(data = data.frame(x=567000000),aes(xintercept=x),col=annoclr,lwd=.2)+
geom_rect(data=gwFST,aes(xmin=570*10^6,xmax=573*10^6,ymin=0,ymax=gwFST*secScale))+
scale_y_continuous(name = yl,breaks=0:4*0.2,labels = c(0,'',0.4,'',0.8),
sec.axis = sec_axis(~./secScale,labels = c(0,'',.02,'',.04)))+
scale_x_continuous(expand = c(0,0),limits = c(0,577*10^6),
breaks = c((karyo$GSTART+karyo$GEND)/2,571*10^6),
labels = c(1:24,"g.w."),position = "top")+
scale_color_manual(name='comparison',values=clr)+
scale_fill_manual(values = c(NA,lgclr),guide=F)+
theme_ipsum(grid = F,
axis_title_just = 'c',
axis_title_family = 'bold',
strip_text_size = 9)+
theme(plot.margin = unit(c(2,25,40,0),'pt'),
panel.spacing.y = unit(3,'pt'),
legend.position = 'none',
axis.title.x = element_blank(),
axis.ticks.y = element_line(color = annoclr),
axis.line.y = element_line(color = annoclr),
axis.text.y = element_text(margin = unit(c(0,3,0,0),'pt')),
axis.title.y = element_text(vjust = -1),
strip.background = element_blank(),
strip.text = element_blank()
)+
facet_grid(RUN~.)
legGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/legend-pw-cairo.svg"))))
carGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/caribbean-cairo.svg"))))
panGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/panama-cairo.svg"))))
belGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/belize-cairo.svg"))))
honGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/honduras-cairo.svg"))))
ys <- .0765
ysc <- .0057
yd <- .2195
boxes = data.frame(x=rep(.9675,4),y=c(ys,ys+yd+ysc,ys+(yd+ysc)*2,ys+(yd+ysc)*3))
F2 <- ggdraw(p1)+
geom_rect(data = boxes, aes(xmin = x, xmax = x + .02, ymin = y, ymax = y + yd),
colour = NA, fill = c(rep(lgclr,3),annoclr))+
draw_label('Global', x = .9775, y = boxes$y[4]+.08,
size = 9, angle=-90,col='white')+
draw_label('Belize', x = .9775, y = boxes$y[2]+.08,
size = 9, angle=-90)+
draw_label('Honduras', x = .9775, y = boxes$y[1]+.08,
size = 9, angle=-90)+
draw_label('Panama', x = .9775, y = boxes$y[3]+.08,
size = 9, angle=-90)+
draw_grob(legGrob, 0.01, 0, 1, 0.08)+
draw_grob(carGrob, 0.954, boxes$y[4]+.78*yd, .045, .045)+
draw_grob(belGrob, 0.954, boxes$y[2]+.78*yd, .045, .045)+
draw_grob(honGrob, 0.954, boxes$y[1]+.78*yd, .045, .045)+
draw_grob(panGrob, 0.954, boxes$y[3]+.78*yd, .045, .045)
ggsave(plot = F2,filename = '../output/F2.png',width = 183,height = 183,dpi = 200,units = 'mm')
#ggsave('fst_mac02_weight_99.98.eps',width = 183,height = 183,dpi = 200,units = 'mm')
#!/usr/bin/env Rscript
library(grid)
library(gridSVG)
library(grImport2)
library(grConvert)
library(tidyverse)
library(rtracklayer)
library(hrbrthemes)
library(cowplot)
require(rtracklayer)