Commit d3fa0614 authored by Kosmas Hench's avatar Kosmas Hench

add simulations

parent f9726e5c
......@@ -7,3 +7,4 @@
2_output/07_phased_variants/*
!2_output/08_popGen/00_synteny
2_output/09_expression/*
2_output/10_simulation/*
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
create_K_plot <- function(searchLG,gfffile,xr,searchgene,secondary_genes,searchsnp,muskID){
source('../../0_data/0_scripts/F3.functions.R');
source('../../0_data/0_scripts/F3.getFSTs.R')
source('../../0_data/0_scripts/F3.getGxP.R')
source('../../0_data/0_scripts/F3.getDXY.R')
highclr <- '#3bb33b'
......@@ -20,11 +19,6 @@ create_K_plot <- function(searchLG,gfffile,xr,searchgene,secondary_genes,searchs
global_fst<-fst_list$global_fst;
data_fst<-fst_list$data_fst
# get pfst values
pfst_list <- getGxP(searchLG,xr)
# data_pfst_pw<-pfst_list$data_pfst_pw;
data_pfst<-pfst_list$data_pfst
# get dxy values
dxy_list <- getDXY(searchLG,xr)
#data_dxy_pw<-dxy_list$data_dxy_pw;
......@@ -127,20 +121,6 @@ create_K_plot <- function(searchLG,gfffile,xr,searchgene,secondary_genes,searchs
guides(linetype= guide_legend(override.aes = list(color = 'black')))+plotSET
p13 <- ggplot()+coord_cartesian(xlim=xr)+
geom_line(data=(data_pfst %>% filter(POS > xr[1],POS<xr[2]))
,aes(x=POS,y=avgp_wald,col=run),lwd=LW)+
# geom_line(data=(data_pfst_pw %>% filter(POS > xr[1],POS<xr[2]))
# ,aes(x=POS,y=avgp_wald,col=run,linetype=group),lwd=1)+
scale_color_manual(values=c(clr,annoclr),
breaks=c("nig-pue","nig-uni","pue-uni","x","y","z"),guide=F)+
facet_grid(window~.,scales='free_y',
switch = 'y',labeller = label_parsed,as.table = T)+
scale_x_continuous(name=searchLG,expand=c(0,0),position = 'top')+
scale_y_continuous(breaks = seq(0,9,length.out = 4))+
scale_linetype(name='location',label=c('Belize','Honduras','Panama'))+
guides(linetype= guide_legend(override.aes = list(color = 'black')))+plotSET
p14 <- ggplot()+coord_cartesian(xlim=xr)+
geom_line(data=(data_dxy %>% filter(POS > xr[1],POS<xr[2]))
,aes(x=POS,y=dxy,col=run),lwd=LW)+
# geom_line(data=(data_dxy_pw %>% filter(POS > xr[1],POS<xr[2]))
......@@ -153,7 +133,7 @@ create_K_plot <- function(searchLG,gfffile,xr,searchgene,secondary_genes,searchs
scale_linetype(name='location',label=c('Belize','Honduras','Panama'))+
guides(linetype= guide_legend(override.aes = list(color = 'black')))+plotSET
p2 <- plot_grid(p11,p12,p13,p14,
p2 <- plot_grid(p11,p12,p13,
ncol = 1,align = 'v',axis = 'r',
rel_heights = c(1.3,rep(1,3)))
rel_heights = c(1.3,rep(1,2)))
return(p2)}
\ No newline at end of file
......@@ -93,7 +93,7 @@ trplot <- function(sel){
Nx2 = (y+scling[sclr])/10000)
zmLab <- data.frame(x=c(s1+.5*l1,s2+.5*l2,s3+.5*l3,s4+.5*l4),
label=c('LG09 (a)','LG12 (b)','LG12 (c)','LG17 (d)')) %>%
label=c('LG09 (A)','LG12 (B)','LG12 (C)','LG17 (D)')) %>%
mutate(sclr=1:4, Nx= (x+scling[sclr])/10000)
zmEND <- data.frame(x=c(s1,s1+l1,
......@@ -159,4 +159,23 @@ trplot <- function(sel){
}
return(p1)
}
run_one_sample_test <- function(pop,n,data){
df <- data %>%
filter(run == pop)
x <- df$R.2 %>% na.omit() %>% as.vector()
exp_mu <- 1/(2*n)
test_result <- wilcox.test(x,mu = exp_mu)
return(tibble(population = pop,
mean = x %>% mean,
sd = x %>% sd,
se = x %>% se,
var = x %>% var,
n = length(x),
exp_mu = exp_mu) %>% bind_cols(
test_result %>% broom::tidy()))
}
\ No newline at end of file
......@@ -28,13 +28,27 @@ dt2 <- BW %>%
BC <-rgb(.7,.7,.7)
clr <-colorRampPalette(colors = c('white',BC,'black'))(8)
# tests
locations <- c('Global',"Panana","Belize","Honduras","H. puella","H. nigricans","H. unicolor")
sample_sizes <- c(110,39,36,35,37,37,36)
test_gw <- purrr::map2(locations,sample_sizes,run_one_sample_test,data=BW) %>% bind_rows()
test_gw$xS <- factor(test_gw$population,
levels = c('Global',"Panana","Belize","Honduras",
"H. nigricans","H. puella","H. unicolor")) %>%
as.numeric()
pBOX <- ggplot(BW,aes(x=run,y=R.2))+
geom_boxplot(fill=BC,width=.7,outlier.size = .1)+
coord_fixed(ylim=c(0,.031),ratio = 250)+
geom_segment(inherit.aes = FALSE,
data=test_gw,aes(x=xS-.38,
xend=xS+.38,
y=exp_mu,yend=exp_mu),col='red',size=.5)+
geom_point(inherit.aes = F, data=dt2,aes(x=run,y=val,fill=type),shape=23,size=1)+
scale_x_discrete(labels = expression(Global,Panama,Belize,Honduas,
italic("H. nigricans"),italic("H. puella"),italic("H. unicolor")))+
scale_y_continuous(name=expression(genome~wide~ILD~(italic(r)^2)))+
scale_y_continuous(name=expression(Genome~wide~ILD~(italic(r)^2)))+
scale_fill_manual('',values = clr[c(6,1)],labels=c('mean','median'))+
guides(shape = guide_legend(ncol = 1))+
theme(legend.position = c(-.4,1.27),
......@@ -43,4 +57,6 @@ pBOX <- ggplot(BW,aes(x=run,y=R.2))+
axis.text.y = element_text(size=7),
axis.text.x = element_text(size=7,angle=45,hjust = 1),
panel.background = element_blank(),
plot.background = element_blank())
\ No newline at end of file
plot.background = element_blank())
......@@ -27,11 +27,25 @@ BoxGenes_summary <- dataBoxGenes %>%
select(xS,meanR2,medR2) %>%
gather(key = 'type',value = 'val',2:3)
# tests
locations <- c("global","boc","bel","hon","nig","pue","uni")
sample_sizes <- c(110,39,36,35,37,37,36)
test_area <- purrr::map2(locations,sample_sizes,run_one_sample_test,data=dataBoxGenes) %>%
bind_rows()
test_area$xS <- factor(test_area$population,
levels = c("global","boc","bel","hon","nig","pue","uni")) %>%
as.numeric()
boxGenes <- ggplot(dataBoxGenes,aes(x=xS))+
geom_boxplot(aes(y=R.2),fill=BC,width=.7,outlier.size = .1) +
geom_segment(inherit.aes = FALSE,
data=test_area,aes(x=xS-.38,
xend=xS+.38,
y=exp_mu,yend=exp_mu),col='red',size=.5)+
geom_point(data=BoxGenes_summary,aes(y=val,fill=type),shape=23,size=1)+
coord_fixed(ylim=c(0,.031),ratio = 250)+
scale_y_continuous(expression(candidate~gene~ILD~(italic(r)^2)))+
scale_y_continuous(expression(Candidate~gene~ILD~(italic(r)^2)))+
scale_x_discrete(labels = expression(Global,Panama,Belize,Honduas,
italic("H. nigricans"),italic("H. puella"),italic("H. unicolor")))+
scale_fill_manual('',values = clr[c(6,1)],labels=c('mean','median'))+
......@@ -43,4 +57,5 @@ boxGenes <- ggplot(dataBoxGenes,aes(x=xS))+
axis.text.y = element_text(size=7),
axis.text.x = element_text(size=7,angle=45,hjust = 1),
panel.background = element_blank(),
plot.background = element_blank())
\ No newline at end of file
plot.background = element_blank())
......@@ -139,7 +139,7 @@ trplot <- function(sel){
zmLab <- data.frame(x=c(s1+.5*lALL,s2+.5*lALL,s3+.5*lALL,s4+.5*lALL,
s5+.5*lALL,s6+.5*lALL,s7+.5*lALL,s8+.5*lALL),
label=c("LG4 (e)","LG08 (f)","LG08 (g)",'LG09 (a)','LG12 (b)','LG12 (c)','LG17 (d)',"LG20 (h)")) %>%
label=c("LG4 (E)","LG08 (F)","LG08 (G)",'LG09 (A)','LG12 (B)','LG12 (C)','LG17 (D)',"LG20 (H)")) %>%
mutate(sclr=1:8, Nx= (x+scling[sclr])/10000)
zmEND <- data.frame(x=c(s1,s1+lALL,
......
se <- function(x){
x2 <- x[!is.na(x)]
return(sd(x2)/sqrt(length(x2)))}
trplot <- function(sel){
files <- c("global","boc","bel","hon","pue","nig","uni","nig-pue","nig-uni","pue-uni")
data <- read.csv(paste('../../2_output/08_popGen/07_LD/',files[sel],'-10000-bins.txt',sep=''),sep='\t')
message(files[sel])
s1=17620000;s2=19910000;s3=21960000;s4=22320000;
e2=20660000;e3=22460000;
stp=20000;
l1=500000;l2=750000;l3=500000;l4=500000
scling <- c(-s1,-s2+(l1+stp),-s3+(l1+l2+2*stp),-s4+(l1+l2+l3+(stp*3)))
dt <- data %>% mutate(miX = floor(Mx/10000),miY=floor(My/10000))
print(max(dt$Mval,na.rm=TRUE))
genes <- data.frame(start=c(17871610,20186151,22225149,22553187,22556763,22561894,22573388),
end=c(17873915,20347811,22228342,22555052,22559742,22566321,22575503),
sclr=c(1,2,3,4,4,4,4),
LG=c("LG09","LG12-1","LG12-2","LG17","LG17","LG17","LG17"),
name=c("sox10",'casz1',"hoxc13a","sws2a\u03B1","sws2a\u03B2","sws2b","lws"))
genes <- genes %>% mutate(Nx1 = (start+scling[sclr])/10000,Nx2 = (end+scling[sclr])/10000,
labPOS = (Nx1+Nx2)/2)
genes$labPOS[genes$name %in% c("sws2a\u03B1","sws2a\u03B2","sws2b","lws")] <- genes$labPOS[genes$name %in% c("sws2a\u03B1","sws2a\u03B2","sws2b","lws")]+c(-16,-1,12,20)
GZrS <- 40000;GZrS2 <- 20000; #gene zoom offset
BS <- c(-5,5,-4,3,-8,8,-20,-10,-6,5,7,16,17,22)*10000 # Backshifter for gene zoom
GZdf <- data.frame(x=c(genes$start[1],genes$end[1],genes$end[1]+GZrS+BS[1],genes$start[1]+GZrS+BS[2],genes$start[1],
genes$start[2],genes$end[2],genes$end[2]+GZrS+BS[3],genes$start[2]+GZrS+BS[4],genes$start[2],
genes$start[3],genes$end[3],genes$end[3]+GZrS+BS[5],genes$start[3]+GZrS+BS[6],genes$start[3],
genes$start[4],genes$end[4],genes$end[4]+GZrS+BS[7],genes$start[4]+GZrS+BS[8],genes$start[4],
genes$start[5],genes$end[5],genes$end[5]+GZrS+BS[9],genes$start[5]+GZrS+BS[10],genes$start[5],
genes$start[6],genes$end[6],genes$end[6]+GZrS+BS[11],genes$start[6]+GZrS+BS[12],genes$start[6],
genes$start[7],genes$end[7],genes$end[7]+GZrS+BS[13],genes$start[7]+GZrS+BS[14],genes$start[7])+GZrS2,
y=c(genes$start[1],genes$end[1],genes$end[1]-GZrS+BS[1],genes$start[1]-GZrS+BS[2],genes$start[1],
genes$start[2],genes$end[2],genes$end[2]-GZrS+BS[3],genes$start[2]-GZrS+BS[4],genes$start[2],
genes$start[3],genes$end[3],genes$end[3]-GZrS+BS[5],genes$start[3]-GZrS+BS[6],genes$start[3],
genes$start[4],genes$end[4],genes$end[4]-GZrS+BS[7],genes$start[4]-GZrS+BS[8],genes$start[4],
genes$start[5],genes$end[5],genes$end[5]-GZrS+BS[9],genes$start[5]-GZrS+BS[10],genes$start[5],
genes$start[6],genes$end[6],genes$end[6]-GZrS+BS[11],genes$start[6]-GZrS+BS[12],genes$start[6],
genes$start[7],genes$end[7],genes$end[7]-GZrS+BS[13],genes$start[7]-GZrS+BS[14],genes$start[7])-GZrS2,
grp=rep(letters[1:7],each=5)) %>%
mutate(sclr=rep(c(1,2,3,4,4,4,4),each=5),
Nx1 = (x+scling[sclr])/10000,
Nx2 = (y+scling[sclr])/10000)
clr = c(viridis::inferno(5)[c(1,1:5)])
Gcol <- '#3bb33b'
Zcol = rgb(.94,.94,.94)
DG <- rgb(.4,.4,.4)
LGoffset <- 15
GLABoffset <- 8
rS <- 100000 # width of grey annotation band
zmRange <- data.frame(x=c(s1,s1+l1,s1+l1+rS,s1+rS,s1,
s2,s2+l2,s2+l2+rS,s2+rS,s2,
s3,s3+l3,s3+l3+rS,s3+rS,s3,
s4,s4+l4,s4+l4+rS,s4+rS,s4),
y=c(s1,s1+l1,s1+l1-rS,s1-rS,s1,
s2,s2+l2,s2+l2-rS,s2-rS,s2,
s3,s3+l3,s3+l3-rS,s3-rS,s3,
s4,s4+l4,s4+l4-rS,s4-rS,s4),
grp=rep(letters[1:4],each=5)) %>%
mutate(sclr=rep(1:4,each=5),
Nx1 = (x+scling[sclr])/10000-1,
Nx2 = (y+scling[sclr])/10000)
rS2 <- .75*rS
zmRange2 <- data.frame(x=c(s1,s1+l1,s1+l1+rS2,s1+rS2,s1,
s2,s2+l2,s2+l2+rS2,s2+rS2,s2,
s3,s3+l3,s3+l3+rS2,s3+rS2,s3,
s4,s4+l4,s4+l4+rS2,s4+rS2,s4),
y=c(s1,s1+l1,s1+l1-rS2,s1-rS2,s1,
s2,s2+l2,s2+l2-rS2,s2-rS2,s2,
s3,s3+l3,s3+l3-rS2,s3-rS2,s3,
s4,s4+l4,s4+l4-rS2,s4-rS2,s4),
grp=rep(letters[1:4],each=5)) %>%
mutate(sclr=rep(1:4,each=5),
Nx1 = (x+scling[sclr])/10000-1,
Nx2 = (y+scling[sclr])/10000)
rS3 <- .07*rS
zmRange3 <- data.frame(x=c(s1,s1+l1,s1+l1+rS3,s1+rS3,s1,
s2,s2+l2,s2+l2+rS3,s2+rS3,s2,
s3,s3+l3,s3+l3+rS3,s3+rS3,s3,
s4,s4+l4,s4+l4+rS3,s4+rS3,s4),
y=c(s1,s1+l1,s1+l1-rS3,s1-rS3,s1,
s2,s2+l2,s2+l2-rS3,s2-rS3,s2,
s3,s3+l3,s3+l3-rS3,s3-rS3,s3,
s4,s4+l4,s4+l4-rS3,s4-rS3,s4),
grp=rep(letters[1:4],each=5)) %>%
mutate(sclr=rep(1:4,each=5),
Nx1 = (x+scling[sclr])/10000-1,
Nx2 = (y+scling[sclr])/10000)
zmLab <- data.frame(x=c(s1+.5*l1,s2+.5*l2,s3+.5*l3,s4+.5*l4),
label=c('LG09 (A)','LG12 (B)','LG12 (C)','LG17 (D)')) %>%
mutate(sclr=1:4, Nx= (x+scling[sclr])/10000)
zmEND <- data.frame(x=c(s1,s1+l1,
s2,s2+l2,
s3,s3+l3,
s4,s4+l4)) %>%
mutate(sclr=rep(1:4,each=2),
Nx = ((x+scling[sclr])/10000)+rep(c(2.5,-4),4),
label=round((x/1000000),2))
textSCALE1 <- c(1.8,rep(NA,6),.7)
textSCALE2 <- c(2,rep(NA,6),1)
textSCALE3 <- c(3.5,rep(NA,6),1.75)
p1 <- ggplot(dt %>% filter(!is.na(Mval)),aes(fill=Mval))+
coord_equal()+
geom_polygon(inherit.aes = F,data=zmRange,
aes(x=Nx1+1,y=Nx2-1,group=grp),fill='lightgray')+
geom_polygon(inherit.aes = F,data=zmRange2,
aes(x=Nx1+11,y=Nx2-11,group=grp),fill=DG)+
geom_polygon(inherit.aes = F,data=zmRange3,
aes(x=Nx1+1.1,y=Nx2-1.1,group=grp),fill=DG)+
geom_polygon(inherit.aes = F,data=GZdf,
aes(x=Nx1,y=Nx2,group=grp),fill=Zcol)+
geom_segment(inherit.aes = F,data=genes,
aes(x=Nx1+1,y=Nx1-1,xend=Nx2+1,yend=Nx2-1),
col=Gcol,size=1.5)+
geom_tile(aes(x=miX,y=miY))+
geom_text(inherit.aes = F,data=zmEND,
aes(x=Nx+LGoffset,y=Nx-LGoffset,label=paste(label,'\n(Mb)')),
angle=45,size=textSCALE1[sel])+
geom_text(inherit.aes = F,data=genes,
aes(x=labPOS+GLABoffset,y=labPOS-GLABoffset,label=name),
angle=-45,fontface='italic',size=textSCALE2[sel])+
geom_text(inherit.aes = F,data=zmLab,
aes(x=Nx+LGoffset-.8,y=Nx-LGoffset+.8,label=label),
angle=-45,fontface='bold',size=textSCALE3[sel],col='white')+
scale_x_continuous(expand=c(0,0))+
scale_y_continuous(expand=c(0,0),
trans = 'reverse')+
scale_fill_viridis_c(name=expression(bar(italic(r)^2)))+
theme_void()+
theme(legend.position = c(.9,.75),
legend.direction = 'vertical')
return(p1)
}
\ No newline at end of file
BW <- read.csv('../../2_output/08_popGen/07_LD/subsets/glob_between.interchrom.hap.ld',sep='\t') %>%
select(R.2) %>%
mutate(type='Global',run='Global')
for(j in 1:6){
flS <- c("boc","bel","hon","pue","nig","uni")
flL <- c("Panana","Belize","Honduras","H. puella","H. nigricans","H. unicolor")
flT <- c(rep('Geo',3),rep('Spec',3))
k <- flS[j]
q <- flL[j]
u <- flT[j]
print(j)
BW <- BW %>%
rbind(.,(read.csv(paste('../../2_output/08_popGen/07_LD/subsets/glob_between.',k,'.interchrom.hap.ld',sep=''),
sep='\t') %>%
select(R.2) %>%
mutate(type=u,run=q)))
}
BW$run <- factor(BW$run,levels=c('Global',"Panana","Belize","Honduras",
"H. nigricans","H. puella","H. unicolor"))
dt2 <- BW %>%
group_by(run) %>%
summarise(meanR2=mean(R.2,na.rm = T),medR2=median(R.2,na.rm = T)) %>%
gather(key = 'type',value = 'val',2:3)
BC <-rgb(.7,.7,.7)
clr <-colorRampPalette(colors = c('white',BC,'black'))(8)
pBOX <- ggplot(BW,aes(x=run,y=R.2))+
geom_boxplot(fill=BC,width=.7,outlier.size = .1)+
#coord_fixed(ylim=c(0,.031),ratio = 250)+
geom_point(inherit.aes = F, data=dt2,aes(x=run,y=val,fill=type),shape=23,size=1)+
ggtitle(expression(Genome~wide~ILD~(italic(r)^2)))+
scale_x_discrete(labels = expression(Global,Panama,Belize,Honduas,
italic("H. nigricans"),italic("H. puella"),italic("H. unicolor")))+
scale_y_continuous()+
scale_fill_manual('',values = clr[c(6,1)],labels=c('mean','median'))+
guides(shape = guide_legend(ncol = 1))+
theme(legend.position = c(-.4,1.27),
text = element_text(size=10),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=7),
axis.text.x = element_text(size=7,angle=45,hjust = 1),
panel.background = element_blank(),
plot.background = element_blank())
\ No newline at end of file
dataBoxGenes <- data.frame(R.2=c(),run=c(),stringsAsFactors = F)
for (file in dir("../../2_output/08_popGen/07_LD/",pattern = "interchrom.hap.ld.gz")) {
nm <- str_remove_all(file, "spotlight.") %>% str_remove_all(.,".interchrom.hap.ld.gz")
if(nm %in% c("global","uni","pue","nig","hon","bel","boc")){
dataBoxGenes <- read.csv(gzfile(paste('../../2_output/08_popGen/07_LD/',file,sep='')),sep='\t') %>%
mutate(run = nm) %>%
select(R.2,run) %>%
rbind(dataBoxGenes,.)
}
}
dataBoxGenes$xS <- factor(dataBoxGenes$run,
levels = c("global","boc","bel","hon","nig","pue","uni"))
BoxGenes_summary <- dataBoxGenes %>%
group_by(xS) %>%
summarise(run = run[1],
meanR2 = mean(R.2,na.rm = T),
medR2 = median(R.2,na.rm = T),
sdR2 = sd(R.2,na.rm=T),
seR2 = se(R.2),
nanr = sum(is.na(R.2)),
minR2 = min(R.2,na.rm = T),
maxR2 = max(R.2,na.rm = T),
lengthR2 = length(R.2)) %>%
select(xS,meanR2,medR2) %>%
gather(key = 'type',value = 'val',2:3)
boxGenes <- ggplot(dataBoxGenes,aes(x=xS))+
geom_boxplot(aes(y=R.2),fill=BC,width=.7,outlier.size = .1) +
geom_point(data=BoxGenes_summary,aes(y=val,fill=type),shape=23,size=1)+
#coord_fixed(ylim=c(0,.031),ratio = 250)+
ggtitle(expression(Candidate~gene~ILD~(italic(r)^2)))+
scale_y_continuous(expression(Candidate~gene~ILD~(italic(r)^2)))+
scale_x_discrete(labels = expression(Global,Panama,Belize,Honduas,
italic("H. nigricans"),italic("H. puella"),italic("H. unicolor")))+
scale_fill_manual('',values = clr[c(6,1)],labels=c('mean','median'))+
guides(shape = guide_legend(ncol = 1))+
theme(legend.position = c(.35,1.27),
text = element_text(size=10),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size=7),
axis.text.x = element_text(size=7,angle=45,hjust = 1),
panel.background = element_blank(),
plot.background = element_blank())
\ No newline at end of file
This diff is collapsed.
label_value <- function(labels, multi_line = TRUE) {
labels <- lapply(labels, as.character)
if (multi_line) {
labels
} else {
collapse_labels_lines(labels)
}
}
label_variable <- function(labels, multi_line = TRUE) {
if (multi_line) {
row <- as.list(names(labels))
} else {
row <- list(paste(names(labels), collapse = ", "))
}
lapply(row, rep, nrow(labels) %||% length(labels[[1]]))
}
# parsed
label_both_parsed <- function (labels, multi_line = TRUE) {
value <- label_value(labels, multi_line = multi_line)
variable <- label_variable(labels, multi_line = multi_line)
labls <- purrr::map2(variable,value,str_c,sep=':')
if (multi_line) {
lapply(unname(labls), lapply, function(values) {
c(parse(text = as.character(values)))
})
}
else {
lapply(labels, function(values) {
values <- paste0("list(", values, ")")
lapply(values, function(expr) c(parse(text = expr)))
})
}
}
......@@ -13,11 +13,11 @@ for k in {01..24}; do
sed "s/XXnameXX/$i/" $WORK/2_reSeq-scripts/2-1_genotyping/0_templates/2.1.11.shapeit_temp.sh > $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;
echo "cd $WORK/2_output/07_phased_variants/$i/" >> $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;
echo "shapeit -assemble --input-vcf 4_bi-allelic_snps.$i.vcf.gz --input-pir PIRsList-$i.txt --thread 8 -O 5_phased-$i " >> $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;
echo "shapeit -assemble --input-vcf 4_bi-allelic_snps.$i.vcf.gz --input-pir PIRsList-$i.txt --thread 8 -O 5_phased-$i " >> $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;
echo "echo '---- shapeit done ----' " >> $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;
echo "shapeit -convert --input-hap 5_phased-$i --output-vcf 5_phased-$i.vcf " >> $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;;
echo "shapeit -convert --input-hap 5_phased-$i --output-vcf 5_phased-$i.vcf " >> $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;;
echo "echo '---- conversion done ----' " >> $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;
echo "gzip 5_phased-$i.vcf " >> $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;
echo "gzip 5_phased-$i.vcf " >> $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit/2.1.11.shapeit_$i.sh;
cd $WORK/2_reSeq-scripts/2-1_genotyping/2-1-11_shapeit
qsub 2.1.11.shapeit_$i.sh;
......
......@@ -4,8 +4,8 @@
#PBS -l cpunum_job=8
#PBS -N fXXnameXXXXjXX
#PBS -q clexpress
#PBS -o 2.2.9.1.iHH_XXnameXX_XXlgXX.stdout
#PBS -e 2.2.9.1.iHH_XXnameXX_XXlgXX.stderr
#PBS -o 2.2.12.1.iHH_XXnameXX_XXlgXX.stdout
#PBS -e 2.2.12.1.iHH_XXnameXX_XXlgXX.stderr
cd $WORK/2_output/08_popGen
......
......@@ -4,8 +4,8 @@
#PBS -l cpunum_job=8
#PBS -N eXXnameXXXXjXX
#PBS -q clexpress
#PBS -o 2.2.9.2.xpEHH_XXnameXX_XXlgXX.stdout
#PBS -e 2.2.9.2.xpEHH_XXnameXX_XXlgXX.stderr
#PBS -o 2.2.12.2.xpEHH_XXnameXX_XXlgXX.stdout
#PBS -e 2.2.12.2.xpEHH_XXnameXX_XXlgXX.stderr
cd $WORK/2_output/08_popGen
......
#!/bin/bash
# starter script to rewrite and submit the 2.2.9.1.iHH_temp.sh script once per species pair
# starter script to rewrite and submit the 2.2.12.1.iHH_temp.sh script once per species pair
cd $WORK/2_reSeq-scripts/2-2_popGen
......@@ -11,10 +11,10 @@ for k in {1..12};do
for j in {01..24};do
LG="LG"$j;
echo "-- "$LG" ---";
sed "s/XXnameXX/$POP1/g; s/XXname2XX/vcftools\_$POP1.pop/g; s/XXlgXX/$LG/g; s/XXjXX/$j/g" 00_templates/2.2.9.1.iHH_temp.sh > 08_iHH/2.2.9.1.$POP1.$LG.iHH.sh
sed "s/XXnameXX/$POP1/g; s/XXname2XX/vcftools\_$POP1.pop/g; s/XXlgXX/$LG/g; s/XXjXX/$j/g" 00_templates/2.2.12.1.iHH_temp.sh > 08_iHH/2.2.12.1.$POP1.$LG.iHH.sh
cd 08_iHH
qsub 2.2.9.1.$POP1.$LG.iHH.sh
qsub 2.2.12.1.$POP1.$LG.iHH.sh
cd ..
done
done
......@@ -13,10 +13,10 @@ for k in {1..12};do
for j in {01..24};do
LG="LG"$j;
echo "-- "$LG" ---";
sed "s/XXnameXX/$POP1-$POP2/g; s/XXname2XX/vcftools\_$POP1.pop/g; s/XXname3XX/vcftools\_$POP2.pop/g; s/XXlgXX/$LG/g; s/XXjXX/$j/g" 00_templates/2.2.9.2.xpEHH_temp.sh > 09_xpEHH/2.2.9.2.$POP1-$POP2.$LG.xpEHH.sh
sed "s/XXnameXX/$POP1-$POP2/g; s/XXname2XX/vcftools\_$POP1.pop/g; s/XXname3XX/vcftools\_$POP2.pop/g; s/XXlgXX/$LG/g; s/XXjXX/$j/g" 00_templates/2.2.12.2.xpEHH_temp.sh > 09_xpEHH/2.2.12.2.$POP1-$POP2.$LG.xpEHH.sh
cd 09_xpEHH
qsub 2.2.9.2.$POP1-$POP2.$LG.xpEHH.sh
qsub 2.2.12.2.$POP1-$POP2.$LG.xpEHH.sh
cd ..
done
done
......@@ -4,8 +4,8 @@
#PBS -l cpunum_job=1
#PBS -N iHH12merge
#PBS -q clexpress
#PBS -o 2.2.9.3.merge_iHH12.stdout
#PBS -e 2.2.9.3.merge_iHH12.stderr
#PBS -o 2.2.12.3.merge_iHH12.stdout
#PBS -e 2.2.12.3.merge_iHH12.stderr
cd $WORK/2_output/08_popGen/09_iHH12
......
......@@ -4,8 +4,8 @@
#PBS -l cpunum_job=1
#PBS -N iHH12merge
#PBS -q clexpress
#PBS -o 2.2.9.4.merge_xpEHH.stdout
#PBS -e 2.2.9.4.merge_xpEHH.stderr
#PBS -o 2.2.12.4.merge_xpEHH.stdout
#PBS -e 2.2.12.4.merge_xpEHH.stderr
cd $WORK/2_output/08_popGen/10_xpEHH
......
......@@ -3,7 +3,7 @@
cd $WORK/2_reSeq-scripts/2-3_expression
for SAMPLE in $(cat $WORK/0_data/0_resources/retinaSAmples.txt);do
for SAMPLE in $(cat $WORK/0_data/0_resources/retinaSamples.txt);do
echo "--$SAMPLE--";
sed "s/XXnameXX/$SAMPLE/g" 2.3.3.kallisto_temp.sh > 2-3-3_kallisto/2.3.3.kallisto.$SAMPLE.sh
......
#!/usr/bin/env nextflow
/* nextflow run msms.nf -with-dag msms.png -c nextflow.config -resume */
extend = Channel.from( 500000 )
recom = Channel.from( 0.02 )
sel = Channel.from( 0.05, 0.1, 0.5 )
ne = Channel.from( 1000, 10000, 100000 )
mig = Channel.from( 0.00001 , 0.0001, 0.001, 0.01, 0.1 )
divt = Channel.from( 10000, 100000, 1000000 )
dom = Channel.from( 0.5 )
par = extend
.combine( recom )
.combine( ne )
.combine( sel )
.combine( mig )
.combine( divt )
.combine( dom )
.map{ row -> [ ext:row[0], rec:row[1], ne:row[2], sel:row[3], mig:row[4], divt:row[5], dom:row[6]] }
process msms {
input:
val( x ) from par
output:
set val( x ), file( "*.vcf.gz" ), file( "*_${x.dom}.txt" ) into ( msms_output, vcf2pca )
set val( x ), file( "fst_decay.msms.seq_gen.fa" ) into ( vcf2geno )
script:
"""
rec=\$(awk "BEGIN {print 4*${x.ne}*${x.rec}}")
sel=\$(awk "BEGIN {print 2*${x.ne}*${x.sel}}")
mig=\$(awk "BEGIN {print 4*${x.ne}*${x.mig}}")
divt=\$(awk "BEGIN {print ${x.divt}/(4*${x.ne})}")
dom=\$(awk -v s=\$sel "BEGIN {print s*${x.dom}}")
antidom=\$(awk -v s=\$sel "BEGIN {print s*(1-${x.dom})}")
msms 24 1 \
-T \
-I 2 12 12 \
-ej \$divt 2 1 \
-m 1 2 \$mig \
-m 2 1 \$mig \
-r \$rec ${x.ext} \
-Sc 0 1 \$sel \$dom 0 \
-Sc 0 2 0 \$antidom \$sel \