Commit a5aeb3c5 authored by Kosmas Hench's avatar Kosmas Hench

version: revision R1

parent 89b22387
This diff is collapsed.
......@@ -81,24 +81,24 @@ create_K_plot <- function(searchLG,gfffile,xr,searchgene,secondary_genes,searchs
panel.border = element_blank()
);
if(muskID=='a'){
if(muskID=='A'){
p11$layers[[4]]$data$label <- c("italic(sox10)","italic(rnaseh2a)")
} else if(muskID=='b'){
} else if(muskID=='B'){
p11$layers[[4]]$data$label <- c("italic(casz1.3)","italic(casz1.2)","italic(casz1.1)")
} else if(muskID=='c'){
} else if(muskID=='C'){
p11$layers[[4]]$data$label <- c('italic(hoxc10a)',"italic(hoxc11a)","italic(hoxc12a)","italic(hoxc13a)","italic(calcoco1)")
}else if(muskID=='e'){
}else if(muskID=='E'){
p11$layers[[4]]$data$label <- c("italic(polr1d)","italic(ednrb)")
} else if(muskID=='f'){
} else if(muskID=='F'){
p11$layers[[4]]$data$label <- c("italic(foxd3)","italic(alg6)","italic(efcab7)")
} else if(muskID=='d'){
} else if(muskID=='D'){
p11$layers[[4]]$data$label <- c("italic(hcfc1)","italic(hcfc1[2])","italic(hcfc1[1])",'italic(paste(sws2a,"\u03B2"))',
'italic(paste(sws2a,"\u03B1"))',"italic(sw2b)","italic(lws)",
"italic(gnl3l)","italic(tfe3)","italic(mdfic2)","italic(cxxc1[3])",
"italic(cxxc1[1])",'italic(mbd1)','italic(ccdc120)')
}else if(muskID=='g'){
}else if(muskID=='G'){
p11$layers[[4]]$data$label <- c("italic(lgals3bpb)","italic(mpnd)","italic(sh3gl1)","italic(rorb)")
}else if(muskID=='h'){
}else if(muskID=='H'){
p11$layers[[4]]$data$label <- c("italic(alg2)","italic(sec61b)","italic(nr4a3)",
"italic(stx17)","italic(erp44)","italic(invs)")
}
......
create_K_plot <- function(searchLG,gfffile,xr,xr2,muskID){
source('../../0_data/0_scripts/S07.functions.R');
source('../../0_data/0_scripts/S08.functions.R');
source('../../0_data/0_scripts/F3.getFSTs.R')
highclr <- '#3bb33b'
......
create_K_plot <- function(searchLG,gfffile,xr,searchgene,secondary_genes,searchsnp,muskID){
source('../../0_data/0_scripts/F3.functions.R');
source('../../0_data/0_scripts/F3.getFSTs.R')
source('../../0_data/0_scripts/F3.getGxP.R')
source('../../0_data/0_scripts/F3.getDXY.R')
source('../../0_data/0_scripts/S15.getGxP.R')
source('../../0_data/0_scripts/S15.getPI.R')
source('../../0_data/0_scripts/S15.getPIpw.R')
source('../../0_data/0_scripts/S15.getIHH12.R')
......
se <- function(x){
x2 <- x[!is.na(x)]
return(sd(x2)/sqrt(length(x2)))}
trplot <- function(sel){
files <- c("global","boc","bel","hon","pue","nig","uni","nig-pue","nig-uni","pue-uni")
data <- read.csv(paste('../../2_output/08_popGen/07_LD/',files[sel],'-10000-bins.txt',sep=''),sep='\t')
message(files[sel])
s1=17620000;s2=19910000;s3=21960000;s4=22320000;
e2=20660000;e3=22460000;
stp=20000;
l1=500000;l2=750000;l3=500000;l4=500000
scling <- c(-s1,-s2+(l1+stp),-s3+(l1+l2+2*stp),-s4+(l1+l2+l3+(stp*3)))
dt <- data %>% mutate(miX = floor(Mx/10000),miY=floor(My/10000))
print(max(dt$Mval,na.rm=TRUE))
genes <- data.frame(start=c(17871610,20186151,22225149,22553187,22556763,22561894,22573388),
end=c(17873915,20347811,22228342,22555052,22559742,22566321,22575503),
sclr=c(1,2,3,4,4,4,4),
LG=c("LG09","LG12-1","LG12-2","LG17","LG17","LG17","LG17"),
name=c("sox10",'casz1',"hoxc13a","sws2a\u03B1","sws2a\u03B2","sws2b","lws"))
genes <- genes %>% mutate(Nx1 = (start+scling[sclr])/10000,Nx2 = (end+scling[sclr])/10000,
labPOS = (Nx1+Nx2)/2)
genes$labPOS[genes$name %in% c("sws2a\u03B1","sws2a\u03B2","sws2b","lws")] <- genes$labPOS[genes$name %in% c("sws2a\u03B1","sws2a\u03B2","sws2b","lws")]+c(-16,-1,12,20)
GZrS <- 40000;GZrS2 <- 20000; #gene zoom offset
BS <- c(-5,5,-4,3,-8,8,-20,-10,-6,5,7,16,17,22)*10000 # Backshifter for gene zoom
GZdf <- data.frame(x=c(genes$start[1],genes$end[1],genes$end[1]+GZrS+BS[1],genes$start[1]+GZrS+BS[2],genes$start[1],
genes$start[2],genes$end[2],genes$end[2]+GZrS+BS[3],genes$start[2]+GZrS+BS[4],genes$start[2],
genes$start[3],genes$end[3],genes$end[3]+GZrS+BS[5],genes$start[3]+GZrS+BS[6],genes$start[3],
genes$start[4],genes$end[4],genes$end[4]+GZrS+BS[7],genes$start[4]+GZrS+BS[8],genes$start[4],
genes$start[5],genes$end[5],genes$end[5]+GZrS+BS[9],genes$start[5]+GZrS+BS[10],genes$start[5],
genes$start[6],genes$end[6],genes$end[6]+GZrS+BS[11],genes$start[6]+GZrS+BS[12],genes$start[6],
genes$start[7],genes$end[7],genes$end[7]+GZrS+BS[13],genes$start[7]+GZrS+BS[14],genes$start[7])+GZrS2,
y=c(genes$start[1],genes$end[1],genes$end[1]-GZrS+BS[1],genes$start[1]-GZrS+BS[2],genes$start[1],
genes$start[2],genes$end[2],genes$end[2]-GZrS+BS[3],genes$start[2]-GZrS+BS[4],genes$start[2],
genes$start[3],genes$end[3],genes$end[3]-GZrS+BS[5],genes$start[3]-GZrS+BS[6],genes$start[3],
genes$start[4],genes$end[4],genes$end[4]-GZrS+BS[7],genes$start[4]-GZrS+BS[8],genes$start[4],
genes$start[5],genes$end[5],genes$end[5]-GZrS+BS[9],genes$start[5]-GZrS+BS[10],genes$start[5],
genes$start[6],genes$end[6],genes$end[6]-GZrS+BS[11],genes$start[6]-GZrS+BS[12],genes$start[6],
genes$start[7],genes$end[7],genes$end[7]-GZrS+BS[13],genes$start[7]-GZrS+BS[14],genes$start[7])-GZrS2,
grp=rep(letters[1:7],each=5)) %>%
mutate(sclr=rep(c(1,2,3,4,4,4,4),each=5),
Nx1 = (x+scling[sclr])/10000,
Nx2 = (y+scling[sclr])/10000)
clr = c(viridis::inferno(5)[c(1,1:5)])
Gcol <- '#3bb33b'
Zcol = rgb(.94,.94,.94)
DG <- rgb(.4,.4,.4)
LGoffset <- 15
GLABoffset <- 8
rS <- 100000 # width of grey annotation band
zmRange <- data.frame(x=c(s1,s1+l1,s1+l1+rS,s1+rS,s1,
s2,s2+l2,s2+l2+rS,s2+rS,s2,
s3,s3+l3,s3+l3+rS,s3+rS,s3,
s4,s4+l4,s4+l4+rS,s4+rS,s4),
y=c(s1,s1+l1,s1+l1-rS,s1-rS,s1,
s2,s2+l2,s2+l2-rS,s2-rS,s2,
s3,s3+l3,s3+l3-rS,s3-rS,s3,
s4,s4+l4,s4+l4-rS,s4-rS,s4),
grp=rep(letters[1:4],each=5)) %>%
mutate(sclr=rep(1:4,each=5),
Nx1 = (x+scling[sclr])/10000-1,
Nx2 = (y+scling[sclr])/10000)
rS2 <- .75*rS
zmRange2 <- data.frame(x=c(s1,s1+l1,s1+l1+rS2,s1+rS2,s1,
s2,s2+l2,s2+l2+rS2,s2+rS2,s2,
s3,s3+l3,s3+l3+rS2,s3+rS2,s3,
s4,s4+l4,s4+l4+rS2,s4+rS2,s4),
y=c(s1,s1+l1,s1+l1-rS2,s1-rS2,s1,
s2,s2+l2,s2+l2-rS2,s2-rS2,s2,
s3,s3+l3,s3+l3-rS2,s3-rS2,s3,
s4,s4+l4,s4+l4-rS2,s4-rS2,s4),
grp=rep(letters[1:4],each=5)) %>%
mutate(sclr=rep(1:4,each=5),
Nx1 = (x+scling[sclr])/10000-1,
Nx2 = (y+scling[sclr])/10000)
rS3 <- .07*rS
zmRange3 <- data.frame(x=c(s1,s1+l1,s1+l1+rS3,s1+rS3,s1,
s2,s2+l2,s2+l2+rS3,s2+rS3,s2,
s3,s3+l3,s3+l3+rS3,s3+rS3,s3,
s4,s4+l4,s4+l4+rS3,s4+rS3,s4),
y=c(s1,s1+l1,s1+l1-rS3,s1-rS3,s1,
s2,s2+l2,s2+l2-rS3,s2-rS3,s2,
s3,s3+l3,s3+l3-rS3,s3-rS3,s3,
s4,s4+l4,s4+l4-rS3,s4-rS3,s4),
grp=rep(letters[1:4],each=5)) %>%
mutate(sclr=rep(1:4,each=5),
Nx1 = (x+scling[sclr])/10000-1,
Nx2 = (y+scling[sclr])/10000)
zmLab <- data.frame(x=c(s1+.5*l1,s2+.5*l2,s3+.5*l3,s4+.5*l4),
label=c('LG09 (A)','LG12 (B)','LG12 (C)','LG17 (D)')) %>%
mutate(sclr=1:4, Nx= (x+scling[sclr])/10000)
zmEND <- data.frame(x=c(s1,s1+l1,
s2,s2+l2,
s3,s3+l3,
s4,s4+l4)) %>%
mutate(sclr=rep(1:4,each=2),
Nx = ((x+scling[sclr])/10000)+rep(c(2.5,-4),4),
label=round((x/1000000),2))
textSCALE1 <- c(1.8,rep(NA,6),.7)
textSCALE2 <- c(2,rep(NA,6),1)
textSCALE3 <- c(3.5,rep(NA,6),1.75)
p1 <- ggplot(dt %>% filter(!is.na(Mval)),aes(fill=Mval))+
coord_equal()+
geom_polygon(inherit.aes = F,data=zmRange,
aes(x=Nx1+1,y=Nx2-1,group=grp),fill='lightgray')+
geom_polygon(inherit.aes = F,data=zmRange2,
aes(x=Nx1+11,y=Nx2-11,group=grp),fill=DG)+
geom_polygon(inherit.aes = F,data=zmRange3,
aes(x=Nx1+1.1,y=Nx2-1.1,group=grp),fill=DG)+
geom_polygon(inherit.aes = F,data=GZdf,
aes(x=Nx1,y=Nx2,group=grp),fill=Zcol)+
geom_segment(inherit.aes = F,data=genes,
aes(x=Nx1+1,y=Nx1-1,xend=Nx2+1,yend=Nx2-1),
col=Gcol,size=1.5)+
geom_tile(aes(x=miX,y=miY))+
geom_text(inherit.aes = F,data=zmEND,
aes(x=Nx+LGoffset,y=Nx-LGoffset,label=paste(label,'\n(Mb)')),
angle=45,size=textSCALE1[sel])+
geom_text(inherit.aes = F,data=genes,
aes(x=labPOS+GLABoffset,y=labPOS-GLABoffset,label=name),
angle=-45,fontface='italic',size=textSCALE2[sel])+
geom_text(inherit.aes = F,data=zmLab,
aes(x=Nx+LGoffset-.8,y=Nx-LGoffset+.8,label=label),
angle=-45,fontface='bold',size=textSCALE3[sel],col='white')+
scale_x_continuous(expand=c(0,0))+
scale_y_continuous(expand=c(0,0),
trans = 'reverse')+
scico::scale_fill_scico(name=expression(bar(italic(r)^2)),palette = 'lapaz',direction = -1)+
theme_void()+
theme(legend.position = c(.9,.75),
legend.direction = 'vertical')
return(p1)
}
\ No newline at end of file
BW <- read.csv('../../2_output/08_popGen/07_LD/subsets/glob_between.interchrom.hap.ld',sep='\t') %>%
select(R.2) %>%
mutate(type='Global',run='Global')
for(j in 1:6){
flS <- c("boc","bel","hon","pue","nig","uni")
flL <- c("Panana","Belize","Honduras","H. puella","H. nigricans","H. unicolor")
flT <- c(rep('Geo',3),rep('Spec',3))
k <- flS[j]
q <- flL[j]
u <- flT[j]
print(j)
BW <- BW %>%
rbind(.,(read.csv(paste('../../2_output/08_popGen/07_LD/subsets/glob_between.',k,'.interchrom.hap.ld',sep=''),
sep='\t') %>%
select(R.2) %>%
mutate(type=u,run=q)))
}
BW$run <- factor(BW$run,levels=c('Global',"Panana","Belize","Honduras",
"H. nigricans","H. puella","H. unicolor"))
dt2 <- BW %>%
group_by(run) %>%
summarise(meanR2=mean(R.2,na.rm = T),medR2=median(R.2,na.rm = T)) %>%
gather(key = 'type',value = 'val',2:3)
BC <-rgb(.7,.7,.7)
clr <-colorRampPalette(colors = c('white',BC,'black'))(8)
pBOX <- ggplot(BW,aes(x=run,y=R.2))+
geom_boxplot(fill=BC,width=.7,outlier.size = .1)+
#coord_fixed(ylim=c(0,.031),ratio = 250)+
geom_point(inherit.aes = F, data=dt2,aes(x=run,y=val,fill=type),shape=23,size=1)+
ggtitle(expression(Genome~wide~ILD~(italic(r)^2)))+
scale_x_discrete(labels = expression(Global,Panama,Belize,Honduas,
italic("H. nigricans"),italic("H. puella"),italic("H. unicolor")))+
scale_y_continuous()+
scale_fill_manual('',values = clr[c(6,1)],labels=c('mean','median'))+
guides(shape = guide_legend(ncol = 1))+
theme(legend.position = c(-.4,1.27),
text = element_text(size=10),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=7),
axis.text.x = element_text(size=7,angle=45,hjust = 1),
panel.background = element_blank(),
plot.background = element_blank())
\ No newline at end of file
dataBoxGenes <- data.frame(R.2=c(),run=c(),stringsAsFactors = F)
for (file in dir("../../2_output/08_popGen/07_LD/",pattern = "interchrom.hap.ld.gz")) {
nm <- str_remove_all(file, "spotlight.") %>% str_remove_all(.,".interchrom.hap.ld.gz")
if(nm %in% c("global","uni","pue","nig","hon","bel","boc")){
dataBoxGenes <- read.csv(gzfile(paste('../../2_output/08_popGen/07_LD/',file,sep='')),sep='\t') %>%
mutate(run = nm) %>%
select(R.2,run) %>%
rbind(dataBoxGenes,.)
}
}
dataBoxGenes$xS <- factor(dataBoxGenes$run,
levels = c("global","boc","bel","hon","nig","pue","uni"))
BoxGenes_summary <- dataBoxGenes %>%
group_by(xS) %>%
summarise(run = run[1],
meanR2 = mean(R.2,na.rm = T),
medR2 = median(R.2,na.rm = T),
sdR2 = sd(R.2,na.rm=T),
seR2 = se(R.2),
nanr = sum(is.na(R.2)),
minR2 = min(R.2,na.rm = T),
maxR2 = max(R.2,na.rm = T),
lengthR2 = length(R.2)) %>%
select(xS,meanR2,medR2) %>%
gather(key = 'type',value = 'val',2:3)
boxGenes <- ggplot(dataBoxGenes,aes(x=xS))+
geom_boxplot(aes(y=R.2),fill=BC,width=.7,outlier.size = .1) +
geom_point(data=BoxGenes_summary,aes(y=val,fill=type),shape=23,size=1)+
#coord_fixed(ylim=c(0,.031),ratio = 250)+
ggtitle(expression(Candidate~gene~ILD~(italic(r)^2)))+
scale_y_continuous(expression(Candidate~gene~ILD~(italic(r)^2)))+
scale_x_discrete(labels = expression(Global,Panama,Belize,Honduas,
italic("H. nigricans"),italic("H. puella"),italic("H. unicolor")))+
scale_fill_manual('',values = clr[c(6,1)],labels=c('mean','median'))+
guides(shape = guide_legend(ncol = 1))+
theme(legend.position = c(.35,1.27),
text = element_text(size=10),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size=7),
axis.text.x = element_text(size=7,angle=45,hjust = 1),
panel.background = element_blank(),
plot.background = element_blank())
\ No newline at end of file
#!/usr/bin/env nextflow
/* nextflow run msms.nf -with-dag msms.png -c nextflow.config -resume */
/* nextflow run 2.4.1.msms.nf -with-dag msms.png -c nextflow.config -resume */
extend = Channel.from( 500000 )
recom = Channel.from( 0.02 )
......
......@@ -218,7 +218,7 @@ p1 <- ggplot()+
```
<center>
```{r basePlotSHOW, echo=FALSE}
```{r basePlotSHOW, echo=FALSE, message=FALSE, warning=FALSE}
p1
```
</center>
......
......@@ -68,11 +68,7 @@ p4 <- create_K_plot(searchLG = "LG17",gfffile = '../../1-output/09_gff_from_IKMB
muskID = 'D')
```
<<<<<<< HEAD
An individual result will look like this (`plts[[1]]` & `plts[[2]]`):
=======
An individual result will look like this (p1):
>>>>>>> reformat
<center>
```{r basePlotSHOW, echo=FALSE}
......
......@@ -105,7 +105,7 @@ honGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/hond
panGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/panama-cairo.svg"))))
globGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/caribbean-cairo.svg"))))
leg <- get_legend(plts[[1]]+theme(legend.direction = 'horizontal')+ guides(fill = guide_legend(nrow=1)))
leg <- get_legend(plts[[1]]+theme(legend.direction = 'horizontal',legend.key.width = unit(60,'pt')))
sY <- .035;
sX <- -.03;
```
......@@ -367,10 +367,6 @@ Then, the base plot is created and stored in `p1`.
scale_x_continuous(expand=c(0,0))+
scale_y_continuous(expand=c(0,0),
trans = 'reverse')+
# manual color gradient for LD data
scale_fill_gradientn(name=expression(bar(italic(r)^2)),colours=clr,
values=rescale(c(1,.08,.03,.015,.01,0)),
limits=c(0,1),guide = 'legend',breaks=c(0,.005,.01,.02,.03,.1,1))+
# plot layout theme
theme_void()+
theme(legend.position = c(.7,.75),legend.direction = 'vertical')
......@@ -394,10 +390,6 @@ Then, the base plot is created and stored in `p1`.
scale_x_continuous(expand=c(0,0))+
scale_y_continuous(expand=c(0,0),
trans = 'reverse')+
# manual color gradient for LD data
scale_fill_gradientn(name=expression(bar(r^2)),colours=clr,
values=rescale(c(1,.08,.03,.015,.01,0)),
limits=c(0,1),guide = 'legend',breaks=c(0,.005,.01,.02,.03,.1,1))+
# plotting layout theme
theme_void()+
theme(legend.position = c(.7,.75),legend.direction = 'vertical')
......@@ -405,13 +397,39 @@ Then, the base plot is created and stored in `p1`.
}
```
Finally, the function returns the current base plot stored in `p1`.
Finally, the genreal color paletee is set and the function returns the current base plot stored in `p1`.
```{r eval=FALSE}
return(p1)
cs <- scale_fill_viridis_c(name=expression(bar(r^2)),
limits = c(0,0.55),values = c(0,.08,1),
option = 'B',direction = -1)
return(p1+cs)
}
```
The script `F4.functions.R` contains a second helper function which is used for the box plots.
This function retuns a set of summary statistics for the r^2 distributions:
```{r eval = FALSE}
run_one_sample_test <- function(pop,n,data){
df <- data %>%
filter(run == pop)
x <- df$R.2 %>% na.omit() %>% as.vector()
exp_mu <- 1/(2*n)
test_result <- wilcox.test(x,mu = exp_mu)
return(tibble(population = pop,
mean = x %>% mean,
sd = x %>% sd,
se = x %>% se,
var = x %>% var,
n = length(x),
exp_mu = exp_mu) %>% bind_cols(
test_result %>% broom::tidy()))
}
```
---
## Details of `F4.genomeWide_box.R`
......@@ -465,6 +483,21 @@ BC <-rgb(.7,.7,.7)
clr <-colorRampPalette(colors = c('white',BC,'black'))(8)
```
We run the summary statistics on the data (basically only to generate the expected mean r^2).
```{r eval=FALSE}
# tests
locations <- c('Global',"Panana","Belize","Honduras","H. puella","H. nigricans","H. unicolor")
sample_sizes <- c(110,39,36,35,37,37,36)
test_gw <- purrr::map2(locations,sample_sizes,run_one_sample_test,data=BW) %>% bind_rows()
test_gw$xS <- factor(test_gw$population,
levels = c('Global',"Panana","Belize","Honduras",
"H. nigricans","H. puella","H. unicolor")) %>%
as.numeric()
```
And finally , the data is plotted.
```{r eval=FALSE}
......@@ -474,6 +507,10 @@ pBOX <- ggplot(BW,aes(x=run,y=R.2))+
geom_boxplot(fill=BC,width=.7,outlier.size = .1)+
# set a fixed aspect ratio
coord_fixed(ylim=c(0,.031),ratio = 250)+
# adding expected mean value
geom_segment(inherit.aes = FALSE,data=test_gw,
aes(x=xS-.38,xend=xS+.38,y=exp_mu,yend=exp_mu),
col='red',size=.5)+
# adding mean and median values
geom_point(inherit.aes = F, data=dt2,aes(x=run,y=val,fill=type),shape=23,size=1)+
# settting the axis and color labels
......@@ -542,6 +579,20 @@ BoxGenes_summary <- dataBoxGenes %>%
gather(key = 'type',value = 'val',2:3)
```
We run the summary statistics on the data (basically only to generate the expected mean r^2).
```{r eval=FALSE}
# tests
locations <- c("global","boc","bel","hon","nig","pue","uni")
sample_sizes <- c(110,39,36,35,37,37,36)
test_area <- purrr::map2(locations,sample_sizes,run_one_sample_test,data=dataBoxGenes) %>%
bind_rows()
test_area$xS <- factor(test_area$population,
levels = c("global","boc","bel","hon","nig","pue","uni")) %>%
as.numeric()
```
And finally , the data is plotted.
```{r eval=FALSE}
......@@ -553,6 +604,10 @@ boxGenes <- ggplot(dataBoxGenes,aes(x=xS))+
geom_point(data=BoxGenes_summary,aes(y=val,fill=type),shape=23,size=1)+
# set a fixed aspect ratio
coord_fixed(ylim=c(0,.031),ratio = 133)+
# adding expected mean value
geom_segment(inherit.aes = FALSE,data=test_gw,
aes(x=xS-.38,xend=xS+.38,y=exp_mu,yend=exp_mu),
col='red',size=.5)+
# settting the axis and color labels
scale_y_continuous(expression(candidate~gene~ILD~(italic(r)^2)))+
scale_x_discrete(labels = expression(Global,Panama,Belize,Honduas,
......
#!/usr/bin/env Rscript
library(sp)
library(maptools)
library(scales)
library(PBSmapping)
library(RColorBrewer)
library(tidyverse)
library(ggforce)
library(cowplot)
library(scatterpie)
library(hrbrthemes)
library(cowplot)
library(grid)
library(gridSVG)
library(grImport2)
library(grConvert)
library(ggmap)
library(marmap)
source("../../0_data/0_scripts/F1.functions.R")
dataAll <- read.csv('../../0_data/0_resources/F1.sample.txt',sep='\t') %>% mutate(loc=substrRight(as.character(id),3))
data <- dataAll %>% filter(sample=="sample")
dataSum <- data%>% rowwise()%>% mutate(nn = sum(as.numeric(strsplit(as.character(`Latittude.N`),' ')[[1]])*c(1,1/60,1/3600)),
ww = sum(as.numeric(strsplit(as.character(`Longitude.W`),' ')[[1]])*c(1,1/60,1/3600))) %>%
group_by(loc) %>% summarise(n=mean(nn,na.rm = T),w=-mean(ww,na.rm = T)) %>%
as.data.frame() %>% cbind(.,read.csv('../../0_data/0_resources/F1.pointer.csv'))
crvs <- read.csv('../../0_data/0_resources/F1.curve.csv')
xlimW = c(-100,-53); ylimW = c(7,30.8)
worldmap = map_data("world")
names(worldmap) <- c("X","Y","PID","POS","region","subregion")
worldmap = clipPolys(worldmap, xlim=xlimW,ylim=ylimW, keepExtra=TRUE)
compGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/north-cairo.svg"))))
legGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/legend-map-cairo.svg"))))
nigGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/nigricans-cairo.svg"))))
pueGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/puella-cairo.svg"))))
uniGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/unicolor-cairo.svg"))))
belGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/belize-cairo.svg"))))
honGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/honduras-cairo.svg"))))
panGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/panama-cairo.svg"))))
globGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/caribbean-cairo.svg"))))
legPGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/legend_pairs-cairo.svg"))))
ypos = c(2,1,0)*1.6;
xpos=c(1,0,1)*4.57
sampSize <- data %>% group_by(spec,loc) %>% summarise(n=length(id)) %>% filter(spec!='gum') %>% arrange(loc) %>%
cbind(y=c(rep(24.4,3)+ypos, # belize
rep(13.6,3)+ypos, # panama
rep(9.2,3)+ypos), # honduras
x=c(rep(-94.7,3)+xpos, # belize
rep(-78,3)+xpos, # panama
rep(-98.75,3)+xpos)) # honduras
uS <- readShapeSpatial('../../0_data/0_resources/F1_shps/F1.npu_distribution.shp');
uS_f <- fortify(uS,region="DN");
names(uS_f) <- c("X","Y","POS","hole","piece","id","PID"); uS_f$PID <- as.numeric(uS_f$PID)
uS_f <- clipPolys(uS_f, xlim=xlimW,ylim=ylimW, keepExtra=TRUE)
hS <- readShapeSpatial('../../0_data/0_resources/F1_shps/F1.hypoplectrus_distribution.shp');
hS_f <- fortify(hS,region="DN");
names(hS_f) <- c("X","Y","POS","hole","piece","id","PID"); hS_f$PID <- as.numeric(hS_f$PID)
hS_f <- clipPolys(hS_f, xlim=xlimW,ylim=ylimW, keepExtra=TRUE)
clrShps <- c("#ffc097","#b1d4f8")
cFILL <- rgb(.7,.7,.7)
ccc<-rgb(0,.4,.8)
p1 <- ggplot()+coord_map(projection = 'mercator',xlim=xlimW,ylim=ylimW)+
labs(y="Latitude",x="Longitude") +
theme_mapK +
geom_polygon(data=worldmap,aes(X,Y,group=PID),fill = cFILL,
col=rgb(0,0,0),lwd=.2)+
geom_polygon(data=hS_f,aes(X,Y,group=PID),fill=clrShps[2],col=rgb(.3,.3,.3),lwd=.2)+
geom_polygon(data=uS_f,aes(X,Y,group=PID),fill=clrShps[1],col=clrShps[1],lwd=.2)+
geom_point(data=sampSize,aes(x=x,y=y),col='white',size=7)+
geom_text(data=sampSize,aes(x=x,y=y,label=n),size=3)+
geom_bezier(data = crvs,aes(x= x, y = y, group = loc),col=ccc,lwd=.4) +
geom_point(data = crvs[c(3,6,9),],aes(x= x, y = y, group = loc),size=5,col=ccc,shape=21,fill='white')+
geom_text(data = crvs[c(3,6,9),],aes(x= x, y = y, label=c('B','P','H')),size=3)+
geom_point(data=dataSum,aes(x=w,y=n),size=1.5,shape=21,fill='white')
fstdata <- read.csv('../../2_output/08_popGen/05_fst/genome_wide_weighted_mean_fst.txt',
sep='\t',skip = 1,head=F,col.names = c('pair','fst')) %>%
filter(!pair %in% c('NU','PN','PU')) %>%
arrange(fst) %>% mutate(x=row_number()) %>% rowwise() %>%
mutate(loc = substr(as.character(pair),2,2),
pop1 = substr(as.character(pair),1,1),
pop2 = substr(as.character(pair),3,3),
pairclass=paste(pop1,pop2,sep='-'))
clrP <- c('#fb8620','#1b519c','#d93327')
p5 <- ggplot(fstdata,aes(x=x,y=fst,fill=pairclass))+
geom_bar(stat='identity')+
annotation_custom(legPGrob,xmin =0.15,xmax =5,ymin =.008,ymax = .035)+
scale_fill_manual(values = clrP)+
scale_y_continuous(name=expression(italic("F"['ST'])),
expand = c(0,0))+
scale_x_continuous(expand = c(.01,0),breaks = 1:9,
labels = c('H','H','B','H','P','B','B','P','P'))+
theme_mapK+
theme(legend.position = 'none',
panel.grid.major.y = element_line(color=rgb(.9,.9,.9)),
axis.title.x = element_blank(),
axis.text.x = element_text(#face = 'bold',
size = 8));
clr<-c('#000000','#d45500','#000000')
fll<-c('#000000','#d45500','#ffffff')
eVclr <- c(rep('darkred',2),rep(cFILL,8))
belPCA <- readRDS('../../2_output/08_popGen/04_pca/bel/belpca.Rds')
honPCA <- readRDS('../../2_output/08_popGen/04_pca/hon/honpca.Rds')
panPCA <- readRDS('../../2_output/08_popGen/04_pca/pan/panpca.Rds')
dataHON <- cbind(dataAll %>% filter(sample=='sample',loc=='hon')%>% select(id,spec),
honPCA$scores); names(dataHON)[3:12]<- paste('PC',1:10,sep='')
exp_varHON <- (honPCA$singular.values[1:10])^2/length(honPCA$maf)
xlabHON <- paste('PC1 (',sprintf("%.1f",exp_varHON[1]*100),'%)')# explained varinace)')
ylabHON <- paste('PC2 (',sprintf("%.1f",exp_varHON[2]*100),'%)')# explained varinace)')
p2 <- ggplot(dataHON,aes(x=PC1,y=PC2,col=spec,fill=spec))+geom_point(size=2,shape=21)+
scale_color_manual(values=clr,guide=F)+
ggtitle('Honduras')+
scale_fill_manual(values=fll,guide=F)+
scale_x_continuous(name=xlabHON)+
scale_y_continuous(name=ylabHON)