Commit f9726e5c authored by Kosmas Hench's avatar Kosmas Hench

adjust figures for reformating

parent 06738b06
......@@ -51,7 +51,7 @@ for(k in 1:7){
}
```
An individual result will look like this (plts[[1]] & plts[[2]]):
An individual result will look like this (`plts[[1]]` & `plts[[2]]`):
<center>
```{r basePlotSHOW, echo=FALSE}
......
......@@ -123,7 +123,7 @@ cp1 <- plot_grid(pbel+theme(legend.position = 'none'),p1,
phon+theme(legend.position = 'none'),p2,
pboc+theme(legend.position = 'none'),p3,
ncol = 2,rel_heights = c(1,1,1),
rel_widths = c(0.65,0.35)),
rel_widths = c(0.65,0.35),
labels=c('A','B',rep('',4)),
label_size = 10);
```
......
......@@ -2,7 +2,7 @@ book_filename: "Script repository Hench et al. 2018"
chapter_name: ""
repo: https://github.com/k-hench/bookdown
output_dir: ../docs
rmd_files: ["index.Rmd","Workflow.Rmd","F1.Rmd","F2.Rmd","F3.Rmd","E1.Rmd","S01.Rmd","S02.Rmd","S03.Rmd","S05.Rmd","S06.Rmd","S07.Rmd","S08.Rmd","S09.Rmd","S10.Rmd","S11.Rmd","S12.Rmd","S13.Rmd","S14.Rmd","S15.Rmd"]
rmd_files: ["index.Rmd","Workflow.Rmd","F1.Rmd","F2.Rmd","F3.Rmd","F4.Rmd","S01.Rmd","S02.Rmd","S03.Rmd","S05.Rmd","S06.Rmd","S07.Rmd","S08.Rmd","S09.Rmd","S10.Rmd","S11.Rmd","S12.Rmd","S13.Rmd","S14.Rmd","S15.Rmd"]
clean: [packages.bib, bookdown.bbl]
new_session: yes
delete_merged_file: True
This diff is collapsed.
---
output: html_document
editor_options:
editor_options:
chunk_output_type: console
---
# Figure 1
## Summary
......@@ -47,7 +47,7 @@ library(marmap)
source("../../0_data/0_scripts/F1.functions.R")
```
### Fig. 1 a)
### Fig. 1 A)
First, we load the data sheet containing the sampling location as well as the population specifics for each sample and sub sample it to include only the samples included in the re-sequencing study.
......@@ -61,7 +61,7 @@ dataAll <- read.csv('../../0_data/0_resources/F1.sample.txt',sep='\t') %>% mutat
data <- dataAll %>% filter(sample=="sample")
dataSum <- data%>% rowwise()%>% mutate(nn = sum(as.numeric(strsplit(as.character(`Latittude.N`),' ')[[1]])*c(1,1/60,1/3600)),
ww = sum(as.numeric(strsplit(as.character(`Longitude.W`),' ')[[1]])*c(1,1/60,1/3600))) %>%
group_by(loc) %>% summarise(n=mean(nn,na.rm = T),w=-mean(ww,na.rm = T)) %>%
group_by(loc) %>% summarise(n=mean(nn,na.rm = T),w=-mean(ww,na.rm = T)) %>%
as.data.frame() %>% cbind(.,read.csv('../../0_data/0_resources/F1.pointer.csv'))
```
......@@ -152,9 +152,9 @@ Now, we got everything needed for the base map (Fig. 1 a):
```r
p1 <- ggplot()+
# set coordinates
coord_map(projection = 'mercator',xlim=xlimW,ylim=ylimW)+
coord_map(projection = 'mercator',xlim=xlimW,ylim=ylimW)+
# axes labels
labs(y="Latitude",x="Longitude") +
labs(y="Latitude",x="Longitude") +
# preset theme
theme_mapK +
# landmass layer
......@@ -182,18 +182,18 @@ p1 <- ggplot()+
<img src="F1_files/figure-html/mapBaseSHOW-1.png" width="672" />
</center>
### Fig. 1 b)
### Fig. 1 B)
Next we create the sub figure containing the *F~ST~* information (Fig. 1 b).
Next we create the sub figure containing the *F~ST~* information (Fig. 1 B).
We first read in the data and define the color palette.
```r
fstdata <- read.csv('../../2_output/08_popGen/05_fst/genome_wide_weighted_mean_fst.txt',
sep='\t',skip = 1,head=F,col.names = c('pair','fst')) %>%
sep='\t',skip = 1,head=F,col.names = c('pair','fst')) %>%
filter(!pair %in% c('NU','PN','PU')) %>%
arrange(fst) %>%
mutate(x=row_number()) %>%
arrange(fst) %>%
mutate(x=row_number()) %>%
rowwise() %>%
mutate(loc = substr(as.character(pair),2,2),
pop1 = substr(as.character(pair),1,1),
......@@ -234,7 +234,7 @@ p5 <- ggplot(fstdata,aes(x=x,y=fst,fill=pairclass))+
<img src="F1_files/figure-html/fstBaseSHOW-1.png" width="672" />
</center>
### Fig. 1 c)
### Fig. 1 C)
The last sub figure contains the PCA output for each location.
For those, we define the color palette for hamlet species and read in the PCA data generated by `pcadapt`.
......@@ -262,7 +262,7 @@ Further, the explained variance of each PC is extracted for the axis labels.
dataHON <- cbind(dataAll %>% filter(sample=='sample',loc=='hon')%>% select(id,spec),
honPCA$scores); names(dataHON)[3:12]<- paste('PC',1:10,sep='')
exp_varHON <- (honPCA$singular.values[1:10])^2/length(honPCA$maf)
xlabHON <- paste('PC1 (',sprintf("%.1f",exp_varHON[1]*100),'%)')# explained varinace)')
xlabHON <- paste('PC1 (',sprintf("%.1f",exp_varHON[1]*100),'%)')# explained varinace)')
ylabHON <- paste('PC2 (',sprintf("%.1f",exp_varHON[2]*100),'%)')# explained varinace)')
# Honduras PCA (plot)
......@@ -362,11 +362,11 @@ F1 <- ggdraw()+
draw_grob(nigGrob, xf[1]+xS[1], yf[1]+yS[1], hSC[1], hSC[2])+ # Honduras
draw_grob(pueGrob, xf[2]+xS[1], yf[2]+yS[1], hSC[1], hSC[2])+ # Honduras
draw_grob(uniGrob, xf[3]+xS[1], yf[3]+yS[1], hSC[1], hSC[2])+ # Honduras
draw_grob(nigGrob, xf[1]+xS[2], yf[1]+yS[2], hSC[1], hSC[2])+ # Panama
draw_grob(pueGrob, xf[2]+xS[2], yf[2]+yS[2], hSC[1], hSC[2])+ # Panama
draw_grob(uniGrob, xf[3]+xS[2], yf[3]+yS[2], hSC[1], hSC[2])+ # Panama
draw_grob(nigGrob, xf[1]+xS[3], yf[1]+yS[3], hSC[1], hSC[2])+ # Belize
draw_grob(pueGrob, xf[2]+xS[3], yf[2]+yS[3], hSC[1], hSC[2])+ # Belize
draw_grob(uniGrob, xf[3]+xS[3], yf[3]+yS[3], hSC[1], hSC[2])+ # Belize
......@@ -374,14 +374,14 @@ F1 <- ggdraw()+
draw_grob(panGrob, 0.3, 0.485, 0.042, 0.042)+
draw_grob(belGrob, 0.2, 0.67, 0.042, 0.042)+
draw_grob(honGrob, 0.28, 0.61, 0.042, 0.042)+
draw_grob(honGrob, 0.285+.09, 0.37, 0.042, 0.042)+
draw_grob(belGrob, 0.535+.09, 0.37, 0.042, 0.042)+
draw_grob(panGrob, 0.785+.09, 0.37, 0.042, 0.042)+
# annotations (subfigure labels)
# annotations (subfigure labels)
draw_plot_label(x = c(.005,.005,.25),
y = c(.995,.42,.42),
label =letters[1:3])
label = LETTERS[1:3])
```
It is then exported using `ggsave()`.
......
---
output: html_document
editor_options:
editor_options:
chunk_output_type: console
---
# Figure 2
## Summary
......@@ -45,58 +45,58 @@ Further more, the extent of the individual LGs in loaded.
```r
karyo <- read.csv('../../0_data/0_resources/F2.karyo.txt',sep='\t') %>%
karyo <- read.csv('../../0_data/0_resources/F2.karyo.txt',sep='\t') %>%
mutate(GSTART=lag(cumsum(END),n = 1,default = 0),
GEND=GSTART+END,GROUP=rep(letters[1:2],12)) %>%
GEND=GSTART+END,GROUP=rep(letters[1:2],12)) %>%
select(CHROM,GSTART,GEND,GROUP)
# Global -------------
pn <- read.csv('../../2_output/08_popGen/05_fst/pue-nig.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
pn <- read.csv('../../2_output/08_popGen/05_fst/pue-nig.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PN');
pu <- read.csv('../../2_output/08_popGen/05_fst/pue-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
pu <- read.csv('../../2_output/08_popGen/05_fst/pue-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PU');
nu <- read.csv('../../2_output/08_popGen/05_fst/nig-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
nu <- read.csv('../../2_output/08_popGen/05_fst/nig-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NU');
# Panama -------------
ppnp <- read.csv('../../2_output/08_popGen/05_fst/pueboc-nigboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
ppnp <- read.csv('../../2_output/08_popGen/05_fst/pueboc-nigboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PPNP');
ppup <- read.csv('../../2_output/08_popGen/05_fst/pueboc-uniboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
ppup <- read.csv('../../2_output/08_popGen/05_fst/pueboc-uniboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PPUP');
npup <- read.csv('../../2_output/08_popGen/05_fst/nigboc-uniboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
npup <- read.csv('../../2_output/08_popGen/05_fst/nigboc-uniboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NPUP');
# Belize -------------
pbnb <- read.csv('../../2_output/08_popGen/05_fst/puebel-nigbel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
pbnb <- read.csv('../../2_output/08_popGen/05_fst/puebel-nigbel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PBNB');
pbub <- read.csv('../../2_output/08_popGen/05_fst/puebel-unibel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
pbub <- read.csv('../../2_output/08_popGen/05_fst/puebel-unibel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PBUB');
nbub <- read.csv('../../2_output/08_popGen/05_fst/nigbel-unibel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
nbub <- read.csv('../../2_output/08_popGen/05_fst/nigbel-unibel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NBUB');
# Honduras -------------
phnh <- read.csv('../../2_output/08_popGen/05_fst/puehon-nighon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
phnh <- read.csv('../../2_output/08_popGen/05_fst/puehon-nighon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PHNH');
phuh <- read.csv('../../2_output/08_popGen/05_fst/puehon-unihon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
phuh <- read.csv('../../2_output/08_popGen/05_fst/puehon-unihon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PHUH');
nhuh <- read.csv('../../2_output/08_popGen/05_fst/nighon-unihon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
nhuh <- read.csv('../../2_output/08_popGen/05_fst/nighon-unihon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NHUH');
```
......@@ -128,15 +128,15 @@ Then windows above this threshold are identified for each global run.
```r
threshs <- data %>% group_by(RUN) %>% summarise(thresh=quantile(WEIGHTED_FST,.9998))
data2 <- data %>%
filter(RUN %in% levels(data$RUN)[1:3]) %>%
data2 <- data %>%
filter(RUN %in% levels(data$RUN)[1:3]) %>%
merge(.,threshs,by='RUN',all.x=T) %>%
mutate(OUTL = (WEIGHTED_FST>thresh)) %>%
filter(OUTL) %>%
group_by(RUN) %>%
mutate(OUTL = (WEIGHTED_FST>thresh)) %>%
filter(OUTL) %>%
group_by(RUN) %>%
mutate(CHECK=cumsum(1-(BIN_START-lag(BIN_START,default = 0)==5000)),ID=paste(RUN,'-',CHECK,sep='')) %>%
ungroup() %>%
group_by(ID) %>%
ungroup() %>%
group_by(ID) %>%
summarise(CHROM=CHROM[1],
xmin = min(BIN_START+GSTART),
xmax=max(BIN_END+GSTART),
......@@ -144,7 +144,7 @@ data2 <- data %>%
LGmin=min(BIN_START),
LGmax=min(BIN_END),
LGmean=min(POS),
RUN=RUN[1],COL=COL[1]) %>%
RUN=RUN[1],COL=COL[1]) %>%
ungroup() %>% mutate(muskS = letters[as.numeric(as.factor(xmin))],
musk=LETTERS[c(1,3,4,4,1,2,2,3)])
......@@ -183,7 +183,7 @@ p1 <- ggplot()+
# Fst values
geom_point(data=data,aes(x=GPOS,y=WEIGHTED_FST,col=COL),size=.01)+
# labels for the outlier windows
geom_text(data=musks,aes(x=muskX,y=.65,label=musk))+
geom_text(data=musks,aes(x=muskX,y=.65,label=tolower(musk)))+
# vertical line separating sliding Fst from genome wide Fst
geom_vline(data = data.frame(x=567000000),aes(xintercept=x),col=annoclr,lwd=.2)+
# genome wide Fst
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
---
output: html_document
editor_options:
editor_options:
chunk_output_type: console
---
# Supplementary Figure 02
......@@ -88,14 +88,14 @@ phon <- plot_fun(data,'hon',2, gl)
pboc <- plot_fun(data,'boc',3, gl)
```
Then some preparation is done for the pca plots. The colors are predefined, the sample metadata and the pca results are loaded and the introgression candidates are specified.
Then some preparation is done for the pca plots. The colors are predefined, the sample metadata and the pca results are loaded and the introgression candidates are specified.
```r
clr<-c('#000000','#d45500','#000000')
fll<-c('#000000','#d45500','#ffffff')
dataAll <- read.csv('../../0_data/0_resources/F1.sample.txt',sep='\t') %>%
dataAll <- read.csv('../../0_data/0_resources/F1.sample.txt',sep='\t') %>%
mutate(loc=substrRight(as.character(id),3))
belPCA <- readRDS('../../2_output/08_popGen/04_pca/bel/belpca.Rds')
honPCA <- readRDS('../../2_output/08_popGen/04_pca/hon/honpca.Rds')
......@@ -126,7 +126,10 @@ The basic structure of the figure is created using the the `plot_grid()` functio
cp1 <- plot_grid(pbel+theme(legend.position = 'none'),p1,
phon+theme(legend.position = 'none'),p2,
pboc+theme(legend.position = 'none'),p3,
ncol = 2,rel_heights = c(1,1,1),rel_widths = c(0.65,0.35));
ncol = 2,rel_heights = c(1,1,1),
rel_widths = c(0.65,0.35),
labels=c('A','B',rep('',4)),
label_size = 10);
```
Then, external annotations loaded and variables needed for the plot composition are created.
......@@ -143,7 +146,8 @@ ysc <- .008
yd <- .3025
labX <- .675
bclr <- rgb(.9,.9,.9)
boxes = data.frame(x=rep(labX-.015,3),y=c(ys,ys+yd+ysc,ys+(yd+ysc)*2))
boxes = data.frame(x=rep(labX-.015,3),
y=c(ys,ys+yd+ysc,ys+(yd+ysc)*2))
```
Finally, the complete Supplementary Figure 02 is put together.
......@@ -151,20 +155,20 @@ Finally, the complete Supplementary Figure 02 is put together.
```r
S02 <- ggdraw()+
geom_rect(data = boxes, aes(xmin = x, xmax = x + .03, ymin = y, ymax = y + yd),
geom_rect(data = boxes, aes(xmin = x, xmax = x + .03, ymin = y+.07, ymax = y + yd+c(.07,.07,.05)),
colour = NA, fill = c(rep(bclr,3)))+
draw_label('Belize', x = labX, y = boxes$y[3]+.13,
draw_label('Belize', x = labX, y = boxes$y[3]+.20,
size = 13, angle=-90)+
draw_label('Honduras', x = labX, y = boxes$y[2]+.13,
draw_label('Honduras', x = labX, y = boxes$y[2]+.20,
size = 13, angle=-90)+
draw_label('Panama', x = labX, y = boxes$y[1]+.13,
draw_label('Panama', x = labX, y = boxes$y[1]+.20,
size = 13, angle=-90)+
draw_label('Posterior probability',x=.01,y=.5,size = 15, angle=90)+
draw_grob(legGrob,0,.93,1,.07)+
draw_plot(cp1,.0,0,1,.93)+
draw_grob(belGrob, labX-.0225, boxes$y[3]+.84*yd, .045, .045)+
draw_grob(honGrob, labX-.0225, boxes$y[2]+.84*yd, .045, .045)+
draw_grob(panGrob, labX-.0225, boxes$y[1]+.84*yd, .045, .045)
draw_label('Posterior probability',x=.01,y=.57,size = 15, angle=90)+
draw_grob(legGrob,0,0,1,.07)+
draw_plot(cp1,.0,.07,1,.93)+
draw_grob(belGrob, labX-.0225, boxes$y[3]+.84*yd+.05, .045, .045)+
draw_grob(honGrob, labX-.0225, boxes$y[2]+.84*yd+.07, .045, .045)+
draw_grob(panGrob, labX-.0225, boxes$y[1]+.84*yd+.07, .045, .045)
```
The final figure is then exported using `ggsave()`.
......@@ -275,7 +279,7 @@ getPofZ <- function(runname,pops){
}
```
The function `plot_fun()` plots the `NewHybrids` results for sets of three pairwise comparisons within one location.
The function `plot_fun()` plots the `NewHybrids` results for sets of three pairwise comparisons within one location.
```r
......@@ -343,17 +347,17 @@ plot_fun <- function(data,loc,sel,gl){
}
```
The function `plotPCA()` plots the pca results for one location and highlights the introgression candidates.
The function `plotPCA()` plots the pca results for one location and highlights the introgression candidates.
```r
plotPCA <- function(pca,dataAll,locIN,introgression_candidates,clr,fll){
data <- cbind(dataAll %>% filter(sample=='sample',loc==locIN) %>%
data <- cbind(dataAll %>% filter(sample=='sample',loc==locIN) %>%
select(id,spec),
pca$scores);
names(data)[3:12]<- paste('PC',1:10,sep='')
exp_var <- (pca$singular.values[1:10])^2/length(pca$maf)
xlab <- paste('PC1 (',sprintf("%.1f",exp_var[1]*100),'%)')# explained varinace)')
xlab <- paste('PC1 (',sprintf("%.1f",exp_var[1]*100),'%)')# explained varinace)')
ylab <- paste('PC2 (',sprintf("%.1f",exp_var[2]*100),'%)')# explained varinace)')
p <- ggplot(data,aes(x=PC1,y=PC2,col=spec,fill=spec))+
geom_point(size=1.1,shape=21)+
......
---
output: html_document
editor_options:
editor_options:
chunk_output_type: console
---
# Supplementary Figure 03
......@@ -44,7 +44,7 @@ p1 <- ggplot()+
```
</div>
Furthermore, the generation of the outlier annotation changed.
Furthermore, the generation of the outlier annotation changed.
While in the *F~ST~* script, the outlier where determined from the loaded data and later exported, within the G x P script the the highlights don't refer to G x P outliers, but the old *F~ST~* outlier are imported and plotted for reference.
......@@ -61,9 +61,9 @@ This replaced the original section that computes the outlier windows within `F2.
threshs <- data %>% group_by(RUN) %>% summarise(thresh=quantile(WEIGHTED_FST,.9998))
data2 <- data %>% filter(RUN %in% levels(data$RUN)[1:3]) %>% merge(.,threshs,by='RUN',all.x=T) %>%
mutate(OUTL = (WEIGHTED_FST>thresh)) %>% filter(OUTL) %>% group_by(RUN) %>%
mutate(OUTL = (WEIGHTED_FST>thresh)) %>% filter(OUTL) %>% group_by(RUN) %>%
mutate(CHECK=cumsum(1-(BIN_START-lag(BIN_START,default = 0)==5000)),ID=paste(RUN,'-',CHECK,sep='')) %>%
ungroup() %>% group_by(ID) %>%
ungroup() %>% group_by(ID) %>%
summarise(CHROM=CHROM[1],
xmin = min(BIN_START+GSTART),
xmax=max(BIN_END+GSTART),
......@@ -71,7 +71,7 @@ data2 <- data %>% filter(RUN %in% levels(data$RUN)[1:3]) %>% merge(.,threshs,by=
LGmin=min(BIN_START),
LGmax=min(BIN_END),
LGmean=min(POS),
RUN=RUN[1],COL=COL[1]) %>%
RUN=RUN[1],COL=COL[1]) %>%
ungroup() %>% mutate(muskS = letters[as.numeric(as.factor(xmin))],
musk=LETTERS[c(1,3,4,4,1,2,2,3)])
......
---
output: html_document
editor_options:
editor_options:
chunk_output_type: console
---
# Supplementary Figure 05
......@@ -41,7 +41,7 @@ library(grConvert)
clp <- function(x){y<-substr(x,2,nchar(x));y}
```
### Suppl. Fig. 05 a)
### Suppl. Fig. 05 A)
First, the base path of the kallisto results is set and one sub-path per sample is generated.
......@@ -101,7 +101,7 @@ Population information is retrieved from the metadata table and the standard dif
```r
samples <- pops%>%select(rID,pop);row.names(samples) <- samples$rID
samples <- pops%>%select(rID,pop);row.names(samples) <- samples$rID
dds <- DESeqDataSetFromTximport(txi.kallisto, samples, ~pop)
dds <- DESeq(dds)
```
......@@ -166,10 +166,10 @@ Then the transformed results are stored within a `data.frame` object,arranged ac
```r
testdat <- as.data.frame(assay(rld)[select,order(retDesign$population)])%>%
tibble::rownames_to_column(var = 'gene') %>%
tibble::rownames_to_column(var = 'gene') %>%
gather(.,key = 'sample',value = 'counts',2:25)
ordS <- retDesign %>% tibble::rownames_to_column(var = 'Sample') %>% arrange(population)
ordS <- retDesign %>% tibble::rownames_to_column(var = 'Sample') %>% arrange(population)
testdat$sample <- factor(as.character(testdat$sample),levels = ordS$Sample)
testdat$gene <- factor(as.character(testdat$gene),levels =rev(selGene[!duplicated(selGene)]))
......@@ -184,7 +184,7 @@ d2 <- data.frame(name = factor(rev(selGene[!duplicated(selGene)]),
d3 <- left_join(data.frame(name=selGene,comp=c(rep('np',3),rep('nu',3),
rep('pu',3),rep('vision',9))),d2,by='name') %>%
group_by(name) %>% mutate(n = 1/length(comp),s=(row_number()-1)/2*2*pi,e=s+n*2*pi) %>%
group_by(name) %>% mutate(n = 1/length(comp),s=(row_number()-1)/2*2*pi,e=s+n*2*pi) %>%
ungroup() %>% mutate(name = as.character(name),clr=c('#fb8620','#d93327','#1b519c','#333333')[as.numeric(comp)])
d3$name <- factor(d3$name,levels=rev(selGene[!duplicated(selGene)]))
......@@ -239,7 +239,7 @@ p2 <- ggplot(testdat,aes(x=sample,y=gene,fill=counts))+
<img src="S05_files/figure-html/p2SHOW-1.png" width="768" />
</center>
### Suppl. Fig. 05 b)
### Suppl. Fig. 05 B)
To judge the expression level and variance of the candidate genes, the transformed count data is additionally plotted as box plot and compared with the complete data set.
......@@ -248,8 +248,8 @@ This is data is then used for the overall distribution of the transformed counts
```r
Cdf3 <- as.data.frame(assay(rld)[,order(retDesign$population)]) %>%
tibble::rownames_to_column(var = 'gene') %>%
Cdf3 <- as.data.frame(assay(rld)[,order(retDesign$population)]) %>%
tibble::rownames_to_column(var = 'gene') %>%
gather(key = "Sample",value = "Count",R28385:RPL17_01)
selGene2 <- c("SWS2abeta","SWS2aalpha","SWS2b","LWS",
......@@ -261,8 +261,8 @@ A second `data.frame` is created for only the candidate genes.
```r
Cgenes2 <- as.data.frame(assay(rld)[,order(retDesign$population)]) %>%
tibble::rownames_to_column(var = 'gene') %>%
Cgenes2 <- as.data.frame(assay(rld)[,order(retDesign$population)]) %>%
tibble::rownames_to_column(var = 'gene') %>%
filter(gene %in% selGene2) %>%
gather(key = "Sample",value = "Count",R28385:RPL17_01)
```
......@@ -314,7 +314,7 @@ S05 <- ggdraw()+
draw_plot(p2,0,0,.6,1)+
draw_plot(p3,.61,.17,.39,.775)+
draw_grob(legGrob, 0.2, 0, 0.8, 0.17)+
draw_plot_label(x=c(0,.6),y=c(.99,.99),label = c('a','b'))
draw_plot_label(x=c(0,.6),y=c(.99,.99),label = c('A','B'))
```
It is then exported using `ggsave()`.
......
---
output: html_document
editor_options:
editor_options:
chunk_output_type: console
---
# Supplementary Figure 06
......@@ -47,21 +47,21 @@ Further more, the extent of the individual LGs in loaded.
```r
karyo <- read.csv('../../0_data/0_resources/F2.karyo.txt',sep='\t') %>%
karyo <- read.csv('../../0_data/0_resources/F2.karyo.txt',sep='\t') %>%
mutate(GSTART=lag(cumsum(END),n = 1,default = 0),
GEND=GSTART+END,GROUP=rep(letters[1:2],12)) %>%
GEND=GSTART+END,GROUP=rep(letters[1:2],12)) %>%
select(CHROM,GSTART,GEND,GROUP)
# global -------------
pn <- read.csv('../../2_output/08_popGen/05_fst/pue-nig.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
pn <- read.csv('../../2_output/08_popGen/05_fst/pue-nig.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PN');
pu <- read.csv('../../2_output/08_popGen/05_fst/pue-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
pu <- read.csv('../../2_output/08_popGen/05_fst/pue-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PU');
nu <- read.csv('../../2_output/08_popGen/05_fst/nig-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
nu <- read.csv('../../2_output/08_popGen/05_fst/nig-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NU');
```
......@@ -87,9 +87,9 @@ Then windows above this threshold are identified for each global run.
threshs <- data %>% group_by(RUN) %>% summarise(thresh=quantile(WEIGHTED_FST,.999))
data2 <- data %>% filter(RUN %in% levels(data$RUN)[1:3]) %>% merge(.,threshs,by='RUN',all.x=T) %>%
mutate(OUTL = (WEIGHTED_FST>thresh)) %>% filter(OUTL) %>% group_by(RUN) %>%
mutate(OUTL = (WEIGHTED_FST>thresh)) %>% filter(OUTL) %>% group_by(RUN) %>%
mutate(CHECK=cumsum(1-(BIN_START-lag(BIN_START,default = 0)==5000)),ID=paste(RUN,'-',CHECK,sep='')) %>%
ungroup() %>% group_by(ID) %>%
ungroup() %>% group_by(ID) %>%
summarise(CHROM=CHROM[1],
xmin = min(BIN_START+GSTART),
xmax=max(BIN_END+GSTART),
......@@ -97,7 +97,7 @@ data2 <- data %>% filter(RUN %in% levels(data$RUN)[1:3]) %>% merge(.,threshs,by=
LGmin=min(BIN_START),
LGmax=min(BIN_END),
LGmean=min(POS),
RUN=RUN[1],COL=COL[1]) %>%
RUN=RUN[1],COL=COL[1]) %>%
ungroup() %>% mutate(muskS = letters[as.numeric(as.factor(xmin))],
musk=c('E',NA,NA,NA,'F',"A",NA,"C",NA,"D","H", # NU
"E",NA,NA,NA,"G","A",NA,"B","C","D","D", # PN
......@@ -107,7 +107,7 @@ musks <- data2 %>% group_by(musk) %>% summarise(MUSKmin=min(xmin),
MUSKmax=max(xmax),
MUSKminLG=min(LGmin),
MUSKmaxLG=max(LGmax)) %>%
merge(data2[,c(1,9,10,12)],.,by='musk') %>% filter(!is.na(musk)) %>%
merge(data2[,c(1,9,10,12)],.,by='musk') %>% filter(!is.na(musk)) %>%
mutate(muskX = ifelse(musk %in% c("A","B","E","F"),MUSKmin-1e+07,MUSKmax+1e+07))
```
......@@ -134,7 +134,7 @@ p1 <- ggplot()+
# Fst values
geom_point(data=data,aes(x=GPOS,y=WEIGHTED_FST,col=COL),size=.01)+
# labels for the outlier windows
geom_text(data=(musks %>% filter(!is.na(musk))),aes(x=muskX,y=.65,label=musk),size=3)+
geom_text(data=(musks %>% filter(!is.na(musk))),aes(x=muskX,y=.65,label=tolower(musk)),size=3)+
# formatting the axes and color map
scale_y_continuous(name = yl,breaks=0:4*0.2,labels = c(0,'',0.4,'',0.8))+
scale_x_continuous(expand = c(0,0),breaks = (karyo$GSTART+karyo$GEND)/2,labels = 1:24,position = "top")+
......
---
output: html_document
editor_options:
editor_options:
chunk_output_type: console
---
# Supplementary Figure 07
......@@ -21,10 +21,10 @@ rm Rplots.pdf
```
## Details of `S07.R`
The `S07.R` script is basically a variant of the `E1.R` script.
The `S07.R` script is basically a variant of the `F3.R` script.
Is an executable R script that depends on a variety of image manipulation and data managing and genomic data packages.
It Furthermore depends on the R scripts `S07.plot_fun.R` which itselve depends on `S07.functions.R` and `E1.getFSTs.R` (all located under `$WORK/0_data/0_scripts`).
It Furthermore depends on the R scripts `S07.plot_fun.R` which itselve depends on `S07.functions.R` and `F3.getFSTs.R` (all located under `$WORK/0_data/0_scripts`).