Commit 06738b06 authored by Kosmas Hench's avatar Kosmas Hench
Browse files

relabel -> new format

parent 79ce4b8c
Loading
Loading
Loading
Loading

0_data/0_scripts/E1.functions.R

deleted100644 → 0
+0 −30
Original line number Diff line number Diff line
get_anno_df_single_line <- function(searchLG,gfffile,
                                    xrange,
                                    genes_of_interest,
                                    genes_of_sec_interest,
                                    anno_rown=3){
  gff_filter <- list(seqid=searchLG)
  data <- as.data.frame(readGFF(gfffile,filter=gff_filter)) %>% mutate(Parent=as.character(Parent))#%>%
  #rowwise() %>% mutate(Parent=ifelse(length(Parent)==0,ID,Parent))
  
  mRNAs <- data %>% filter(type=='mRNA',end>xrange[1],start<xrange[2]) %>% 
    ungroup() %>%  mutate(yl=row_number()%%anno_rown+2) %>% rowwise()%>%
    mutate(checkStart =ifelse(start<xrange[1],-Inf,start),
           checkEnd =ifelse(end>xrange[2],Inf,end),
           ps=ifelse(strand=='-',checkEnd,checkStart),
           pe=ifelse(strand=='-',checkStart,checkEnd),
           labelx=mean(c(sort(c(xrange[1],ps))[2],
                         sort(c(xrange[2],pe))[1])),
           window='bold(Gene)',
           clr=ifelse(Parentgenename %in% genes_of_interest,"y",
                      ifelse(Parentgenename %in% genes_of_sec_interest,"z","x"))) %>% 
    select(-Parent);
  names(mRNAs)[names(mRNAs)=='ID'] <- 'Parent'
  
  exons <- data %>% filter(type=='exon',end>xrange[1],start<xrange[2]) %>% 
    merge(.,mRNAs %>% select(Parent,yl,clr),by='Parent',all.x=T) %>% 
    mutate(ps=ifelse(strand=='-',end,start),
           pe=ifelse(strand=='-',start,end),
           window='bold(Gene)')
  return(list(mRNAs,exons))}
+28 −160
Original line number Diff line number Diff line
se <-  function(x){
  x2 <- x[!is.na(x)]
  return(sd(x2)/sqrt(length(x2)))}
get_anno_df_single_line <- function(searchLG,gfffile,
                                    xrange,
                                    genes_of_interest,
                                    genes_of_sec_interest,
                                    anno_rown=3){
  gff_filter <- list(seqid=searchLG)
  data <- as.data.frame(readGFF(gfffile,filter=gff_filter)) %>% mutate(Parent=as.character(Parent))#%>%
  #rowwise() %>% mutate(Parent=ifelse(length(Parent)==0,ID,Parent))
  
  mRNAs <- data %>% filter(type=='mRNA',end>xrange[1],start<xrange[2]) %>% 
    ungroup() %>%  mutate(yl=row_number()%%anno_rown+2) %>% rowwise()%>%
    mutate(checkStart =ifelse(start<xrange[1],-Inf,start),
           checkEnd =ifelse(end>xrange[2],Inf,end),
           ps=ifelse(strand=='-',checkEnd,checkStart),
           pe=ifelse(strand=='-',checkStart,checkEnd),
           labelx=mean(c(sort(c(xrange[1],ps))[2],
                         sort(c(xrange[2],pe))[1])),
           window='bold(Gene)',
           clr=ifelse(Parentgenename %in% genes_of_interest,"y",
                      ifelse(Parentgenename %in% genes_of_sec_interest,"z","x"))) %>% 
    select(-Parent);
  names(mRNAs)[names(mRNAs)=='ID'] <- 'Parent'
  
  exons <- data %>% filter(type=='exon',end>xrange[1],start<xrange[2]) %>% 
    merge(.,mRNAs %>% select(Parent,yl,clr),by='Parent',all.x=T) %>% 
    mutate(ps=ifelse(strand=='-',end,start),
           pe=ifelse(strand=='-',start,end),
           window='bold(Gene)')
  return(list(mRNAs,exons))}
trplot <- function(sel){
  files <- c("global","boc","bel","hon","pue","nig","uni","nig-pue","nig-uni","pue-uni")
  data <- read.csv(paste('../../2_output/08_popGen/07_LD/',files[sel],'-10000-bins.txt',sep=''),sep='\t')
  message(files[sel])
  
  s1=17620000;s2=19910000;s3=21960000;s4=22320000;
  e2=20660000;e3=22460000;
  stp=20000;
  l1=500000;l2=750000;l3=500000;l4=500000
  scling <- c(-s1,-s2+(l1+stp),-s3+(l1+l2+2*stp),-s4+(l1+l2+l3+(stp*3)))
  
  dt <- data %>% mutate(miX = floor(Mx/10000),miY=floor(My/10000))
  genes <- data.frame(start=c(17871610,20186151,22225149,22553187,22556763,22561894,22573388),
                      end=c(17873915,20347811,22228342,22555052,22559742,22566321,22575503),
                      sclr=c(1,2,3,4,4,4,4),
                      LG=c("LG09","LG12-1","LG12-2","LG17","LG17","LG17","LG17"),
                      name=c("sox10",'casz1',"hoxc13a","sws2a\u03B1","sws2a\u03B2","sws2b","lws"))
  genes <- genes %>% mutate(Nx1 = (start+scling[sclr])/10000,Nx2 = (end+scling[sclr])/10000,
                            labPOS = (Nx1+Nx2)/2) 
  genes$labPOS[genes$name %in% c("sws2a\u03B1","sws2a\u03B2","sws2b","lws")] <- genes$labPOS[genes$name %in% c("sws2a\u03B1","sws2a\u03B2","sws2b","lws")]+c(-16,-1,12,20)
  
  GZrS <- 40000;GZrS2 <- 20000; #gene zoom offset
  BS <- c(-5,5,-4,3,-8,8,-20,-10,-6,5,7,16,17,22)*10000 # Backshifter for gene zoom
  GZdf <- data.frame(x=c(genes$start[1],genes$end[1],genes$end[1]+GZrS+BS[1],genes$start[1]+GZrS+BS[2],genes$start[1],
                         genes$start[2],genes$end[2],genes$end[2]+GZrS+BS[3],genes$start[2]+GZrS+BS[4],genes$start[2],
                         genes$start[3],genes$end[3],genes$end[3]+GZrS+BS[5],genes$start[3]+GZrS+BS[6],genes$start[3],
                         genes$start[4],genes$end[4],genes$end[4]+GZrS+BS[7],genes$start[4]+GZrS+BS[8],genes$start[4],
                         genes$start[5],genes$end[5],genes$end[5]+GZrS+BS[9],genes$start[5]+GZrS+BS[10],genes$start[5],
                         genes$start[6],genes$end[6],genes$end[6]+GZrS+BS[11],genes$start[6]+GZrS+BS[12],genes$start[6],
                         genes$start[7],genes$end[7],genes$end[7]+GZrS+BS[13],genes$start[7]+GZrS+BS[14],genes$start[7])+GZrS2,
                     y=c(genes$start[1],genes$end[1],genes$end[1]-GZrS+BS[1],genes$start[1]-GZrS+BS[2],genes$start[1],
                         genes$start[2],genes$end[2],genes$end[2]-GZrS+BS[3],genes$start[2]-GZrS+BS[4],genes$start[2],
                         genes$start[3],genes$end[3],genes$end[3]-GZrS+BS[5],genes$start[3]-GZrS+BS[6],genes$start[3],
                         genes$start[4],genes$end[4],genes$end[4]-GZrS+BS[7],genes$start[4]-GZrS+BS[8],genes$start[4],
                         genes$start[5],genes$end[5],genes$end[5]-GZrS+BS[9],genes$start[5]-GZrS+BS[10],genes$start[5],
                         genes$start[6],genes$end[6],genes$end[6]-GZrS+BS[11],genes$start[6]-GZrS+BS[12],genes$start[6],
                         genes$start[7],genes$end[7],genes$end[7]-GZrS+BS[13],genes$start[7]-GZrS+BS[14],genes$start[7])-GZrS2,
                     grp=rep(letters[1:7],each=5)) %>% 
    mutate(sclr=rep(c(1,2,3,4,4,4,4),each=5),
           Nx1 = (x+scling[sclr])/10000,
           Nx2 = (y+scling[sclr])/10000)
  
  clr = c(viridis::inferno(5)[c(1,1:5)])
  Gcol <- '#3bb33b'
  Zcol = rgb(.94,.94,.94)
  DG <- rgb(.4,.4,.4)
  LGoffset <- 15
  GLABoffset <- 8
  if(sel %in% c(1,8)){
    rS <- 100000 # width of grey annotation band
    zmRange <- data.frame(x=c(s1,s1+l1,s1+l1+rS,s1+rS,s1,
                              s2,s2+l2,s2+l2+rS,s2+rS,s2,
                              s3,s3+l3,s3+l3+rS,s3+rS,s3,
                              s4,s4+l4,s4+l4+rS,s4+rS,s4),
                          y=c(s1,s1+l1,s1+l1-rS,s1-rS,s1,
                              s2,s2+l2,s2+l2-rS,s2-rS,s2,
                              s3,s3+l3,s3+l3-rS,s3-rS,s3,
                              s4,s4+l4,s4+l4-rS,s4-rS,s4),
                          grp=rep(letters[1:4],each=5)) %>% 
      mutate(sclr=rep(1:4,each=5),
             Nx1 = (x+scling[sclr])/10000-1,
             Nx2 = (y+scling[sclr])/10000)
    rS2 <- .75*rS
    zmRange2 <- data.frame(x=c(s1,s1+l1,s1+l1+rS2,s1+rS2,s1,
                               s2,s2+l2,s2+l2+rS2,s2+rS2,s2,
                               s3,s3+l3,s3+l3+rS2,s3+rS2,s3,
                               s4,s4+l4,s4+l4+rS2,s4+rS2,s4),
                           y=c(s1,s1+l1,s1+l1-rS2,s1-rS2,s1,
                               s2,s2+l2,s2+l2-rS2,s2-rS2,s2,
                               s3,s3+l3,s3+l3-rS2,s3-rS2,s3,
                               s4,s4+l4,s4+l4-rS2,s4-rS2,s4),
                           grp=rep(letters[1:4],each=5)) %>% 
      mutate(sclr=rep(1:4,each=5),
             Nx1 = (x+scling[sclr])/10000-1,
             Nx2 = (y+scling[sclr])/10000)
    
    rS3 <- .07*rS
    zmRange3 <- data.frame(x=c(s1,s1+l1,s1+l1+rS3,s1+rS3,s1,
                               s2,s2+l2,s2+l2+rS3,s2+rS3,s2,
                               s3,s3+l3,s3+l3+rS3,s3+rS3,s3,
                               s4,s4+l4,s4+l4+rS3,s4+rS3,s4),
                           y=c(s1,s1+l1,s1+l1-rS3,s1-rS3,s1,
                               s2,s2+l2,s2+l2-rS3,s2-rS3,s2,
                               s3,s3+l3,s3+l3-rS3,s3-rS3,s3,
                               s4,s4+l4,s4+l4-rS3,s4-rS3,s4),
                           grp=rep(letters[1:4],each=5)) %>% 
      mutate(sclr=rep(1:4,each=5),
             Nx1 = (x+scling[sclr])/10000-1,
             Nx2 = (y+scling[sclr])/10000)
    
    zmLab <- data.frame(x=c(s1+.5*l1,s2+.5*l2,s3+.5*l3,s4+.5*l4),
                        label=c('LG09 (A)','LG12 (B)','LG12 (C)','LG17 (D)')) %>%  
      mutate(sclr=1:4, Nx= (x+scling[sclr])/10000)
    
    zmEND <- data.frame(x=c(s1,s1+l1,
                            s2,s2+l2,
                            s3,s3+l3,
                            s4,s4+l4)) %>% 
      mutate(sclr=rep(1:4,each=2),
             Nx = ((x+scling[sclr])/10000)+rep(c(2.5,-4),4),
             label=round((x/1000000),2))
    
    textSCALE1 <- c(1.8,rep(NA,6),.7)
    textSCALE2 <- c(2,rep(NA,6),1)
    textSCALE3 <- c(3.5,rep(NA,6),1.75)
    
    p1 <- ggplot(dt %>% filter(!is.na(Mval)),aes(fill=Mval))+
        coord_equal()+
        geom_polygon(inherit.aes = F,data=zmRange,
                     aes(x=Nx1+1,y=Nx2-1,group=grp),fill='lightgray')+
        geom_polygon(inherit.aes = F,data=zmRange2,
                     aes(x=Nx1+11,y=Nx2-11,group=grp),fill=DG)+
        geom_polygon(inherit.aes = F,data=zmRange3,
                     aes(x=Nx1+1.1,y=Nx2-1.1,group=grp),fill=DG)+
        geom_polygon(inherit.aes = F,data=GZdf,
                     aes(x=Nx1,y=Nx2,group=grp),fill=Zcol)+
        geom_segment(inherit.aes = F,data=genes,
                     aes(x=Nx1+1,y=Nx1-1,xend=Nx2+1,yend=Nx2-1),
                     col=Gcol,size=1.5)+
        geom_tile(aes(x=miX,y=miY))+
        geom_text(inherit.aes = F,data=zmEND,
                  aes(x=Nx+LGoffset,y=Nx-LGoffset,label=paste(label,'\n(Mb)')),
                  angle=45,size=textSCALE1[sel])+
        geom_text(inherit.aes = F,data=genes,
                  aes(x=labPOS+GLABoffset,y=labPOS-GLABoffset,label=name),
                  angle=-45,fontface='italic',size=textSCALE2[sel])+
        geom_text(inherit.aes = F,data=zmLab,
                  aes(x=Nx+LGoffset-.8,y=Nx-LGoffset+.8,label=label),
                  angle=-45,fontface='bold',size=textSCALE3[sel],col='white')+
        scale_x_continuous(expand=c(0,0))+
        scale_y_continuous(expand=c(0,0),
                           trans = 'reverse')+
        scale_fill_gradientn(name=expression(bar(italic(r)^2)),colours=clr,
                             values=rescale(c(1,.5,.08,.03,.015,.01,0)),
                             limits=c(0,1),
                             guide = 'legend',breaks=c(0,.005,.01,.02,.03,.1,1))+
        theme_void()+
        theme(legend.position = c(.9,.75),
              legend.direction = 'vertical')
  } else {
    p1 <- ggplot(dt %>% filter(!is.na(Mval)),aes(fill=Mval))+
      coord_equal()+
      geom_segment(inherit.aes = F,data=genes,
                   aes(x=Nx1+1.5,y=Nx1-1.5,xend=Nx2+1.5,yend=Nx2-1.5),
                   col=Gcol,size=1)+
      geom_tile(aes(x=miX,y=miY))+
      scale_x_continuous(expand=c(0,0))+
      scale_y_continuous(expand=c(0,0),
                         trans = 'reverse')+
      scale_fill_gradientn(name=expression(bar(r^2)),colours=clr,
                           values=rescale(c(1,.08,.03,.015,.01,0)),
                           limits=c(0,1),guide = 'legend',breaks=c(0,.005,.01,.02,.03,.1,1))+
      theme_void()+
      theme(legend.position = c(.7,.75),legend.direction = 'vertical')
    
  }
  return(p1)
}
 No newline at end of file
Loading