Navigation

 ·   Wiki Home
 ·   Data Processing
 ·   Hemileia vastatrix
 ·   Hypothenemus hampei
 ·   Coffea
 ·   Beauveria bassiana
 ·  
 ·   Title List
 ·   Uncategorized Pages
 ·   Random Page
 ·   Recent Changes
 ·   Wiki Help
 ·   What Links Here

Active Members:

Search:

 

Create or Find Page:

 

View CEGMA Analysis of Coffea arabica Unigene RNA-seq predicted proteins

cegma -g ../CarabicaRNAseqUnigenes.fasta

#      Statistics of the completeness of the genome based on 248 CEGs      #

              #Prots  %Completeness  -  #Total  Average  %Ortho 

  Complete      119       47.98      -   267     2.24     67.23

   Group 1       22       33.33      -    56     2.55     86.36
   Group 2       20       35.71      -    44     2.20     65.00
   Group 3       33       54.10      -    77     2.33     69.70
   Group 4       44       67.69      -    90     2.05     56.82

   Partial      178       71.77      -   443     2.49     70.22

   Group 1       35       53.03      -   105     3.00     82.86
   Group 2       39       69.64      -    99     2.54     71.79
   Group 3       47       77.05      -   116     2.47     74.47
   Group 4       57       87.69      -   123     2.16     57.89

#    These results are based on the set of genes selected by Genis Parra   #

#    Key:                                                                  #
#    Prots = number of 248 ultra-conserved CEGs present in genome          #
#    %Completeness = percentage of 248 ultra-conserved CEGs present        #
#    Total = total number of CEGs present including putative orthologs     #
#    Average = average number of orthologs per CEG                         #
#    %Ortho = percentage of detected CEGS that have more than 1 ortholog   #

root@ticuna:/data/process/Cafe/Transcriptomics/ESTsRNAseqAssembly/cegmaCarabicaESTsRNAseqCAP3Assembly# cegma -g ../CarabicaESTsRNAseqCAP3Assembly.fasta

#      Statistics of the completeness of the genome based on 248 CEGs      #

              #Prots  %Completeness  -  #Total  Average  %Ortho 

  Complete      196       79.03      -   516     2.63     76.53

   Group 1       46       69.70      -   127     2.76     78.26
   Group 2       44       78.57      -   110     2.50     75.00
   Group 3       48       78.69      -   130     2.71     72.92
   Group 4       58       89.23      -   149     2.57     79.31

   Partial      229       92.34      -   680     2.97     81.22

   Group 1       54       81.82      -   165     3.06     79.63
   Group 2       54       96.43      -   157     2.91     77.78
   Group 3       56       91.80      -   171     3.05     82.14
   Group 4       65      100.00      -   187     2.88     84.62

#    These results are based on the set of genes selected by Genis Parra   #

#    Key:                                                                  #
#    Prots = number of 248 ultra-conserved CEGs present in genome          #
#    %Completeness = percentage of 248 ultra-conserved CEGs present        #
#    Total = total number of CEGs present including putative orthologs     #
#    Average = average number of orthologs per CEG                         #
#    %Ortho = percentage of detected CEGS that have more than 1 ortholog   #

Extract proteins not annotated by CEGMA

# Build the list of KOG proteins that CEGMA did NOT report, then pull their
# sequences out of kogs.fa. Run the steps in the order shown: each one
# consumes a file written by an earlier step.

# 1. IDs CEGMA annotated: take the first word of every header in
#    output.cegma.fa, cut it at the first '.', and drop the leading '>'.
#    Sorted and de-duplicated because comm (step 3) requires sorted input.
grep '>' output.cegma.fa | awk '{print $1}' | sed 's/\./ /' | sed 's/>//' | awk '{print $1}' | sort | uniq > kogsAnotados.ids

# 2. All IDs in the reference set: split each kogs.fa header at its first
#    '_', keep what follows, and strip the leftover '"__' prefix.
grep '>' kogs.fa | sed 's/_/"\t"/' | awk '{print $2}' | sed 's/"__//' | sort | uniq > kogsCegma.ids

# 3. Set difference: IDs present in kogsCegma.ids but absent from
#    kogsAnotados.ids. comm -13 suppresses lines unique to file 1 and lines
#    common to both. (A side-by-side diff filtered on '>' misses any ID that
#    diff pairs with a differing left-hand line and prints as "a | b".)
comm -13 kogsAnotados.ids kogsCegma.ids > kogsNotInOutputCegma.ids

# 4. Map the bare IDs to the record names used in kogs.fa.
#    NOTE(review): the Perl script is not visible here; presumably it prints
#    the kogs.fa header lines matching each ID -- confirm. The leading '>'
#    is stripped so the result can be fed to cdbyank as lookup keys.
perl /opt/scripts/searchIdsFromFileInOtherFile.pl kogsNotInOutputCegma.ids kogs.fa | sed 's/>//' > kogsNotInOutputCegma.idsOk

# 5. Index kogs.fa (writes kogs.fa.cidx).
cdbfasta kogs.fa

# 6. Fetch the sequences for the keys listed in step 4.
cdbyank kogs.fa.cidx < kogsNotInOutputCegma.idsOk > kogsNotInOutputCegma.idsOk.fasta