View CEGMA Analysis of Coffea arabica Unigene RNA-seq predicted proteins
cegma -g ../CarabicaRNAseqUnigenes.fasta
# Statistics of the completeness of the genome based on 248 CEGs #

           #Prots  %Completeness  -  #Total  Average  %Ortho

Complete      119          47.98  -     267     2.24   67.23
  Group 1      22          33.33  -      56     2.55   86.36
  Group 2      20          35.71  -      44     2.20   65.00
  Group 3      33          54.10  -      77     2.33   69.70
  Group 4      44          67.69  -      90     2.05   56.82

Partial       178          71.77  -     443     2.49   70.22
  Group 1      35          53.03  -     105     3.00   82.86
  Group 2      39          69.64  -      99     2.54   71.79
  Group 3      47          77.05  -     116     2.47   74.47
  Group 4      57          87.69  -     123     2.16   57.89

# These results are based on the set of genes selected by Genis Parra #
# Key: #
# Prots = number of 248 ultra-conserved CEGs present in genome #
# %Completeness = percentage of 248 ultra-conserved CEGs present #
# Total = total number of CEGs present including putative orthologs #
# Average = average number of orthologs per CEG #
# %Ortho = percentage of detected CEGS that have more than 1 ortholog #
root@ticuna:/data/process/Cafe/Transcriptomics/ESTsRNAseqAssembly/cegmaCarabicaESTsRNAseqCAP3Assembly# cegma -g ../CarabicaESTsRNAseqCAP3Assembly.fasta
# Statistics of the completeness of the genome based on 248 CEGs #

           #Prots  %Completeness  -  #Total  Average  %Ortho

Complete      196          79.03  -     516     2.63   76.53
  Group 1      46          69.70  -     127     2.76   78.26
  Group 2      44          78.57  -     110     2.50   75.00
  Group 3      48          78.69  -     130     2.71   72.92
  Group 4      58          89.23  -     149     2.57   79.31

Partial       229          92.34  -     680     2.97   81.22
  Group 1      54          81.82  -     165     3.06   79.63
  Group 2      54          96.43  -     157     2.91   77.78
  Group 3      56          91.80  -     171     3.05   82.14
  Group 4      65         100.00  -     187     2.88   84.62

# These results are based on the set of genes selected by Genis Parra #
# Key: #
# Prots = number of 248 ultra-conserved CEGs present in genome #
# %Completeness = percentage of 248 ultra-conserved CEGs present #
# Total = total number of CEGs present including putative orthologs #
# Average = average number of orthologs per CEG #
# %Ortho = percentage of detected CEGS that have more than 1 ortholog #
Extract the KOG proteins that were not annotated by CEGMA
# Find the KOG ids that appear in the reference set (kogs.fa) but not in the
# CEGMA predictions (output.cegma.fa), then fetch their sequences from kogs.fa.
#
# Inputs : output.cegma.fa, kogs.fa (both in the current directory)
# Outputs: kogsAnotados.ids, kogsCegma.ids, kogsNotInOutputCegma.ids,
#          kogsNotInOutputCegma.idsOk, kogsNotInOutputCegma.idsOk.fasta
#
# NOTE: the annotated-id list is written to kogsAnotados.ids. The original
# notes wrote "kogAnotados.ids" but compared "kogsAnotados.ids", so the
# comparison never saw the freshly generated list.

# 1. Ids annotated by CEGMA: first word of each header, with the leading '>'
#    and everything from the first '.' onwards removed. Sorted and de-duplicated
#    so the list can be compared with comm below.
grep '>' output.cegma.fa \
  | awk '{print $1}' \
  | sed 's/\./ /' \
  | sed 's/>//' \
  | awk '{print $1}' \
  | sort -u > kogsAnotados.ids

# 2. All ids in the reference set. The first '_' of each header is turned into
#    a quoted tab so awk can take the second field, then the leftover '"__'
#    prefix is stripped (relies on GNU sed expanding \t in the replacement).
#    Presumably headers look like '>protein___KOGid' -- confirm against kogs.fa.
grep '>' kogs.fa \
  | sed 's/_/"\t"/' \
  | awk '{print $2}' \
  | sed 's/"__//' \
  | sort -u > kogsCegma.ids

# 3. Ids present only in the reference list. comm -13 prints lines unique to
#    the second file; both inputs are sorted above. This replaces the former
#    'diff -y | grep ">"', which needed identically ordered inputs and silently
#    dropped ids that diff reported as changed ('|') lines.
comm -13 kogsAnotados.ids kogsCegma.ids > kogsNotInOutputCegma.ids

# 4. Map the missing ids back to kogs.fa record names. Must run BEFORE cdbyank,
#    which reads the .idsOk file produced here.
#    NOTE(review): searchIdsFromFileInOtherFile.pl is a site-local script;
#    presumably it prints the kogs.fa header lines matching each id -- confirm.
perl /opt/scripts/searchIdsFromFileInOtherFile.pl kogsNotInOutputCegma.ids kogs.fa \
  | sed 's/>//' > kogsNotInOutputCegma.idsOk

# 5. Index kogs.fa (creates kogs.fa.cidx) and pull out the selected records.
cdbfasta kogs.fa
cdbyank kogs.fa.cidx < kogsNotInOutputCegma.idsOk > kogsNotInOutputCegma.idsOk.fasta