• Nenhum resultado encontrado

FullSupplementaryData

N/A
N/A
Protected

Academic year: 2021

Share "FullSupplementaryData"

Copied!
6
0
0

Texto

(1)

Supplementary Data – Bioinformatics

Applying Never-Ending Learning (NEL) Principles to build a

Gene Ontology (GO) Biocurator Assistant

L. R. do Amaral and E. R. Hruschka Jr.

The SQL statement

-- Populates tbschema with one feature row per ontology term.
--
-- Source rows: every row of liteseqdb.term whose term_type is
-- 'molecular_function', 'cellular_component' or 'biological_process'.
-- Each feature is a correlated scalar subquery keyed on TRM.id, so the
-- select list maps positionally onto the insert column list below.
--
-- Terms with no matching sequence rows produce NULL amino-acid fractions
-- (sum() over zero rows is NULL) and are dropped by the final HAVING filter.
--
-- Dialect: MySQL-style (std(), HAVING on a select-list alias without GROUP BY).
insert into tbschema (
    intTRMid,
    strTRMname,
    strTRMterm_type,
    intQtyTermsAdjIsFather,
    intQtyTermsAdjIsChildren,
    intDistChildrenMax,
    intQtyChildren,
    dblDistAVGChildren,
    dblDistSTDChildren,
    intDistFatherMax,
    intQtyFather,
    dblDistAVGFather,
    dblDistSTDFather,
    intNbrSpecies,
    intNroGeneProduct,
    dblAVGProductCountSpecies,
    intQtySeqs,
    dblPercAlanine,
    dblPercArginine,
    dblPercAsparagine,
    dblPercAsparticAcid,
    dblPercCysteine,
    dblPercGlutamicAcid,
    dblPercGlutamine,
    dblPercGlycine,
    dblPercHistidine,
    dblPercIsoleucine,
    dblPercLeucine,
    dblPercLysine,
    dblPercMethionine,
    dblPercPhenylalanine,
    dblPercProline,
    dblPercSerine,
    dblPercThreonine,
    dblPercTryptophan,
    dblPercTyrosine,
    dblPercValine
)
select
    TRM.id        as TRMid,
    TRM.name      as TRMname,
    TRM.term_type as TRMterm_type,

    -- Direct term2term links: rows where this term is term1_id, then term2_id.
    (select count(T2T2.term2_id)
       from term2term T2T2
      where T2T2.term1_id = TRM.id) as intQtyTermsAdjIsFather,
    (select count(T2T2.term1_id)
       from term2term T2T2
      where T2T2.term2_id = TRM.id) as intQtyTermsAdjIsChildren,

    -- graph_path distance statistics for paths where this term is term1_id.
    (select max(GPH.distance)
       from graph_path GPH
      where GPH.term1_id = TRM.id) as intDistChildrenMax,
    (select count(GPH.distance)
       from graph_path GPH
      where GPH.term1_id = TRM.id) as intQtyChildren,
    (select avg(GPH.distance)
       from graph_path GPH
      where GPH.term1_id = TRM.id) as dblDistAVGChildren,
    (select std(GPH.distance)
       from graph_path GPH
      where GPH.term1_id = TRM.id) as dblDistSTDChildren,

    -- graph_path distance statistics for paths where this term is term2_id.
    (select max(GPH.distance)
       from graph_path GPH
      where GPH.term2_id = TRM.id) as intDistFatherMax,
    (select count(GPH.distance)
       from graph_path GPH
      where GPH.term2_id = TRM.id) as intQtyFather,
    (select avg(GPH.distance)
       from graph_path GPH
      where GPH.term2_id = TRM.id) as dblDistAVGFather,
    (select std(GPH.distance)
       from graph_path GPH
      where GPH.term2_id = TRM.id) as dblDistSTDFather,

    -- gene_product_count aggregates for this term.
    (select count(species_id)
       from gene_product_count GPC
      where GPC.term_id = TRM.id) as intNbrSpecies,
    (select sum(GPC.product_count)
       from gene_product_count GPC
      where GPC.term_id = TRM.id) as intNroGeneProduct,
    (select sum(GPC.product_count) / count(species_id)
       from gene_product_count GPC
      where GPC.term_id = TRM.id) as dblAVGProductCountSpecies,

    -- Number of sequences reachable from this term through
    -- association -> gene_product_seq -> seq.
    (select count(SEQ.seq)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as intQtySeqs,

    -- Amino-acid composition. For each one-letter code the occurrence count is
    -- length(seq) - length(seq with that letter removed), summed over the term's
    -- sequences and divided by the summed seq_len. The result is a fraction,
    -- not multiplied by 100.
    -- NOTE(review): assumes SEQ.seq_len equals length(SEQ.seq) - confirm.
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'A', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercAlanine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'R', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercArginine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'N', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercAsparagine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'D', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercAsparticAcid,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'C', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercCysteine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'E', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercGlutamicAcid,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'Q', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercGlutamine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'G', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercGlycine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'H', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercHistidine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'I', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercIsoleucine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'L', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercLeucine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'K', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercLysine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'M', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercMethionine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'F', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercPhenylalanine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'P', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercProline,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'S', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercSerine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'T', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercThreonine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'W', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercTryptophan,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'Y', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercTyrosine,
    (select sum(length(SEQ.seq) - length(replace(SEQ.seq, 'V', ''))) / sum(SEQ.seq_len)
       from gene_product_seq GPS
      inner join association ASS on ASS.gene_product_id = GPS.gene_product_id
      inner join seq SEQ on SEQ.id = GPS.seq_id
      where ASS.term_id = TRM.id) as dblPercValine
from liteseqdb.term TRM
where TRM.term_type in ('molecular_function', 'cellular_component', 'biological_process')
-- HAVING (not WHERE) because the filter targets a select-list alias; the alias
-- must be spelled exactly as declared above (dblPercValine).
having dblPercValine is not null
order by TRM.term_type;

(5)

CEE Analysis

Página 1

Supplementary Data – Bioinformatics

L. R. do Amaral and E. R. Hruschka Jr.

| Seed | CEE | CEE + NB | CEE + J48 + NB |
|---|---|---|---|
| 1 | 0,6067 | 0,787 | 0,8417 |
| 2 | 0,5515 | 0,8032 | 0,8376 |
| 3 | 0,4917 | 0,7617 | 0,8391 |
| 4 | 0,5895 | 0,787 | 0,8395 |
| 5 | 0,6124 | 0,7796 | 0,8391 |
| 6 | 0,6961 | 0,772 | 0,8391 |
| 7 | 0,6484 | 0,7731 | 0,8442 |
| 8 | 0,6313 | 0,787 | 0,8363 |
| 9 | 0,6533 | 0,7731 | 0,8404 |
| 10 | 0,6087 | 0,7731 | 0,8401 |
| 11 | 0,5629 | 0,787 | 0,841 |
| 12 | 0,6372 | 0,7797 | 0,8352 |
| 13 | 0,5765 | 0,7721 | 0,8346 |
| 14 | 0,6504 | 0,7616 | 0,8369 |
| 15 | 0,5202 | 0,8032 | 0,8391 |
| 16 | 0,5471 | 0,7623 | 0,8302 |
| 17 | 0,6525 | 0,7794 | 0,839 |
| 18 | 0,4882 | 0,8032 | 0,835 |
| 19 | 0,4662 | 0,7623 | 0,8324 |
| 20 | 0,7161 | 0,7797 | 0,838 |
| 21 | 0,6519 | 0,7721 | 0,8356 |
| 22 | 0,5528 | 0,8013 | 0,8329 |
| 23 | 0,5937 | 0,7616 | 0,8361 |
| 24 | 0,6395 | 0,8032 | 0,8368 |
| 25 | 0,578 | 0,7787 | 0,8402 |
| 26 | 0,6396 | 0,7616 | 0,8386 |
| 27 | 0,7091 | 0,7623 | 0,8408 |
| 28 | 0,5654 | 0,7702 | 0,8408 |
| 29 | 0,6435 | 0,7721 | 0,8363 |
| 30 | 0,6396 | 0,8013 | 0,8305 |
| 31 | 0,6511 | 0,8032 | 0,8394 |
| 32 | 0,5296 | 0,8013 | 0,8313 |
| 33 | 0,6351 | 0,7616 | 0,8353 |
| 34 | 0,6336 | 0,8013 | 0,8367 |
| 35 | 0,4817 | 0,8032 | 0,8401 |
| Average | 0,60146 | 0,7812085714 | 0,8374257143 |
| Standard deviation | 0,0642840397 | 0,0156342078 | 0,0032978908 |
| Confidence interval | 0,0212969411 | 0,0051795252 | 0,0010925727 |
| MIN (Average − Confidence interval) | 0,5801630589 | 0,7760290462 | 0,8363331416 |
| MAX (Average + Confidence interval) | 0,6227569411 | 0,7863880966 | 0,838518287 |

Applying Never-Ending Learning (NEL) Principles to build a Gene

Ontology (GO) Biocurator Assistant

(6)

GANEL Analysis

Página 1

Supplementary Data – Bioinformatics

L. R. do Amaral and E. R. Hruschka Jr.

| Seed | CEE | GANEL | CEE + J48 + NB | GANEL + J48 + NB |
|---|---|---|---|---|
| 1 | 0,6067 | 0,7145 | 0,8417 | 0,8647 |
| 2 | 0,5515 | 0,7216 | 0,8376 | 0,844 |
| 3 | 0,4917 | 0,649 | 0,8391 | 0,8336 |
| 4 | 0,5895 | 0,6661 | 0,8395 | 0,8548 |
| 5 | 0,6124 | 0,6738 | 0,8391 | 0,8599 |
| 6 | 0,6961 | 0,6799 | 0,8391 | 0,8449 |
| 7 | 0,6484 | 0,6013 | 0,8442 | 0,8554 |
| 8 | 0,6313 | 0,713 | 0,8363 | 0,8593 |
| 9 | 0,6533 | 0,6526 | 0,8404 | 0,8539 |
| 10 | 0,6087 | 0,6977 | 0,8401 | 0,8765 |
| 11 | 0,5629 | 0,7039 | 0,841 | 0,8481 |
| 12 | 0,6372 | 0,7187 | 0,8352 | 0,8458 |
| 13 | 0,5765 | 0,6505 | 0,8346 | 0,8565 |
| 14 | 0,6504 | 0,6473 | 0,8369 | 0,8437 |
| 15 | 0,5202 | 0,6853 | 0,8391 | 0,8587 |
| 16 | 0,5471 | 0,6473 | 0,8302 | 0,8563 |
| 17 | 0,6525 | 0,6765 | 0,839 | 0,856 |
| 18 | 0,4882 | 0,6284 | 0,835 | 0,8545 |
| 19 | 0,4662 | 0,7285 | 0,8324 | 0,8627 |
| 20 | 0,7161 | 0,6379 | 0,838 | 0,8619 |
| 21 | 0,6519 | 0,6621 | 0,8356 | 0,8494 |
| 22 | 0,5528 | 0,6769 | 0,8329 | 0,8876 |
| 23 | 0,5937 | 0,6935 | 0,8361 | 0,8457 |
| 24 | 0,6395 | 0,6651 | 0,8368 | 0,8522 |
| 25 | 0,578 | 0,7093 | 0,8402 | 0,8552 |
| 26 | 0,6396 | 0,665 | 0,8386 | 0,8624 |
| 27 | 0,7091 | 0,6666 | 0,8408 | 0,8537 |
| 28 | 0,5654 | 0,6747 | 0,8408 | 0,8484 |
| 29 | 0,6435 | 0,6443 | 0,8363 | 0,8509 |
| 30 | 0,6396 | 0,6393 | 0,8305 | 0,8553 |
| 31 | 0,6511 | 0,6712 | 0,8394 | 0,8569 |
| 32 | 0,5296 | 0,6393 | 0,8313 | 0,8603 |
| 33 | 0,6351 | 0,7293 | 0,8353 | 0,8592 |
| 34 | 0,6336 | 0,6777 | 0,8367 | 0,8535 |
| 35 | 0,4817 | 0,6969 | 0,8401 | 0,8526 |
| Average | 0,60146 | 0,6744285714 | 0,8374257143 | 0,8552714286 |
| Standard deviation | 0,0642840397 | 0,0310079646 | 0,0032978908 | 0,0094357135 |
| Confidence interval | 0,0212969411 | 0,0102727644 | 0,0010925727 | 0,0031259989 |
| MIN (Average − Confidence interval) | 0,5801630589 | 0,664155807 | 0,8363331416 | 0,8521454297 |
| MAX (Average + Confidence interval) | 0,6227569411 | 0,6847013358 | 0,838518287 | 0,8583974274 |

Applying Never-Ending Learning (NEL) Principles to build a Gene Ontology (GO) Biocurator Assistant

Referências

Documentos relacionados

O primeiro artigo deste trabalho reflete todos os procedimentos desenvolvidos com vista a desenvolver e validar a fórmula Golden Index, efetuando-se para isso

Apesar do estágio ter a duração de oito semanas, apenas quatro são destinadas a atividades da Cirurgia Geral (CG), sendo as restantes quatro compostas por sessões teóricas,

Nas duas maiores lâminas d'água, a cultivar IAC 47 apresentou o maior rendimento de matéria seca da parte aérea, devido a sua maior altura e folhas mais compridas; nas duas

Com efeito, procedeu-se ao esboço de objetivos delineados para cada temática, adotan- do uma metodologia que nos permitisse averiguar se estes eram atingidos. Primeiramen- te,

Este é o ponto de partida de uma investigação que se estende à obra em geral dos arquitectos Fernando Távora, Álvaro Siza, Eduardo Souto Moura, Manuel e Francisco Aires Mateus,

enclaves contained within lower class residential areas – as in the present case of Bairro Alto, we may easily conclude that gentrification is an example of a new kind of

Así, presentamos las diferencias entre la expresión escrita y la expresión oral, sobre la lengua oral formal; definimos el concepto de competencia discursiva

A maioria dos hemangiomas infantis tem evolução favorável no sentido da resolução espontânea, mas nalguns casos pode haver complicações locais ou sistémicas, com necessidade