SQL Queries
From DictyWiki
Revision as of 16:54, 18 June 2021 (edit) Pfey@northwestern.edu (Talk | contribs) (→Genes, Primary IDs from Genes with specific Gene Product) ← Previous diff |
Current revision (23:28, 18 June 2021) (edit) Pfey@northwestern.edu (Talk | contribs) |
||
Line 986: | Line 986: | ||
WHERE value like 'Has Wiki Page' ) | WHERE value like 'Has Wiki Page' ) | ||
</pre> | </pre> | ||
+ | |||
+ | ==Genes with gene description== | ||
+ | <pre> | ||
+ | |||
+ | SELECT f.name, | ||
+ | p.value as descriptions | ||
+ | FROM cgm_chado.featureprop p inner | ||
+ | JOIN cgm_chado.v_gene_features l on l.feature_id=p.feature_id | ||
+ | JOIN cgm_chado.feature f on f.feature_id = p.feature_id | ||
+ | WHERE p.type_id=972 | ||
+ | </pre> | ||
+ | |||
Current revision
Total GO
- Canonical query (excluding obsolete annotations)
SELECT COUNT(fcvt.feature_cvterm_id) annotations FROM cgm_chado.feature_cvterm fcvt JOIN cgm_chado.feature gene ON gene.feature_id = fcvt.feature_id JOIN cgm_chado.cvterm type ON type.cvterm_id = gene.TYPE_ID JOIN cgm_chado.cvterm GO ON GO.cvterm_id = fcvt.cvterm_id JOIN cgm_chado.cv ON cv.cv_id = GO.cv_id JOIN cgm_chado.organism ON organism.organism_id = gene.organism_id WHERE type.name = 'gene' AND gene.is_deleted = 0 AND cv.name IN('molecular_function', 'biological_process', 'cellular_component') AND GO.is_obsolete = 0 AND organism.common_name = 'dicty'
- Including obsolete annotations
SELECT COUNT(fcvt.feature_cvterm_id) annotations FROM cgm_chado.feature_cvterm fcvt JOIN cgm_chado.feature gene ON gene.feature_id = fcvt.feature_id JOIN cgm_chado.cvterm type ON type.cvterm_id = gene.TYPE_ID JOIN cgm_chado.cvterm GO ON GO.cvterm_id = fcvt.cvterm_id JOIN cgm_chado.cv ON cv.cv_id = GO.cv_id JOIN cgm_chado.organism ON organism.organism_id = gene.organism_id WHERE type.name = 'gene' AND gene.is_deleted = 0 AND cv.name IN('molecular_function', 'biological_process', 'cellular_component') AND organism.common_name = 'dicty'
- Only obsolete annotations
SELECT COUNT(fcvt.feature_cvterm_id) obsolete_annotations FROM cgm_chado.feature_cvterm fcvt JOIN cgm_chado.feature gene ON gene.feature_id = fcvt.feature_id JOIN cgm_chado.cvterm type ON type.cvterm_id = gene.TYPE_ID JOIN cgm_chado.cvterm GO ON GO.cvterm_id = fcvt.cvterm_id JOIN cgm_chado.organism ON organism.organism_id = gene.organism_id JOIN cgm_chado.cv ON cv.cv_id = GO.cv_id WHERE type.name = 'gene' AND gene.is_deleted = 0 AND cv.name IN('molecular_function', 'biological_process', 'cellular_component') AND GO.is_obsolete = 1 AND organism.common_name = 'dicty'
Total GO by aspect
- biological_process
SELECT COUNT(fcvt.feature_cvterm_id) process_annotations FROM cgm_chado.feature_cvterm fcvt JOIN cgm_chado.feature gene ON gene.feature_id = fcvt.feature_id JOIN cgm_chado.cvterm type ON type.cvterm_id = gene.TYPE_ID JOIN cgm_chado.cvterm GO ON GO.cvterm_id = fcvt.cvterm_id JOIN cgm_chado.cv ON cv.cv_id = GO.cv_id JOIN organism ON organism.organism_id = gene.organism_id WHERE type.name = 'gene' AND gene.is_deleted = 0 AND cv.name = 'biological_process' AND GO.is_obsolete = 0 AND organism.common_name = 'dicty'
- molecular_function
SELECT COUNT(fcvt.feature_cvterm_id) function_annotations FROM cgm_chado.feature_cvterm fcvt JOIN cgm_chado.feature gene ON gene.feature_id = fcvt.feature_id JOIN cgm_chado.cvterm type ON type.cvterm_id = gene.TYPE_ID JOIN cgm_chado.cvterm GO ON GO.cvterm_id = fcvt.cvterm_id JOIN cgm_chado.cv ON cv.cv_id = GO.cv_id JOIN organism ON organism.organism_id = gene.organism_id WHERE type.name = 'gene' AND gene.is_deleted = 0 AND cv.name = 'molecular_function' AND GO.is_obsolete = 0 AND organism.common_name = 'dicty'
- cellular_component
SELECT COUNT(fcvt.feature_cvterm_id) component_annotations FROM cgm_chado.feature_cvterm fcvt JOIN cgm_chado.feature gene ON gene.feature_id = fcvt.feature_id JOIN cgm_chado.cvterm type ON type.cvterm_id = gene.TYPE_ID JOIN cgm_chado.cvterm GO ON GO.cvterm_id = fcvt.cvterm_id JOIN cgm_chado.cv ON cv.cv_id = GO.cv_id JOIN organism ON organism.organism_id = gene.organism_id WHERE type.name = 'gene' AND gene.is_deleted = 0 AND cv.name = 'cellular_component' AND GO.is_obsolete = 0 AND organism.common_name = 'dicty'
GO non IEA by aspect - FIXED WITH ECO CODE FEB2017 PF!
- Canonical query(all aspects)
SELECT cv.name aspect,count(fcvt.feature_cvterm_id) annotations FROM cgm_chado.feature_cvterm fcvt JOIN cgm_chado.feature gene ON gene.feature_id = fcvt.feature_id JOIN cgm_chado.cvterm type ON type.cvterm_id = gene.TYPE_ID JOIN cgm_chado.cvterm GO ON GO.cvterm_id = fcvt.cvterm_id JOIN cgm_chado.cv ON cv.cv_id = GO.cv_id JOIN cgm_chado.feature_cvtermprop fcvt_prop ON fcvt_prop.feature_cvterm_id=fcvt.feature_cvterm_id JOIN cgm_chado.cvterm evterm ON evterm.cvterm_id=fcvt_prop.type_id JOIN cgm_chado.cv ev ON ev.cv_id=evterm.cv_id JOIN organism ON organism.organism_id = gene.organism_id WHERE type.name = 'gene' ANd gene.is_deleted = 0 AND cv.name IN('molecular_function', 'biological_process', 'cellular_component') AND GO.is_obsolete = 0 AND ev.name like 'evidence_code%' AND evterm.name != 'automatic assertion' AND organism.common_name = 'dicty' GROUP BY cv.name ORDER BY annotations DESC
GO by evidence code
- Canonical query(all aspects)
SELECT evsyn.synonym_ evidence_code,count(fcvt.feature_cvterm_id) annotations FROM cgm_chado.feature_cvterm fcvt JOIN cgm_chado.feature gene ON gene.feature_id = fcvt.feature_id JOIN cgm_chado.cvterm type ON type.cvterm_id = gene.TYPE_ID JOIN cgm_chado.cvterm GO ON GO.cvterm_id = fcvt.cvterm_id JOIN cgm_chado.cv ON cv.cv_id = GO.cv_id JOIN cgm_chado.feature_cvtermprop fcvt_prop ON fcvt_prop.feature_cvterm_id=fcvt.feature_cvterm_id JOIN cgm_chado.cvterm evterm ON evterm.cvterm_id=fcvt_prop.type_id JOIN cgm_chado.cv ev ON ev.cv_id=evterm.cv_id JOIN cgm_chado.cvtermsynonym evsyn on evterm.cvterm_id=evsyn.cvterm_id JOIN cgm_chado.cvterm synterm on synterm.cvterm_id=evsyn.type_id JOIN organism ON organism.organism_id = gene.organism_id WHERE type.name = 'gene' AND gene.is_deleted = 0 AND organism.common_name = 'dicty' AND GO.is_obsolete = 0 AND ev.name like 'evidence_code%' AND synterm.name IN ('EXACT','RELATED', 'BROAD') AND LENGTH(evsyn.synonym_) <= 3 AND cv.name IN('molecular_function', 'biological_process', 'cellular_component') GROUP BY evsyn.synonym_ ORDER BY annotations DESC
- molecular function only
SELECT evsyn.synonym_ evidence_code,count(fcvt.feature_cvterm_id) annotations FROM cgm_chado.feature_cvterm fcvt JOIN cgm_chado.feature gene ON gene.feature_id = fcvt.feature_id JOIN cgm_chado.cvterm type ON type.cvterm_id = gene.TYPE_ID JOIN cgm_chado.cvterm GO ON GO.cvterm_id = fcvt.cvterm_id JOIN cgm_chado.cv ON cv.cv_id = GO.cv_id JOIN cgm_chado.feature_cvtermprop fcvt_prop ON fcvt_prop.feature_cvterm_id=fcvt.feature_cvterm_id JOIN cgm_chado.cvterm evterm ON evterm.cvterm_id=fcvt_prop.type_id JOIN cgm_chado.cv ev ON ev.cv_id=evterm.cv_id JOIN cgm_chado.cvtermsynonym evsyn on evterm.cvterm_id=evsyn.cvterm_id JOIN cgm_chado.cvterm synterm on synterm.cvterm_id=evsyn.type_id JOIN organism ON organism.organism_id = gene.organism_id WHERE type.name = 'gene' AND gene.is_deleted = 0 AND organism.common_name = 'dicty' AND GO.is_obsolete = 0 AND ev.name like 'evidence_code%' AND synterm.name IN ('EXACT','RELATED', 'BROAD') AND LENGTH(evsyn.synonym_) <= 3 AND cv.name = 'molecular_function' GROUP BY evsyn.synonym_ ORDER BY annotations DESC
- For other aspects replace biological_process (in cv.name) with either of biological_process or cellular_component
Genes with GO annotations
- All genes with annotations
SELECT COUNT (DISTINCT fcvt.feature_id) gene_with_annotations FROM CGM_CHADO.feature_cvterm fcvt JOIN CGM_CHADO.feature_cvtermprop fcvt_prop ON fcvt_prop.feature_cvterm_id = fcvt.feature_cvterm_id JOIN CGM_CHADO.cvterm evterm ON evterm.cvterm_id=fcvt_prop.type_id JOIN CGM_CHADO.cv ev ON ev.cv_id=evterm.cv_id JOIN CGM_CHADO.cvtermsynonym evsyn ON evterm.cvterm_id=evsyn.cvterm_id JOIN CGM_CHADO.cvterm syn_type ON syn_type.cvterm_id = evsyn.type_id JOIN CGM_CHADO.cv syn_cv ON syn_cv.cv_id = syn_type.cv_id JOIN CGM_CHADO.feature gene ON gene.feature_id = fcvt.feature_id JOIN CGM_CHADO.cvterm type ON type.cvterm_id = gene.type_id JOIN organism organism ON organism.organism_id = gene.organism_id WHERE gene.is_deleted = 0 AND type.name = 'gene' AND ev.name like 'evidence_code%' AND syn_type.name IN ('EXACT', 'RELATED', 'BROAD') AND syn_cv.name = 'synonym_type' AND organism.common_name = 'dicty'
- All genes with non-IEA annotations
SELECT COUNT (DISTINCT fcvt.feature_id) gene_with_annotations FROM CGM_CHADO.feature_cvterm fcvt JOIN CGM_CHADO.feature_cvtermprop fcvt_prop ON fcvt_prop.feature_cvterm_id = fcvt.feature_cvterm_id JOIN CGM_CHADO.cvterm evterm ON evterm.cvterm_id=fcvt_prop.type_id JOIN CGM_CHADO.cv ev ON ev.cv_id=evterm.cv_id JOIN CGM_CHADO.cvtermsynonym evsyn ON evterm.cvterm_id=evsyn.cvterm_id JOIN CGM_CHADO.cvterm syn_type ON syn_type.cvterm_id = evsyn.type_id JOIN CGM_CHADO.cv syn_cv ON syn_cv.cv_id = syn_type.cv_id JOIN CGM_CHADO.feature gene ON gene.feature_id = fcvt.feature_id JOIN CGM_CHADO.cvterm type ON type.cvterm_id = gene.type_id JOIN organism organism ON organism.organism_id = gene.organism_id WHERE gene.is_deleted = 0 AND type.name = 'gene' AND ev.name like 'evidence_code%' AND syn_type.name IN ('EXACT', 'RELATED', 'BROAD') AND syn_cv.name = 'synonym_type' AND organism.common_name = 'dicty' AND evterm.name != 'automatic assertion'
- All genes with EXP annotations
SELECT COUNT (DISTINCT fcvt.feature_id) gene_with_annotations FROM CGM_CHADO.feature_cvterm fcvt JOIN CGM_CHADO.feature_cvtermprop fcvt_prop ON fcvt_prop.feature_cvterm_id = fcvt.feature_cvterm_id JOIN CGM_CHADO.cvterm evterm ON evterm.cvterm_id=fcvt_prop.type_id JOIN CGM_CHADO.cv ev ON ev.cv_id=evterm.cv_id JOIN CGM_CHADO.cvtermsynonym evsyn ON evterm.cvterm_id=evsyn.cvterm_id JOIN CGM_CHADO.cvterm syn_type ON syn_type.cvterm_id = evsyn.type_id JOIN CGM_CHADO.cv syn_cv ON syn_cv.cv_id = syn_type.cv_id JOIN CGM_CHADO.feature gene ON gene.feature_id = fcvt.feature_id JOIN CGM_CHADO.cvterm type ON type.cvterm_id = gene.type_id JOIN organism organism ON organism.organism_id = gene.organism_id WHERE gene.is_deleted = 0 AND type.name = 'gene' AND ev.name like 'evidence_code%' AND syn_type.name IN ('EXACT', 'RELATED', 'BROAD') AND syn_cv.name = 'synonym_type' AND organism.common_name = 'dicty' AND evsyn.synonym_ IN ('direct assay evidence used in manual assertion', 'IDA', '(mutant phenotype evidence used in manual assertion', 'IMP', 'physical interaction evidence', 'IPI', 'genetic interaction evidence used in manual assertion', 'IGI', 'expression pattern evidence', 'IEP');
- To get the gene list, replace the first SELECT line with 'SELECT DISTINCT gene.uniquename gene_id' (no quotes).
New literatures by date
- To retrieve pubmed,title,date
SELECT uniquename pubmed,title,created_at from cgm_chado.pub WHERE created_at >= to_date('02/08/2011','MM/DD/YYYY') AND created_at <= to_date('02/15/2011','MM/DD/YYYY') AND pubplace = 'PUBMED' ORDER BY pub_id
- To retrieve only pubmed id
SELECT pub.uniquename pubmed from cgm_chado.pub WHERE created_at >= to_date('02/08/2011','MM/DD/YYYY') AND created_at <= to_date('02/15/2011','MM/DD/YYYY') AND pubplace = 'PUBMED' ORDER BY pub_id desc
- To retrieve all columns(kind of less useful)
SELECT * from cgm_chado.pub WHERE created_at >= to_date('02/08/2011','MM/DD/YYYY') AND created_at <= to_date('02/15/2011','MM/DD/YYYY') AND pubplace = 'PUBMED' ORDER BY pub_id
All PubMed Literature in database
SELECT * FROM cgm_chado.pub WHERE pubplace = 'PUBMED' ORDER BY UPDATED_AT
Literature, all Annotations, Curator, from Distinct Reference
- By reference no
- Remember to replace 16444 with any other number
SELECT feat.name locus, fpub.pub_id reference_no,pubterm.name literature_topic, curator.name who_did_it,cpubprop.timecreated when FROM cgm_chado.feature_pub fpub JOIN cgm_chado.feature_pubprop fpubprop ON fpubprop.feature_pub_id=fpub.feature_pub_id JOIN cgm_chado.cvterm pubterm ON pubterm.cvterm_id=fpubprop.type_id JOIN cgm_chado.cv pubcv ON pubcv.cv_id=pubterm.cv_id JOIN cgm_chado.curator_feature_pubprop cpubprop ON cpubprop.feature_pubprop_id=fpubprop.feature_pubprop_id JOIN cgm_chado.curator ON curator.curator_id=cpubprop.curator_id JOIN cgm_chado.feature feat ON feat.feature_id = fpub.feature_id JOIN cgm_chado.organism ON organism.organism_id = feat.organism_id WHERE pubcv.name = 'dictyBase_literature_topic' AND organism.common_name = 'dicty' AND fpub.pub_id = 16444
- By curator
SELECT DISTINCT c.name who_did_it FROM cgm_chado.pub p INNER JOIN cgm_chado.feature_pub fp ON p.pub_id = fp.pub_id INNER JOIN cgm_chado.feature_pubprop fpp ON fpp.feature_pub_id = fp.feature_pub_id INNER JOIN cgm_chado.curator_feature_pubprop cfpp ON cfpp.feature_pubprop_id = fpp.feature_pubprop_id INNER JOIN cgm_chado.curator c ON c.curator_id = cfpp.curator_id WHERE p.pub_id = 12399
All lit topics associated with gene names and papers
- This is ordered by lit topic but you could also ORDER by reference_no
SELECT feat.name locus, fpub.pub_id reference_no,pubterm.name literature_topic, curator.name who_did_it,cpubprop.timecreated when FROM cgm_chado.feature_pub fpub JOIN cgm_chado.feature_pubprop fpubprop ON fpubprop.feature_pub_id=fpub.feature_pub_id JOIN cgm_chado.cvterm pubterm ON pubterm.cvterm_id=fpubprop.type_id JOIN cgm_chado.cv pubcv ON pubcv.cv_id=pubterm.cv_id JOIN cgm_chado.curator_feature_pubprop cpubprop ON cpubprop.feature_pubprop_id=fpubprop.feature_pubprop_id JOIN cgm_chado.curator ON curator.curator_id=cpubprop.curator_id JOIN cgm_chado.feature feat ON feat.feature_id = fpub.feature_id JOIN cgm_chado.organism ON organism.organism_id = feat.organism_id WHERE pubcv.name = 'dictyBase_literature_topic' AND organism.common_name = 'dicty' ORDER by literature_topic
Find all lit topics associated with each gene (Gene ID) by paper
SELECT dx.accession, p.uniquename, stragg( ct.name ) AS Literature_Topics FROM cgm_chado.v_gene_features g INNER JOIN cgm_chado.dbxref dx ON g.dbxref_id = dx.dbxref_id INNER JOIN cgm_chado.feature_pub fp ON g.feature_id = fp.feature_id INNER JOIN cgm_chado.pub p ON fp.pub_id = p.pub_id INNER JOIN cgm_chado.feature_pubprop fpp ON fp.feature_pub_id = fpp.feature_pub_id INNER JOIN cgm_chado.cvterm ct ON fpp.type_id = ct.cvterm_id INNER JOIN cgm_chado.cv ON ct.cv_id = cv.cv_id INNER JOIN cgm_chado.organism o ON o.organism_id = g.organism_id WHERE o.common_name = 'dicty' AND p.pubplace = 'PUBMED' AND cv.name ='dictyBase_literature_topic' GROUP BY dx.accession, p.uniquename ORDER BY to_number(p.uniquename)
Notes for Genes containing word string, by curator
To search for both public and private notes use: 'AND ct.name in ('public note' , 'private note')'; or replace one with other. In this example notes containing UTR are searched. This orders by curator, but can be changed also.
SELECT g.name, fp.value, fp.created_by, ct.name FROM cgm_chado.featureprop fp INNER JOIN cgm_chado.v_gene_features g ON g.feature_id = fp.feature_id INNER JOIN cgm_chado.cvterm ct ON fp.type_id = ct.cvterm_id INNER JOIN cgm_chado.organism o ON o.organism_id = g.organism_id WHERE o.common_name = 'dicty' AND ct.name ='public note' AND value LIKE '%UTR%' ORDER BY fp.created_by
Convert gene names to DDB_G IDs
SELECT g.name AS GENE_NAME, dx.accession AS DDB_G_ID FROM cgm_chado.v_gene_features g INNER JOIN cgm_chado.dbxref dx ON g.dbxref_id = dx.dbxref_id INNER JOIN cgm_chado.organism o ON o.organism_id = g.organism_id WHERE o.common_name = 'dicty' AND g.name IN ('rab14', 'rab11A' , 'rab21' )
Genes, Primary IDs from Genes with specific Gene Product
Substitute any gene product for 'TRANSCRIPT%'; the % can be omitted when gene product is distinct or delete the word TRANSCRIPT to get all.
SELECT G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID FROM CGM_DDB.GENE_PRODUCT GP INNER JOIN CGM_DDB.LOCUS_GP LGP ON LGP.GENE_PRODUCT_NO = GP.GENE_PRODUCT_NO INNER JOIN CGM_CHADO.V_GENE_DICTYBASEID G ON LGP.LOCUS_NO = G.GENE_FEATURE_ID WHERE UPPER(GP.GENE_PRODUCT) LIKE UPPER('TRANSCRIPT%') GROUP BY G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID;
Genes, Primary IDs from Genes with automated Gene Product
SELECT G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID, GP.GENE_PRODUCT FROM CGM_DDB.GENE_PRODUCT GP INNER JOIN CGM_DDB.LOCUS_GP LGP ON LGP.GENE_PRODUCT_NO = GP.GENE_PRODUCT_NO INNER JOIN CGM_CHADO.V_GENE_DICTYBASEID G ON LGP.LOCUS_NO = G.GENE_FEATURE_ID WHERE GP.IS_AUTOMATED ='1' order by GP.GENE_PRODUCT
Pseudogene IDs
SELECT feature.uniquename FROM CGM_CHADO.feature INNER JOIN CGM_CHADO.cvterm ON cvterm.cvterm_id = feature.TYPE_ID WHERE cvterm.name = 'pseudogene' and feature.is_deleted = 0
To get also the Gene Name
SELECT pseudofeat.uniquename pseudogene,genefeat.name gene FROM CGM_CHADO.v_notdeleted_feature pseudofeat inner join CGM_CHADO.feature_relationship frel on frel.subject_id=pseudofeat.feature_id inner join CGM_CHADO.v_notdeleted_feature genefeat on frel.object_id=genefeat.feature_id inner join CGM_CHADO.cvterm on cvterm.cvterm_id=pseudofeat.type_id WHERE cvterm.name = 'pseudogene'
Map all primary DDB to GenBank Protein Accession
select d.accession, f.name, db.name from CGM_CHADO.v_primary_feature f inner join CGM_CHADO.feature_dbxref fd on fd.feature_id = f.feature_id inner join CGM_CHADO.dbxref d on d.dbxref_id = fd.dbxref_id inner join CGM_CHADO.db on d.db_id = db.db_id and db.name = 'DB:Protein Accession Number';
Genes, DDB IDs with Specific GO terms, created_by
Replace GO ID with any other; delete the 'and created_by..' if not restricted to curator; add 'order by..' after parenthesis to sort by gene_name etc.
SELECT * from cgm_chado.v_gene_dictybaseid WHERE gene_feature_id in (SELECT distinct locus_no from go_locus_goev WHERE goid = 6118 and created_by='CGM_DDB_PFEY')
Summary paragraphs with gene names
SELECT feature.uniquename, p.paragraph_text FROM cgm_chado.featureprop fp JOIN cgm_ddb.paragraph p on TO_NCHAR(fp.value)= p.paragraph_no JOIN cgm_chado.feature ON feature.feature_id=fp.feature_id WHERE fp.type_id=984 AND feature.is_deleted=0 AND paragraph_text not like '
add at end optional: order by date_created
if all, including status notes is desired, delete: and paragraph_text not like '<summary%><curation_status>%'
Summary by Text string including Curation Status
SELECT p.created_by, FEATURE_ID as n FROM cgm_chado.featureprop fp JOIN cgm_ddb.paragraph p on TO_NCHAR(fp.value)= p.paragraph_no WHERE fp.type_id=984 AND paragraph_text like '%Basic%'
Number of PM references with GO and PHenotype annotations
SELECT TAB_NAME, PRIMARY_KEY, reference_no FROM reflink WHERE reference_no in ( SELECT reference_no FROM reference WHERE pubmed is not null )
Table must be cleaned up: In excel do "convert data to columns" and distangle the PRIMARY_KEY fields. Then put PMIDs in one column and sort, then delete duplicate PMID refs.
All phenotypes
SELECT d.PRIMARY_FEATURE_DICTYBASEID as DICTYBASEID, d.gene_name, cv.name FROM cgm_chado.feature_genotype fg JOIN cgm_chado.v_gene_dictybaseid d on fg.feature_id = d.GENE_FEATURE_ID JOIN cgm_chado.phenstatement ph on fg.genotype_id = ph.genotype_id JOIN cgm_chado.phenotype p on ph.phenotype_id= p.phenotype_id JOIN cgm_chado.cvterm cv on p.observable_id=cv.cvterm_id left outer JOIN cgm_chado.dbxref xr on cv.dbxref_id = xr.dbxref_id where xr.accession like 'DDPHENO%'
Strain Descriptor, DBS ID within a period
select strain_name, dx.accession from cgm_ddb.stock_center sc inner join cgm_chado.dbxref dx on dx.dbxref_id = sc.dbxref_id where sc.date_created BETWEEN to_date('01-FEB-2011') AND to_date('31-JAN-2012')
Different Species available in the SC
select count(sc.systematic_name) num_of_strains, sc.species from cgm_ddb.stock_center sc where exists (select 'a' from cgm_ddb.stock_center_inventory sci where sc.id = sci.strain_id) group by sc.species order by sc.species DESC
Strain Descriptor, DBS ID, available in the SC
select strain_name, dx.accession from cgm_ddb.stock_center sc inner join cgm_chado.dbxref dx on dx.dbxref_id = sc.dbxref_id where exists (select 'a' from cgm_ddb.stock_center_inventory sci where sc.id = sci.strain_id)
Add "AND sc.date_created BETWEEN to_date('01-FEB-2011') AND to_date('31-JAN-2012')" No quotes and change Date as appropriate.
Strain Descriptor, DBS ID, specific mutagenesis method, available in the SC
select strain_name, dx.accession, mutagenesis_method from cgm_ddb.stock_center sc inner join cgm_chado.dbxref dx on dx.dbxref_id = sc.dbxref_id where exists (select 'a' from cgm_ddb.stock_center_inventory sci where sc.id = sci.strain_id) and mutagenesis_method = 'REMI'
You can specify other mutagenesis methods
Strain Descriptor, DBS ID, mutagenesis method, strain description with specific strain characteristic
select strain_name, dx.accession, mutagenesis_method, strain_description from cgm_ddb.stock_center sc inner join cgm_chado.dbxref dx on dx.dbxref_id = sc.dbxref_id inner join cgm_ddb.strain_char_cvterm scc ON sc.id=scc.strain_id inner join cgm_chado.cvterm ON cvterm.cvterm_id= scc.cvterm_id and cvterm.name = 'RFP marked'
if needed add: Order by mutagenesis_method; you can specify other strain characteristic
Strain Descriptor, DBS ID, Mutagenesis Method, Strain Description with specific Strain Characteristic available in the SC
select sc.strain_name, dx.accession, mutagenesis_method, sc.strain_description from cgm_ddb.stock_center sc inner join cgm_chado.dbxref dx on dx.dbxref_id = sc.dbxref_id inner join cgm_ddb.strain_char_cvterm scc ON sc.id=scc.strain_id inner join cgm_chado.cvterm ON cvterm.cvterm_id= scc.cvterm_id where exists (select 'a' from cgm_ddb.stock_center_inventory sci where sc.id = sci.strain_id) and cvterm.name = 'GFP marked'
You can specify other characteristic
Strain Names, Systematic Names, Strain Description where strain name starts with a specific string (example 'JS')
SELECT strain_name, systematic_name, strain_description FROM stock_center WHERE strain_name like 'JS%' or systematic_name like 'JS%'
Can also search for specific strains by using 'equals'
SELECT strain_name, systematic_name, strain_description FROM cgm_ddb.stock_center WHERE strain_name = 'JS65' or systematic_name = 'JS65'
Strain Names, Synonyms, Systematic Names, Created_by, Strain Description
SELECT strain_name, stragg( name) as synonyms, systematic_name, created_by, strain_description FROM cgm_ddb.stock_center sc JOIN cgm_ddb.STRAIN_SYNONYM ss on ss.strain_id = sc.id JOIN cgm_chado.synonym_ cs on ss.synonym_id= cs.synonym_id GROUP BY strain_name, created_by, strain_description, systematic_name ORDER BY created_by, strain_name
Can also restrict the curator by adding
where created_by ='CGM_DDB_PASC'
before the GROUP BY statement
Strain Names, Created_by, Strain Description, Strain Characteristics
SELECT strain_name, systematic_name, strain_description, stragg( name) as Strain_characteristics FROM cgm_ddb.stock_center s join cgm_ddb.STRAIN_CHAR_CVTERM sc on s.id = sc.strain_id join cgm_chado.CVTERM cv on sc.CVTERM_ID = cv.CVTERM_ID where strain_description like '%null%' group by strain_name, systematic_name, strain_description
Strain Names, Created_by, by a specific plasmid
SELECT strain_name, plasmid, mutagenesis_method, created_by FROM cgm_ddb.stock_center WHERE plasmid like '%pEXP%' or plasmid like '%pExp%' ORDER BY created_by
Strain Names, Systematic Name, Strain Description, by a specific Strain Characteristic
SELECT strain_name, systematic_name, name, strain_description FROM stock_center s JOIN STRAIN_CHAR_CVTERM sc on s.id = sc.strain_id JOIN cgm_chado.CVTERM cv on sc.CVTERM_ID = cv.CVTERM_ID WHERE name ='multiple null mutant' ORDER BY strain_name
Strains annotated with a specific environment condition
SELECT strain_name, systematic_name, name as envt FROM cgm_ddb.stock_center s JOIN cgm_chado.phenstatement ps ON s.genotype_id = ps.genotype_id JOIN cgm_chado.cvterm cv ON cv.CVTERM_ID = ps.ENVIRONMENT_ID WHERE name ='in suspension'
Strains annotated with a specific assay
SELECT strain_name, systematic_name, name as assay FROM cgm_ddb.stock_center s JOIN cgm_chado.phenstatement ph on s.genotype_id = ph.genotype_id JOIN cgm_chado.phenotype p ON ph.phenotype_id = p.phenotype_id JOIN cgm_chado.cvterm cv ON cv.CVTERM_ID = p.assay_ID WHERE name ='electron microscopy'
Stock Center Orders by name
THIS DOES NOT WORK - maybe View is gone?
Substitute any name or use 'last_name' and order by anything, here by order ID.
SELECT * from cgm_ddb.stock_order_view WHERE first_name = 'Baskar' ORDER BY Order_ID
Total stock center orders within a period
SELECT COUNT(stock_order_id) num_of_order FROM cgm_ddb.stock_order WHERE order_date BETWEEN to_date('01-AUG-10') AND to_date('31-JUL-11')
Replace the two to_date parameters for your date of choice
SELECT stock_order_id FROM cgm_ddb.stock_order WHERE order_date BETWEEN to_date('08-JUL-15') AND to_date('16-JUL-15')
Use this to get the list of order numbers with the selected period
Total strain orders within a period
SELECT COUNT(distinct sorder.stock_order_id) num_of_strains FROM cgm_ddb.stock_center sc JOIN cgm_ddb.stock_item_order sitem ON sc.id = sitem.item_id JOIN cgm_ddb.stock_order sorder ON sorder.stock_order_id = sitem.order_id WHERE sorder.order_date BETWEEN to_date('01-AUG-10') AND to_date('31-JUL-11')
Total number of STRAINS ordered within a period
SELECT COUNT(sorder.stock_order_id) num_of_strains FROM cgm_ddb.stock_center sc JOIN cgm_ddb.stock_item_order sitem ON sc.id = sitem.item_id JOIN cgm_ddb.stock_order sorder ON sorder.stock_order_id = sitem.order_id WHERE sorder.order_date BETWEEN to_date('01-FEB-12') AND to_date('31-JAN-13')
Number EACH STRAIN has been ordered within a period with DBS ID, strain name
SELECT dx.accession, sc.strain_name, COUNT(sio.order_id) AS NUM_ORDERS FROM cgm_ddb.stock_center sc JOIN cgm_chado.dbxref dx on dx.dbxref_id = sc.dbxref_id JOIN CGM_DDB.stock_item_order sio ON sc.id = sio.item_id JOIN cgm_ddb.strain_char_cvterm scc ON sc.id=scc.strain_id JOIN cgm_chado.cvterm ON cvterm.cvterm_id= scc.cvterm_id JOIN cgm_ddb.stock_order sorder ON sorder.stock_order_id = sio.order_id WHERE sorder.order_date BETWEEN to_date('07-JUN-10') AND to_date('29-AUG-12') GROUP BY dx.accession, sc.strain_name ORDER BY NUM_ORDERS DESC;
Replace date, change ORDER BY, or ASC instead of DESC; restrict search for specific mutant by adding "WHERE cvterm.name = 'null mutant' " after the last JOIN and changing the next WHERE to AND; restrict to specific strain name by adding AND sc.strain_name = 'AX3' (choose strain name) after the order date line.
No of plasmids added within a period
select plasmid.name,plasmid.description FROM plasmid where plasmid.date_created BETWEEN to_date('01-FEB-2011') AND to_date('31-JAN-2012')
The above query does not work!
Total number of PLASMIDS ordered within a period
SELECT COUNT(sorder.stock_order_id) num_of_plasmids FROM cgm_ddb.plasmid sc JOIN cgm_ddb.stock_item_order sitem ON sc.id = sitem.item_id JOIN cgm_ddb.stock_order sorder ON sorder.stock_order_id = sitem.order_id WHERE sorder.order_date BETWEEN to_date('01-FEB-12') AND to_date('31-JAN-13')
Total plasmid orders within a period
SELECT COUNT(distinct sorder.stock_order_id) num_of_plasmids FROM cgm_ddb.plasmid sc JOIN cgm_ddb.stock_item_order sitem ON sc.id = sitem.item_id JOIN cgm_ddb.stock_order sorder ON sorder.stock_order_id = sitem.order_id WHERE sorder.order_date BETWEEN to_date('01-AUG-10') AND to_date('31-JUL-11')
Distinct Stock Center users within a period
SELECT COUNT(distinct colleague_no) num_of_order FROM cgm_ddb.stock_order_view WHERE order_date BETWEEN to_date('01-FEB-11') AND to_date('31-JAN-12')
Stock center orders by countries
SELECT COUNT(so.stock_order_id) num_of_orders, coll.country country FROM cgm_ddb.stock_order so JOIN cgm_ddb.colleague coll ON coll.colleague_no=so.colleague_id WHERE so.order_date BETWEEN to_date('01-AUG-10') AND to_date('31-JUL-11') GROUP BY coll.country ORDER BY num_of_orders DESC
Most, least and avg no of items per stock center order
WITH item_counts AS (SELECT COUNT(colleague_no) items, order_id FROM stock_order_view WHERE order_date BETWEEN to_date('01-AUG-10') AND to_date('31-JUL-11') GROUP BY order_id) SELECT MAX(items) max_items, MIN(items) min_items, ROUND(AVG(items)) avg_items FROM item_counts
Order rows in a resultset
select name,uniquename,seqlen,residues,created_by from cgm_chado.feature order by timelastmodified asc|desc asc => should sort the result by ascending order desc => should sort the result by descending order
No of Colleagues and more
select count(COLLEAGUE_NO) ccount from cgm_ddb.COLLEAGUE;
select LAST_NAME, DATE_MODIFIED, COUNTRY FROM cgm_ddb.COLLEAGUE ORDER BY COUNTRY
This query gives the last name, date modified, and country of all subscribed colleagues.
Select feature types by dictyBase ID
select ncrna.uniquename FROM cgm_chado.feature ncrna join cgm_chado.cvterm nctype on nctype.cvterm_id=ncrna.type_id where nctype.name like '%snoRNA%' OR nctype.name = 'ncRNA' or nctype.name like 'class%RNA' or nctype.name = 'rRNA' or nctype.name = 'snRNA' OR nctype.name LIKE 'RNase%' OR nctype.name LIKE 'SRP%'
Select Feature types (these are all ncRNAs including tRNAs) by DDB ID with gene name, gene products (bundled into one field by comma).
select gene.name, stragg (gp.gene_product) as gene_product, ncrna.uniquename FROM cgm_chado.feature ncrna join cgm_chado.cvterm nctype on nctype.cvterm_id=ncrna.type_id INNER JOIN cgm_chado.feature_relationship fr ON ncrna.feature_id = fr.subject_id INNER JOIN cgm_chado.cvterm rcvt ON rcvt.cvterm_id = fr.type_id INNER JOIN cgm_chado.feature gene ON gene.feature_id = fr.object_id INNER JOIN cgm_ddb.locus_gp lg ON lg.locus_no = gene.feature_id INNER JOIN cgm_ddb.gene_product gp ON lg.gene_product_no = gp.gene_product_no where nctype.name like '%snoRNA%' OR nctype.name = 'ncRNA' OR nctype.name = 'tRNA' or nctype.name like 'class%RNA' or nctype.name = 'rRNA' or nctype.name = 'snRNA' OR nctype.name LIKE 'RNase%' OR nctype.name LIKE 'SRP%' group by gene.name, ncrna.uniquename
Select gene name, gene synonym and protein synonym, DDB_G ID
You can order by gsyn instead of gene.name, or both separated by a comma; this query excludes pseudogenes:
WITH dicty_coding_genes AS ( SELECT dbxref.accession gene_id FROM cgm_chado.feature gene JOIN organism ON organism.organism_id=gene.organism_id JOIN dbxref on dbxref.dbxref_id=gene.dbxref_id JOIN cgm_chado.cvterm gtype on gtype.cvterm_id=gene.type_id JOIN cgm_chado.feature_relationship frel ON frel.object_id=gene.feature_id JOIN cgm_chado.feature mrna ON frel.subject_id=mrna.feature_id JOIN cgm_chado.cvterm mtype ON mtype.cvterm_id=mrna.type_id WHERE gtype.name='gene' AND mtype.name='mRNA' AND organism.common_name = 'dicty' AND gene.name NOT LIKE '%\_ps%' ESCAPE '\' group by dbxref.accession ) SELECT gene.name,wm_concat(syn.name) gsyn,dg.gene_id gene_id FROM cgm_chado.feature gene JOIN cgm_chado.dbxref on gene.dbxref_id=dbxref.dbxref_id JOIN dicty_coding_genes dg on dg.gene_id=dbxref.accession LEFT JOIN cgm_chado.feature_synonym fsyn on gene.feature_id=fsyn.feature_id LEFT JOIN cgm_chado.synonym_ syn on syn.synonym_id=fsyn.SYNONYM_ID group by gene.name,dg.gene_id order by gene.name DESC
To include pseudogenes:
WITH dicty_coding_genes AS ( SELECT dbxref.accession gene_id FROM cgm_chado.feature gene JOIN organism ON organism.organism_id=gene.organism_id JOIN dbxref on dbxref.dbxref_id=gene.dbxref_id JOIN cgm_chado.cvterm gtype on gtype.cvterm_id=gene.type_id JOIN cgm_chado.feature_relationship frel ON frel.object_id=gene.feature_id JOIN cgm_chado.feature mrna ON frel.subject_id=mrna.feature_id JOIN cgm_chado.cvterm mtype ON mtype.cvterm_id=mrna.type_id WHERE gtype.name='gene' AND mtype.name='mRNA' AND organism.common_name = 'dicty' group by dbxref.accession ) SELECT gene.name,wm_concat(syn.name) gsyn,dg.gene_id gene_id FROM cgm_chado.feature gene JOIN cgm_chado.dbxref on gene.dbxref_id=dbxref.dbxref_id JOIN dicty_coding_genes dg on dg.gene_id=dbxref.accession LEFT JOIN cgm_chado.feature_synonym fsyn on gene.feature_id=fsyn.feature_id LEFT JOIN cgm_chado.synonym_ syn on syn.synonym_id=fsyn.SYNONYM_ID group by gene.name,dg.gene_id order by gene.name DESC
Find genes by gene name common root, e.g. _RTE
change 'gene.is_deleted = 0' to gene.is_deleted = 1' to get the deleted genes; works for _TE, or _ps% (% wildcard to get ps1,2 etc).
WITH dicty_genes AS ( SELECT dbxref.accession gene_id,gene.name gene_name FROM cgm_chado.feature gene JOIN cgm_chado.organism ON organism.organism_id=gene.organism_id JOIN cgm_chado.dbxref ON dbxref.dbxref_id=gene.dbxref_id JOIN cgm_chado.cvterm gtype ON gtype.cvterm_id=gene.type_id WHERE gtype.name='gene' AND organism.common_name = 'dicty' AND gene.is_deleted = 0 ) SELECT gene_id,gene_name FROM dicty_genes WHERE gene_name LIKE '%\_RTE' ESCAPE '\'
Find feature types of genes by ID
Add as many IDs as desired.
SELECT dx.accession, stragg(ct.name) as models FROM cgm_chado.v_gene_features g INNER JOIN cgm_chado.feature_relationship fr ON g.feature_id = fr.object_id INNER JOIN cgm_chado.feature f ON fr.subject_id = f.feature_id INNER JOIN cgm_chado.cvterm ct on f.type_id = ct.cvterm_id inner join cgm_chado.dbxref dx on dx.dbxref_id = g.dbxref_id where dx.accession in ( 'DDB_G0271556', 'DDB_G0278875', 'DDB_G0305569' ) group by dx.accession
Genes with community annotations
SELECT f.name FROM cgm_chado.feature f JOIN cgm_chado.FEATUREPROP fp on f.feature_id = fp.feature_id WHERE fp.value like 'Has Wiki Page' ORDER BY f.name
Genes with community annotations on Gene Page (description)
SELECT f.name, p.value as descriptions FROM cgm_chado.featureprop p inner JOIN cgm_chado.v_gene_features l on l.feature_id=p.feature_id JOIN cgm_chado.feature f on f.feature_id = p.feature_id WHERE p.type_id=972 AND p.feature_id in ( SELECT feature_id FROM cgm_chado.FEATUREPROP WHERE value like 'Has Wiki Page' )
Genes with gene description
SELECT f.name, p.value as descriptions FROM cgm_chado.featureprop p inner JOIN cgm_chado.v_gene_features l on l.feature_id=p.feature_id JOIN cgm_chado.feature f on f.feature_id = p.feature_id WHERE p.type_id=972
Return to [Curation To Do]