SQL Queries
From DictyWiki
Notes for Genes containing word string, Public
Replace 994 with 995 for Private Notes. In this example notes containing UTR are searched.
SELECT d.gene_name , p.value FROM cgm_chado.featureprop p JOIN cgm_chado.v_gene_dictybaseid d on d.GENE_FEATURE_ID= p.FEATURE_ID WHERE p.type_id=994 AND value like '%UTR%'
Literature, all Annotations, Curator, from Distinct Reference
SELECT * from locus_gene_info where reference_no = 10060
Replace 10060 with any other reference number.
Can also just search for Created_by:
SELECT distinct created_by from locus_gene_info where reference_no = 10060
Genes, Primary IDs from Genes with specific Gene Product
Substitute any gene product for 'TRANSCRIPT%'; the % can be omitted when gene product is distinct.
SELECT G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID
FROM CGM_DDB.GENE_PRODUCT GP
INNER JOIN CGM_DDB.LOCUS_GP LGP
ON LGP.GENE_PRODUCT_NO = GP.GENE_PRODUCT_NO
INNER JOIN CGM_CHADO.V_GENE_DICTYBASEID G
ON LGP.LOCUS_NO = G.GENE_FEATURE_ID
WHERE UPPER(GP.GENE_PRODUCT) LIKE UPPER('TRANSCRIPT%')
GROUP BY G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID;
Pseudogene IDs
SELECT feature.uniquename FROM CGM_CHADO.feature INNER JOIN CGM_CHADO.cvterm ON cvterm.cvterm_id = feature.TYPE_ID WHERE cvterm.name = 'pseudogene' and feature.is_deleted = 0
To get also the Gene Name
SELECT psuedofeat.uniquename psuedogene,genefeat.name gene FROM CGM_CHADO.v_notdeleted_feature psuedofeat inner join CGM_CHADO.feature_relationship frel on frel.subject_id=psuedofeat.feature_id inner join CGM_CHADO.v_notdeleted_feature genefeat on frel.object_id=genefeat.feature_id inner join CGM_CHADO.cvterm on cvterm.cvterm_id=psuedofeat.type_id WHERE cvterm.name = 'pseudogene'
Notes for Genes containing word string, Public
Replace 994 with 995 for Private Notes. In this example notes containing UTR are searched.
SELECT d.gene_name , p.value FROM cgm_chado.featureprop p JOIN cgm_chado.v_gene_dictybaseid d on d.GENE_FEATURE_ID= p.FEATURE_ID WHERE p.type_id=994 AND value like '%UTR%'
Literature, all Annotations, Curator, from Distinct Reference
SELECT * from locus_gene_info where reference_no = 10060
Replace 10060 with any other reference number.
Can also just search for Created_by:
SELECT distinct created_by from locus_gene_info where reference_no = 10060
Genes, Primary IDs from Genes with specific Gene Product
Substitute any gene product for 'TRANSCRIPT%'; the % can be omitted when gene product is distinct.
SELECT G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID
FROM CGM_DDB.GENE_PRODUCT GP
INNER JOIN CGM_DDB.LOCUS_GP LGP
ON LGP.GENE_PRODUCT_NO = GP.GENE_PRODUCT_NO
INNER JOIN CGM_CHADO.V_GENE_DICTYBASEID G
ON LGP.LOCUS_NO = G.GENE_FEATURE_ID
WHERE UPPER(GP.GENE_PRODUCT) LIKE UPPER('TRANSCRIPT%')
GROUP BY G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID;
Genes with community annotations
SELECT f.name FROM cgm_chado.feature f JOIN cgm_chado.FEATUREPROP fp on f.feature_id = fp.feature_id WHERE fp.value like 'Has Wiki Page' ORDER BY f.name
Map all primary DDB to GenBank Protein Accession
select d.accession, f.name, db.name
from CGM_CHADO.v_primary_feature f
inner join CGM_CHADO.feature_dbxref fd
on fd.feature_id = f.feature_id
inner join CGM_CHADO.dbxref d
on d.dbxref_id = fd.dbxref_id
inner join CGM_CHADO.db
on d.db_id = db.db_id
and db.name = 'DB:Protein Accession Number';
Genes, DDB IDs with Specific GO terms, created_by
Replace GO ID with any other; delete the 'and created_by..' if not restricted to curator; add 'order by..' after parenthesis to sort by gene_name etc.
SELECT * from cgm_chado.v_gene_dictybaseid WHERE gene_feature_id in (SELECT distinct locus_no from go_locus_goev WHERE goid = 6118 and created_by='CGM_DDB_PFEY')
Summaries
select fp.FEATURE_ID, p.paragraph_text from cgm_chado.featureprop fp JOIN paragraph p on TO_NCHAR(fp.value)= p.paragraph_no WHERE fp.type_id=984 and written_by ='CGM_DDB_KPIL' and not paragraph_text like '<summary%><curation_status>%' and exists (select * from cgm_chado.featureprop where type_id=984 and value like paragraph_no) order by date_created
Strain Names, Systematic Names, Strain Description where strain name starts with a specific string (example 'JS')
SELECT strain_name, systematic_name, strain_description FROM stock_center WHERE strain_name like 'JS%' or systematic_name like 'JS%'
Can also search for specific strains by using 'equals'
SELECT strain_name, systematic_name, strain_description FROM stock_center WHERE strain_name = 'JS65' or systematic_name = 'JS65'
Strain Names, Synonyms, Systematic Names, Created_by, Strain Description
SELECT strain_name, stragg( name) as synonyms, systematic_name, created_by, strain_description FROM stock_center sc JOIN STRAIN_SYNONYM ss on ss.strain_id = sc.id JOIN cgm_chado.synonym_ cs on ss.synonym_id= cs.synonym_id GROUP BY strain_name, created_by, strain_description, systematic_name ORDER BY created_by, strain_name
Can also restrict the curator by adding
where created_by ='CGM_DDB_PASC'
before the GROUP BY statement
Strain Names, Created_by, Strain Description, Strain Characteristics
SELECT strain_name,
systematic_name,
strain_description,
stragg( name) as Strain_characteristics
FROM stock_center s
join STRAIN_CHAR_CVTERM sc on s.id = sc.strain_id
join cgm_chado.CVTERM cv on sc.CVTERM_ID = cv.CVTERM_ID
where strain_description like '%null%'
group by strain_name, systematic_name, strain_description
Strain Names, Systematic Name, Strain Description, by a specific Strain Characteristic
SELECT strain_name,
systematic_name,
name,
strain_description
FROM stock_center s
JOIN STRAIN_CHAR_CVTERM sc on s.id = sc.strain_id
JOIN cgm_chado.CVTERM cv on sc.CVTERM_ID = cv.CVTERM_ID
WHERE name ='multiple null mutant'
ORDER BY strain_name
Strains annotated with a specific environment condition
SELECT strain_name, systematic_name, name as envt FROM stock_center s JOIN cgm_chado.phenstatement ps ON s.genotype_id = ps.genotype_id JOIN cgm_chado.cvterm cv ON cv.CVTERM_ID = ps.ENVIRONMENT_ID WHERE name ='in suspension'
Strains annotated with a specific assay
SELECT strain_name, systematic_name, name as assay FROM stock_center s JOIN cgm_chado.phenstatement ph on s.genotype_id = ph.genotype_id JOIN cgm_chado.phenotype p ON ph.phenotype_id = p.phenotype_id JOIN cgm_chado.cvterm cv ON cv.CVTERM_ID = p.assay_ID WHERE name ='electron microscopy'
Return to [Curation To Do]
