SQL Queries

From DictyWiki

Jump to: navigation, search

Contents


Notes for Genes containing word string, Public

Replace 994 with 995 for Private Notes. In this example notes containing UTR are searched.

SELECT d.gene_name , p.value FROM cgm_chado.featureprop p JOIN cgm_chado.v_gene_dictybaseid d on 
d.GENE_FEATURE_ID= p.FEATURE_ID WHERE p.type_id=994 AND value like '%UTR%'


Literature, all Annotations, Curator, from Distinct Reference

SELECT * from locus_gene_info where reference_no = 10060

Replace 10060 with any other reference number.
Can also just search for Created_by:

SELECT distinct created_by from locus_gene_info where reference_no = 10060


Genes, Primary IDs from Genes with specific Gene Product

Substitute any gene product for 'TRANSCRIPT%'; the % can be omitted when gene product is distinct.

      SELECT G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID
        FROM CGM_DDB.GENE_PRODUCT GP
  INNER JOIN CGM_DDB.LOCUS_GP LGP
          ON LGP.GENE_PRODUCT_NO = GP.GENE_PRODUCT_NO
  INNER JOIN CGM_CHADO.V_GENE_DICTYBASEID G
          ON LGP.LOCUS_NO = G.GENE_FEATURE_ID
       WHERE UPPER(GP.GENE_PRODUCT) LIKE UPPER('TRANSCRIPT%')
    GROUP BY G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID;


Pseudogene IDs

SELECT feature.uniquename
FROM CGM_CHADO.feature
INNER JOIN CGM_CHADO.cvterm ON cvterm.cvterm_id = feature.TYPE_ID
WHERE cvterm.name = 'pseudogene'
and feature.is_deleted = 0

To get also the Gene Name

SELECT psuedofeat.uniquename psuedogene,genefeat.name gene
FROM CGM_CHADO.v_notdeleted_feature psuedofeat
inner join CGM_CHADO.feature_relationship frel on frel.subject_id=psuedofeat.feature_id
inner join CGM_CHADO.v_notdeleted_feature genefeat on frel.object_id=genefeat.feature_id
inner join CGM_CHADO.cvterm on cvterm.cvterm_id=psuedofeat.type_id
WHERE cvterm.name = 'pseudogene'

Notes for Genes containing word string, Public

Replace 994 with 995 for Private Notes. In this example notes containing UTR are searched.

SELECT d.gene_name , p.value FROM cgm_chado.featureprop p JOIN cgm_chado.v_gene_dictybaseid d on 
d.GENE_FEATURE_ID= p.FEATURE_ID WHERE p.type_id=994 AND value like '%UTR%'


Literature, all Annotations, Curator, from Distinct Reference

SELECT * from locus_gene_info where reference_no = 10060

Replace 10060 with any other reference number.
Can also just search for Created_by:

SELECT distinct created_by from locus_gene_info where reference_no = 10060


Genes, Primary IDs from Genes with specific Gene Product

Substitute any gene product for 'TRANSCRIPT%'; the % can be omitted when gene product is distinct.

      SELECT G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID
        FROM CGM_DDB.GENE_PRODUCT GP
  INNER JOIN CGM_DDB.LOCUS_GP LGP
          ON LGP.GENE_PRODUCT_NO = GP.GENE_PRODUCT_NO
  INNER JOIN CGM_CHADO.V_GENE_DICTYBASEID G
          ON LGP.LOCUS_NO = G.GENE_FEATURE_ID
       WHERE UPPER(GP.GENE_PRODUCT) LIKE UPPER('TRANSCRIPT%')
    GROUP BY G.GENE_NAME, G.PRIMARY_FEATURE_DICTYBASEID;

Genes with community annotations

SELECT f.name 
  FROM cgm_chado.feature f 
  JOIN cgm_chado.FEATUREPROP fp on f.feature_id = fp.feature_id 
 WHERE fp.value like 'Has Wiki Page'
 ORDER BY f.name


Map all primary DDB to GenBank Protein Accession

 select d.accession, f.name, db.name
              from CGM_CHADO.v_primary_feature f
        inner join CGM_CHADO.feature_dbxref fd
                on fd.feature_id = f.feature_id
        inner join CGM_CHADO.dbxref d
                on d.dbxref_id = fd.dbxref_id
        inner join CGM_CHADO.db
                on d.db_id = db.db_id
               and db.name = 'DB:Protein Accession Number';

Genes, DDB IDs with Specific GO terms, created_by

Replace GO ID with any other; delete the 'and created_by..' if not restricted to curator; add 'order by..' after parenthesis to sort by gene_name etc.

SELECT * from cgm_chado.v_gene_dictybaseid 
WHERE gene_feature_id in
	(SELECT distinct locus_no from go_locus_goev WHERE goid = 6118 and created_by='CGM_DDB_PFEY')

Summaries

select fp.FEATURE_ID, p.paragraph_text from cgm_chado.featureprop fp  
JOIN paragraph p on TO_NCHAR(fp.value)= p.paragraph_no
WHERE fp.type_id=984 and written_by ='CGM_DDB_KPIL' and not paragraph_text  like '<summary%><curation_status>%' 
 
and exists
(select * from cgm_chado.featureprop where type_id=984 and value like paragraph_no) order by date_created

Strain Names, Systematic Names, Strain Description where strain name starts with a specific string (example 'JS')

SELECT strain_name, systematic_name, strain_description FROM stock_center WHERE strain_name like 'JS%' or 
systematic_name like 'JS%'

Can also search for specific strains by using 'equals'

SELECT strain_name, systematic_name, strain_description FROM stock_center WHERE strain_name = 'JS65' or 
systematic_name = 'JS65'


Strain Names, Synonyms, Systematic Names, Created_by, Strain Description

SELECT strain_name, stragg( name) as synonyms, systematic_name, created_by, strain_description FROM stock_center  sc
JOIN STRAIN_SYNONYM ss on ss.strain_id = sc.id
JOIN cgm_chado.synonym_ cs on ss.synonym_id= cs.synonym_id 
GROUP BY strain_name, created_by, strain_description, systematic_name
ORDER BY created_by, strain_name

Can also restrict the curator by adding

where created_by ='CGM_DDB_PASC' 

before the GROUP BY statement

Strain Names, Created_by, Strain Description, Strain Characteristics

  SELECT strain_name,   
         systematic_name,
         strain_description,
         stragg( name) as Strain_characteristics
     FROM stock_center s 
     join STRAIN_CHAR_CVTERM sc on s.id = sc.strain_id
     join cgm_chado.CVTERM cv on sc.CVTERM_ID = cv.CVTERM_ID
    where strain_description like '%null%'
    group by strain_name,  systematic_name, strain_description


Strain Names, Systematic Name, Strain Description, by a specific Strain Characteristic

SELECT strain_name,
       systematic_name,
       name,
       strain_description 
  FROM stock_center s 
  JOIN STRAIN_CHAR_CVTERM sc on s.id = sc.strain_id 
  JOIN cgm_chado.CVTERM cv on sc.CVTERM_ID = cv.CVTERM_ID 
 WHERE name ='multiple null mutant'
 ORDER BY strain_name

Strains annotated with a specific environment condition

  SELECT strain_name, systematic_name, name as envt     
  FROM stock_center s    
  JOIN cgm_chado.phenstatement ps  ON s.genotype_id = ps.genotype_id 
  JOIN cgm_chado.cvterm cv ON cv.CVTERM_ID = ps.ENVIRONMENT_ID
  WHERE name ='in suspension'

Strains annotated with a specific assay

 SELECT strain_name, systematic_name, name as assay  
  FROM stock_center s    
JOIN cgm_chado.phenstatement ph on s.genotype_id = ph.genotype_id 
JOIN cgm_chado.phenotype p ON ph.phenotype_id = p.phenotype_id
  JOIN cgm_chado.cvterm cv ON cv.CVTERM_ID = p.assay_ID
  WHERE name ='electron microscopy'


Return to [Curation To Do]

Personal tools