User:Lindenb/Notebook/UMR915/20100622
From OpenWetWare
myql
variation tables
create table variation(id int unsigned primary key auto_increment, chrom varchar(30) not null, position int unsigned not null, ref varchar(10) not null, alt varchar(255), creation datetime, modification timestamp) engine=InnoDB; create unique index uniq_var on variation(chrom asc,position asc,ref,alt); alter table variation add index(chrom); alter table variation add index(chrom,position);
vcf input
create table vcf_input(id int unsigned primary key auto_increment,fileformat varchar(50) null,filename varchar(255) null,description TEXT,creation datetime,modified timestamp) engine=InnoDB; create table vcf_input_meta(id int unsigned primary key auto_increment,input_id int unsigned not null , propKey varchar(255) ,propValue varchar(255),creation datetime, modified timestamp,index(input_id), foreign key(input_id) references vcf_input(id) on delete cascade) engine=InnoDB;
variant
create table vcf_call( id int unsigned primary key auto_increment,input_id int unsigned not null, variation_id int unsigned not null , sample_id int unsigned not null, consensus_quality float, snp_quality float, rms_quality float, depth int,meta text, creation datetime, modified timestamp, index(input_id), index(variation_id), index(sample_id), foreign key(input_id) references vcf_input(id) on delete cascade, foreign key(variation_id) references variation(id) , foreign key(sample_id) references sample(id) ) engine=InnoDB;
Note about sam2vcf
in sam2vcf.pl
my ($chr,$pos,$ref,$cons,$cons_qual,$snp_qual,$rms_qual,$depth,@items) = split(/\t/,$line);
SAM/PILEUP:
- 0 chr
- 1 pos
- 2 ref
- 3 cons
- 4 cons_qual
- 5 snp_qual
- 6 rms_qual
- 7 depth
and
print $fh_out "$chr\t$pos\t.\t$ref\t$alt\t$snp_qual\t0\t\tGT:GQ:DP\t$gt:$cons_qual:$depth\n";
VCF:
- 0 chr
- 1 pos
- 2 ID="."
- 3 ref
- 4 alt
- 5 snp_qual = "QUAL" (was $5 in pileup)
- 6 empty (filter)
- 7 empty (INFO)
- 8 FORMAT
- 9 CALL
- GT= genotype 0|1 , 1|2
- GQ= genotype qual (was $4 in pileup)
- DP= depth was ($7 in pileup)
inserting SLS2 dindel data in mysql
wrote tool to insert data http://code.google.com/p/code915/source/browse/trunk/tools/src/java/fr/inserm/umr915/tools/InsertVariations.java
grep "#" ./XXXX1_variantCalls.VCF > header.txt grep -v "#" ./XXXX1_variantCalls.VCF | java -jarposfilter.jar -db XXXX_0809_PCCR.bed > jeter.vcf cat header.txt jeter.vcf | java -cp insertvariants.jar:mysql-connector-java-5.1.12-bin.jar fr.inserm.umr915.tools.InsertVariations -s XXX1 -d "XXX" -t vcf
in SLS2: numerous duplicated rows:
select vcf_call.* from vcf_call,vcf_input where vcf_call.input_id=vcf_input.id group by vcf_call.variation_id having count(*)>3; =3515