predictosaurus 0.9.2

Uncertainty aware haplotype based genomic variant effect prediction
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of structural variant (inclusive, 1-based).">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
##contig=<ID=OX512233.1,length=29890>
##INFO=<ID=ALT_INDEL_OPERATIONS,Number=.,Type=Integer,Description="Varlociraptor observations (binary encoded, meant for internal use only).">
##varlociraptor_preprocess_args={"Preprocess":{"kind":{"Variants":{"reference":"results/orthanq/candidates/reference_genome/ncbi_dataset/data/genomic.fna","candidates":"results/orthanq/candidates/candidates.vcf","bam":"results/orthanq/preprocess/Sample_sorted.bam","report_fragment_ids":true,"atomic_candidate_variants":true,"omit_mapq_adjustment":true,"reference_buffer_size":10,"min_bam_refetch_distance":1,"alignment_properties":null,"output":"results/orthanq/preprocess/Sample_obs.bcf","propagate_info_fields":[],"protocol_strandedness":"Opposite","realignment_window":64,"max_depth":200,"omit_insert_size":false,"pairhmm_mode":"exact","log_mode":"default","output_raw_observations":null}}}}
##varlociraptor_observation_format_version=15
##INFO=<ID=PROB_PRESENT,Number=A,Type=Float,Description="Posterior probability for event present (PHRED)">
##INFO=<ID=PROB_ARTIFACT,Number=A,Type=Float,Description="Posterior probability for any artifact, indicated by strand, read position, read orientation, softclip bias, or divindel bias (PHRED). See the bias specific records below for an explanation for each type of bias.">
##INFO=<ID=PROB_ABSENT,Number=A,Type=Float,Description="Posterior probability for not having a variant (PHRED)">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Expected sequencing depth, while considering mapping uncertainty">
##FORMAT=<ID=AF,Number=A,Type=Float,Description="Maximum a posteriori probability estimate of allele frequency">
##FORMAT=<ID=SAOBS,Number=A,Type=String,Description="Summary of simplified observations favoring the ALT allele (has to be considered together with SROBS). Each entry is encoded as CB, with C being a count, B being the posterior odds for the alt allele. The provided letter denotes an extended Kass Raftery score: B=barely, P=positive, S=strong, V=very strong (lower case if probability for correct mapping of fragment is <95%). Note that we extend Kass Raftery scores with a term for equality between the evidence of the two alleles (E=equal). Further note that there is no N=none score, as such observations occur with an opposite direction score (odds for the reference or a third allele) in the SROBS field.">
##FORMAT=<ID=SROBS,Number=A,Type=String,Description="Summary of simplified observations favoring the reference or a third allele (has to be considered together with SAOBS). Each entry is encoded as CB, with C being a count, B being the posterior odds for the reference or a third allele. The latter denotes an extended Kass Raftery score: E=equal, B=barely, P=positive, S=strong, V=very strong (lower case if probability for correct mapping of fragment is <95%).">
##FORMAT=<ID=OBS,Number=A,Type=String,Description="Summary of observations. Each entry is encoded as CBDTASOPXI, with C being a count, and B being the posterior odds for the alt or the reference allele. The latter are given as a two letter code. The first letter (`A` or `R`) defines whether the odds favor the alt allele (`A`) or any other allele including the reference allele `R`. The second letter denotes an extended Kass Raftery score: N=none, E=equal, B=barely, P=positive, S=strong, V=very strong (lower case if probability for correct mapping of fragment is <95%). Note that we extend Kass Raftery scores with a term for equality between the evidence of the two alleles (E=equal). D denotes the edit distance to the ALT allele in case it is higher than what could be expected from sequencing errors (in that case, Varlociraptor derives a third allele from the read sequence and considers that as an alternative to the alt allele, instead of the reference allele), T being the type of alignment, encoded as s=single end and p=paired end, A denoting whether the observations also map to an alternative locus (# = most found alternative locus, * = other locus, . = no locus), S being the strand that supports the observation (+, -, or * for both), O being the read orientation (> = F1R2, < = F2R1, * = unknown, ! = non standard, e.g. R1F2), P being the read position (^ = most found read position, * = any other position or position is irrelevant), X denoting whether the respective alignments entail a softclip ($ = softclip, . = no soft clip), and I denoting indel operations in the respective alignments against the alt allele (* = some indel, . = no indel or information irrelevant for variant type).">
##FORMAT=<ID=OOBS,Number=A,Type=Integer,Description="Number of omitted observations. For SNVs and MNVs, read pairs are omitted if they have a non-standard read orientation (neither F1R2 nor F2R1) as those can frequently lead to alignment artifacts.">
##FORMAT=<ID=SB,Number=A,Type=String,Description="Strand bias estimate: + indicates that ALT allele is associated with forward strand, - indicates that ALT allele is associated with reverse strand, . indicates no strand bias. Strand bias is indicative for systematic sequencing errors. Probability for strand bias is captured by the ARTIFACT event (PROB_ARTIFACT).">
##FORMAT=<ID=ROB,Number=A,Type=String,Description="Read orientation bias estimate: > indicates that ALT allele is associated with F1R2 orientation, < indicates that ALT allele is associated with F2R1 orientation, . indicates no read orientation bias. Read orientation bias is indicative of Guanine oxidation artifacts. Probability for read orientation bias is captured by the ARTIFACT event (PROB_ARTIFACT).">
##FORMAT=<ID=RPB,Number=A,Type=String,Description="Read position bias estimate: ^ indicates that ALT allele is associated with the most found read position, . indicates that there is no read position bias. Read position bias is indicative of systematic sequencing errors, e.g. in a specific cycle. Probability for read position bias is captured by the ARTIFACT event (PROB_ARTIFACT).">
##FORMAT=<ID=SCB,Number=A,Type=String,Description="Softclip bias estimate: $ indicates that ALT allele is associated with softclips in the same alignment, . indicates that there is no softclip bias. Softclip bias is indicative of systematic alignment errors, caused by a part of the read that does not properly align to the reference (and is thus soft clipped). Note that softclips can also be caused by structural variants. However, structural variants on the same haplotype as e.g. an SNV should not cause a softclip bias, because there will usually still be reads that do not reach the SV, thereby providing evidence against a softclip bias. Probability for softclip bias is captured by the ARTIFACT event (PROB_ARTIFACT).">
##FORMAT=<ID=HE,Number=A,Type=String,Description="Homopolymer error estimate: * indicates that ALT allele is associated with homopolymer indel operations of varying length, . indicates that there is no homopolymer error. Homopolymer error is indicative of systematic PCR amplification errors. Probability for such homopolymer artifacts is captured by the ARTIFACT event (PROB_ARTIFACT).">
##FORMAT=<ID=ALB,Number=A,Type=String,Description="Alt locus bias estimate: * indicates that ALT allele is systematically associated with either MAPQs smaller than the maximum MAPQ or a major alternative alignment (XA tag) reported by the used read mapper. This would be indicative of ALT reads actually coming from another locus (e.g. some repeat, a homology, a distant variant allele, or a CNV). Probability for alt locus bias is captured by the ARTIFACT event (PROB_ARTIFACT).">
##FORMAT=<ID=AFD,Number=.,Type=String,Description="Sampled posterior probability densities of allele frequencies in PHRED scale (the smaller the higher, with 0 being equal to an unscaled probability of 1). In the discrete case (no somatic mutation rate or continuous universe in the scenario), these can be seen as posterior probabilities. Note that densities can be greater than one.">
##bcftools_viewVersion=1.19+htslib-1.19.1
##bcftools_viewCommand=view tests/resources/calls.bcf; Date=Thu Mar 28 10:56:22 2024
#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	sample
OX512233.1	61	0	G	T	.	.	PROB_ABSENT=0.0360974;PROB_PRESENT=20.8211;PROB_ARTIFACT=inf	DP:AF:SAOBS:SROBS:OBS:OOBS:SB:ROB:RPB:SCB:HE:ALB:AFD	44:0:.:34V10v:34RV.p.+**..10rv.p.+**..:0:.:.:.:.:.:.:0.000=-2.97,0.023=1.42,0.025=1.92,0.028=2.42,0.030=2.92,0.038=4.43,0.040=4.90,0.050=6.90,0.053=7.48,0.060=8.92,0.084=13.75,0.145=26.93,0.267=56.39,0.511=133.86,0.756=266.28,1.000=1862.76
OX512233.1	64	1	C	T	.	.	PROB_ABSENT=0.0360965;PROB_PRESENT=20.8212;PROB_ARTIFACT=inf	DP:AF:SAOBS:SROBS:OBS:OOBS:SB:ROB:RPB:SCB:HE:ALB:AFD	44:0:.:34V10v:34RV.p.+**..10rv.p.+**..:0:.:.:.:.:.:.:0.000=-2.97,0.023=1.42,0.025=1.92,0.028=2.42,0.030=2.92,0.038=4.43,0.040=4.90,0.050=6.90,0.053=7.48,0.060=8.92,0.084=13.75,0.145=26.93,0.267=56.39,0.511=133.86,0.756=266.28,1.000=1867.87
OX512233.1	66	2	C	A	.	.	PROB_ABSENT=0.0331666;PROB_PRESENT=21.1874;PROB_ARTIFACT=inf	DP:AF:SAOBS:SROBS:OBS:OOBS:SB:ROB:RPB:SCB:HE:ALB:AFD	48:0:.:38V10v:38RV.p.+**..10rv.p.+**..:0:.:.:.:.:.:.:0.000=-2.98,0.021=1.41,0.023=1.95,0.026=2.50,0.028=3.05,0.036=4.69,0.038=5.20,0.048=7.38,0.051=8.03,0.058=9.58,0.082=14.86,0.143=29.25,0.266=61.38,0.510=145.89,0.755=290.35,1.000=2020.22
OX512233.1	82	3	A	G	.	.	PROB_ABSENT=0.0275519;PROB_PRESENT=21.9901;PROB_ARTIFACT=inf	DP:AF:SAOBS:SROBS:OBS:OOBS:SB:ROB:RPB:SCB:HE:ALB:AFD	58:0:.:48V10v:48RV.p.+**..10rv.p.+**..:0:.:.:.:.:.:.:0.000=-2.98,0.017=1.40,0.020=2.05,0.022=2.71,0.025=3.37,0.033=5.36,0.035=5.97,0.045=8.59,0.048=9.39,0.055=11.24,0.079=17.65,0.140=35.03,0.263=73.85,0.509=175.97,0.754=350.53,1.000=2443.17