column_name	dtype	unit	source_column	description	example	in_main_parquet	in_sqlite_no_structure	in_sqlite_structures
dsRNA_id	string		_derived_	Per-chromosome dsRNA identifier of the form 'dsRNA_chrN_M'. M is the 1-based dsRNA index within the chromosome, sorted by (i_start, j_start).	dsRNA_chr1_1	yes	yes	yes
chr	string		er_chr	Chromosome (e.g., 'chr1', 'chrX').	chr1	yes	yes	yes
strand	string		er_strand	Strand of the dsRNA: '+' or '-'.	-	yes	yes	yes
start	int32	bp	_derived_	Overall dsRNA span start = min(i_start, j_start). 1-based, inclusive (GFF3 convention).	10195	yes	yes	yes
end	int32	bp	_derived_	Overall dsRNA span end = max(i_end, j_end). 1-based, inclusive (GFF3 convention).	19583	yes	yes	yes
i_start	int32	bp	er_i_start	Start coordinate of the i-arm (5' arm relative to dsRNA). 1-based, inclusive.	10195	yes	yes	yes
i_end	int32	bp	er_i_end	End coordinate of the i-arm. 1-based, inclusive.	10512	yes	yes	yes
j_start	int32	bp	er_j_start	Start coordinate of the j-arm (3' arm relative to dsRNA). 1-based, inclusive.	19260	yes	yes	yes
j_end	int32	bp	er_j_end	End coordinate of the j-arm. 1-based, inclusive.	19583	yes	yes	yes
i_length	int32	nt	i_length	Length of the i-arm in nucleotides.	317	yes	yes	yes
j_length	int32	nt	j_length	Length of the j-arm in nucleotides.	323	yes	yes	yes
loop_length	int32	nt	loop_length	Distance between the two arms (j_start - i_end; e.g., 19260 - 10512 = 8748).	8748	yes	yes	yes
energy_kcal_mol	float32	kcal/mol	er_energy	Free energy of formation of the predicted duplex (RNAduplex).	-270.1	yes	yes	yes
percent_paired	float32	%	percent_paired	Percentage of nucleotides in the duplex that are base-paired.	71.03	yes	yes	yes
longest_helix	int32	bp	bp_count	Length of the longest contiguous base-paired helix in the dsRNA.	11	yes	yes	yes
length_category	string		category	Length-based bin: '30-40 nt', '40-300 nt', or '> 300 nt'.	> 300 nt	yes	yes	yes
i_gene_name	string		i_gene_name	Gene symbol(s) overlapping the i-arm (comma-separated). 'NA' if intergenic.	DDX11L2	yes	yes	yes
j_gene_name	string		j_gene_name	Gene symbol(s) overlapping the j-arm (comma-separated). 'NA' if intergenic.	OR4F5	yes	yes	yes
i_gene_id	string		i_gene_id	Ensembl gene/transcript ID(s) overlapping the i-arm. 'NA' if intergenic.	ENST00000456328.2	yes	yes	yes
j_gene_id	string		j_gene_id	Ensembl gene/transcript ID(s) overlapping the j-arm. 'NA' if intergenic.	ENST00000641515.2	yes	yes	yes
genic_intergenic	string		genic	Whether the dsRNA overlaps any gene: 'Genic' or 'Intergenic'.	Intergenic	yes	yes	yes
repetitive	string		repetitive	Whether either arm overlaps a repetitive element: 'Repetitive' or 'Non-Repetitive'.	Repetitive	yes	yes	yes
alu	string		alu	Whether either arm overlaps an Alu element: 'Alu' or 'Non-Alu'.	Non-Alu	yes	yes	yes
i_repetitive_element	string		features_arm1	Repeat family/name(s) overlapping the i-arm (comma-separated). 'FALSE' if none.	(TAACCC)n,TAR1	yes	yes	yes
j_repetitive_element	string		features_arm2	Repeat family/name(s) overlapping the j-arm (comma-separated). 'FALSE' if none.	FALSE	yes	yes	yes
Editing	string		Editing	Editing status of the dsRNA: 'Edited' (at least one A-to-I site detected on either arm) or 'Unedited'.	Unedited	yes	yes	yes
stranded_editing_i_sites	int32	sites	red_i_hits	Number of stranded A-to-I editing sites on the i-arm (REDIportal, strand-aware).	0	yes	yes	yes
stranded_editing_j_sites	int32	sites	red_j_hits	Number of stranded A-to-I editing sites on the j-arm.	0	yes	yes	yes
unstranded_editing_i_sites	int32	sites	i_sorted	Number of unstranded A-to-I editing sites on the i-arm (REDIportal, strand-agnostic).	0	yes	yes	yes
unstranded_editing_j_sites	int32	sites	j_sorted	Number of unstranded A-to-I editing sites on the j-arm.	0	yes	yes	yes
stranded_editing_sites	int32	sites	_derived_	Total stranded editing sites = stranded_editing_i_sites + stranded_editing_j_sites.	0	yes	yes	yes
unstranded_editing_sites	int32	sites	_derived_	Total unstranded editing sites = unstranded_editing_i_sites + unstranded_editing_j_sites.	0	yes	yes	yes
i_phast100	float32		i_phast100	PhastCons 100-vertebrate conservation score for the i-arm (range 0-1).	0.0	yes	yes	yes
j_phast100	float32		j_phast100	PhastCons 100-vertebrate conservation score for the j-arm (range 0-1).	0.0085	yes	yes	yes
i_phast17	float32		i_phast17	PhastCons 17-primate conservation score for the i-arm (range 0-1).	0.0	yes	yes	yes
j_phast17	float32		j_phast17	PhastCons 17-primate conservation score for the j-arm (range 0-1).	0.0672	yes	yes	yes
i_phyp100	float32		i_phyp100	PhyloP 100-vertebrate conservation score for the i-arm.	0.0	yes	yes	yes
j_phyp100	float32		j_phyp100	PhyloP 100-vertebrate conservation score for the j-arm.	-0.1841	yes	yes	yes
i_phyp17	float32		i_phyp17	PhyloP 17-primate conservation score for the i-arm.	0.0	yes	yes	yes
j_phyp17	float32		j_phyp17	PhyloP 17-primate conservation score for the j-arm.	0.056	yes	yes	yes
gtex_model_score	float32		pred_prob_editing_gtex_alu100pct	GTEx editing-prediction model score (YDF, trained on Alu + non-Alu dsRNAs, 100% Alu fraction). High-confidence threshold: 0.2513.	0.10172017	yes	yes	yes
stability_model_score	float32		pred_prob_editing_structure_only_alu100pct	Structure-only stability-prediction model score (YDF, no GTEx expression features). High-confidence threshold: 0.2471.	0.026494239	yes	yes	yes
structure_probing_score	float32		pred_3utr_All_power_weighted_advantage	Power-weighted advantage score from the structure-probing 3'UTR model. High-confidence threshold: 0.0315 (raw scale), equivalent to 0.4574 after min-max normalization.	-0.010320702	yes	yes	yes
gtex_high_conf	bool		_derived_	True if gtex_model_score >= 0.2513.	False	yes	yes	yes
stability_high_conf	bool		_derived_	True if stability_model_score >= 0.2471.	False	yes	yes	yes
structure_probing_high_conf	bool		_derived_	True if structure_probing_score >= 0.0315 (raw scale).	False	yes	yes	yes
n_models_high_conf	int8		_derived_	Number of the three ML models calling this dsRNA high-confidence (0-3).	0	yes	yes	yes
gtex_confidence_label	string		Confidence	Pre-computed string label from the upstream pipeline: 'High Confidence' or 'Low Confidence'. Mostly agrees with gtex_high_conf; use the gtex_high_conf boolean for filtering and this label only as a cross-check.	High Confidence	yes	yes	yes
stability_confidence_label	string		NoGTExConfidence	Pre-computed string label from the upstream pipeline for the stability (no-GTEx) model. Mostly agrees with stability_high_conf.	Low Confidence	yes	yes	yes
sequence	string		_join_struct_:sequence	RNA sequence of the dsRNA. i-arm and j-arm joined by '&'. U used in place of T.	(see extended parquet)	no	no	yes
predicted_structure	string		_join_struct_:predicted_structure	Dot-bracket secondary structure annotation matching the sequence string. i-arm and j-arm joined by '&'.	(see extended parquet)	no	no	yes
