1pub mod fastx {
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
12
/// A record name paired with the byte range to extract from that record.
///
/// Used as the input of `FastxRecords::subsequences_by_name`, which looks the
/// record up by `name` and passes `start`/`end` to the record's `subsequence`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NameWithRange {
    /// Key of the record to look up, exactly as stored in the collection
    /// (for files loaded by `read_fasta`/`read_fastq` that is the whole
    /// header line, including the leading `>` or `@`).
    pub name: String,
    /// Start of the requested range (byte offset into the sequence).
    pub start: usize,
    /// End of the requested range (byte offset, exclusive).
    pub end: usize,
}
23
/// Returns `s[start..end]` after clamping both bounds into the string.
///
/// `end` is capped at `s.len()` and `start` is capped at the (capped) `end`,
/// so an out-of-range or inverted request yields an empty slice instead of a
/// panic. Offsets are byte offsets; slicing still panics if a clamped bound
/// falls inside a multi-byte character.
fn clamp_slice(s: &str, start: usize, end: usize) -> &str {
    let length = s.len();
    let upper = if end > length { length } else { end };
    let lower = if start > upper { upper } else { start };
    &s[lower..upper]
}
29
/// Behaviour shared by every record type that can be stored in a
/// `FastxRecords` collection.
///
/// Implementors must be `Clone` because the collection copies records when
/// it builds subsets.
pub trait FastxRecord: Clone {
    /// Returns the index this record was created with.
    fn get_index(&self) -> u32;

    /// Returns the record's sequence text.
    fn get_sequence(&self) -> &str;

    /// Returns a new record restricted to the `start..end` range of this one.
    fn subsequence(&self, start: usize, end: usize) -> Self;

    /// Renders the record as output text, with `name` as its header line.
    fn format_output(&self, name: &str) -> String;
}
41
/// A sequence together with the index it was given when it was created.
#[derive(Clone, Debug)]
pub struct IndexedSequence {
    /// Index supplied at construction; `read_fasta` passes the record's
    /// position among the valid records of the file.
    original_index_: u32,
    /// The sequence text.
    sequence_: String,
}

impl IndexedSequence {
    /// Builds a record from `original_index` and an owned copy of `sequence`.
    pub fn new(original_index: u32, sequence: &str) -> IndexedSequence {
        let sequence_ = String::from(sequence);
        Self {
            original_index_: original_index,
            sequence_,
        }
    }
}
64
65 impl FastxRecord for IndexedSequence {
66 fn get_index(&self) -> u32 { self.original_index_ }
67
68 fn get_sequence(&self) -> &str { &self.sequence_ }
69
70 fn subsequence(&self, start: usize, end: usize) -> IndexedSequence {
71 IndexedSequence::new( self.original_index_, clamp_slice(&self.sequence_, start, end) )
72 }
73
74 fn format_output(&self, name: &str) -> String {
75 format!("{name}\n{}\n", self.sequence_)
76 }
77 }
78
/// A sequence with per-position quality scores and the index it was given
/// when it was created.
#[derive(Clone, Debug)]
pub struct IndexedSequenceWithQuality {
    /// Index supplied at construction.
    original_index_: u32,
    /// Quality string; always the same byte length as `sequence_`.
    quality_scores_: String,
    /// The sequence text.
    sequence_: String,
}

impl IndexedSequenceWithQuality {
    /// Builds a record from owned copies of `quality_scores` and `sequence`.
    ///
    /// # Errors
    /// Returns an error message when the two strings differ in byte length.
    pub fn new(original_index: u32, quality_scores: &str, sequence: &str) -> Result<IndexedSequenceWithQuality, String> {
        if quality_scores.len() == sequence.len() {
            Ok(Self {
                original_index_: original_index,
                quality_scores_: String::from(quality_scores),
                sequence_: String::from(sequence),
            })
        } else {
            Err(String::from("Quality scores must be the same length as sequence"))
        }
    }

    /// Returns the stored quality string.
    pub fn get_quality_scores(&self) -> &str {
        self.quality_scores_.as_str()
    }
}
115
116 impl FastxRecord for IndexedSequenceWithQuality {
117 fn get_index(&self) -> u32 { self.original_index_ }
118
119 fn get_sequence(&self) -> &str { &self.sequence_ }
120
121 fn subsequence(&self, start: usize, end: usize) -> IndexedSequenceWithQuality {
122 IndexedSequenceWithQuality::new(
123 self.original_index_,
124 clamp_slice(&self.quality_scores_, start, end),
125 clamp_slice(&self.sequence_, start, end),
126 ).unwrap()
127 }
128
129 fn format_output(&self, name: &str) -> String {
130 format!("{name}\n{}\n+\n{}\n", self.sequence_, self.quality_scores_)
131 }
132 }
133
134 #[derive(Debug)]
136 pub struct FastxRecords<T> {
137 records_: HashMap<String, T>,
138 max_sequence_length_: usize,
139 }
140
141 impl<T: FastxRecord> FastxRecords<T> {
142 pub fn num_records(&self) -> usize { self.records_.len() }
144 pub fn get_max_length(&self) -> usize { self.max_sequence_length_ }
146
147 pub fn records_by_name(&self, names: Vec<String>) -> (FastxRecords<T>, String) {
161 let mut subset: HashMap<String, T> = HashMap::new();
162 let mut absent_names = String::new();
163 let mut current_max_length: usize = 0;
164 for name in names {
165 if let Some(record) = self.records_.get(&name) {
166 subset.insert( name, record.clone() );
167 current_max_length = current_max_length.max( record.get_sequence().len() );
168 } else {
169 if !absent_names.is_empty() { absent_names.push('\n'); }
170 absent_names.push_str(&name);
171 }
172 }
173 (
174 FastxRecords {
175 records_: subset,
176 max_sequence_length_: current_max_length
177 },
178 absent_names
179 )
180 }
181
182 pub fn subsequences(&self, start: usize, end: usize) -> FastxRecords<T> {
194 let mut subset: HashMap<String, T> = HashMap::new();
195 let mut current_max_length: usize = 0;
197 for (name, record) in &self.records_ {
198 let local_subsequence = record.subsequence(start, end);
199 current_max_length = current_max_length.max( local_subsequence.get_sequence().len() );
200 subset.insert(name.clone(), local_subsequence);
201 }
202 FastxRecords { records_: subset, max_sequence_length_: current_max_length }
203 }
204
205 pub fn subsequences_by_name(&self, names_ranges: Vec<NameWithRange>) -> (FastxRecords<T>, String) {
219 let mut subset: HashMap<String, T> = HashMap::new();
220 let mut absent_names = String::new();
221 let mut current_max_length: usize = 0;
222 for name_range in names_ranges {
223 if let Some(record) = self.records_.get(&name_range.name) {
224 let local_subsequence = record.subsequence(name_range.start, name_range.end);
225 current_max_length = current_max_length.max( local_subsequence.get_sequence().len() );
226 subset.insert(name_range.name, local_subsequence);
227 } else {
228 if !absent_names.is_empty() { absent_names.push('\n'); }
229 absent_names.push_str(&name_range.name);
230 }
231 }
232 (FastxRecords { records_: subset, max_sequence_length_: current_max_length }, absent_names)
233 }
234
235 pub fn merge(&mut self, from: FastxRecords<T>) {
244 self.max_sequence_length_ = self.max_sequence_length_.max(from.max_sequence_length_);
245 self.records_.extend(from.records_);
246 }
247
248 pub fn save_records(&self, output_path: &str) -> Result<(), String> {
259 let mut file = File::create(output_path)
260 .map_err( |error| error.to_string() )?;
261 for (name, record) in &self.records_ {
262 file.write_all( record.format_output(name).as_bytes() )
263 .map_err( |error| error.to_string() )?;
264 }
265 Ok( () )
266 }
267
268 pub fn save_sorted_records(&self, output_path: &str) -> Result<(), String> {
279 let mut file = File::create(output_path)
280 .map_err( |error| error.to_string() )?;
281 let mut sorted: Vec<(&String, &T)> = self.records_.iter().collect();
282 sorted.sort_by_key( |(_, record)| record.get_index() );
283 for (name, record) in sorted {
284 file.write_all( record.format_output(name).as_bytes() )
285 .map_err( |error| error.to_string() )?;
286 }
287 Ok( () )
288 }
289 }
290
291 pub fn read_fasta(fasta_path: &str) -> Result<FastxRecords<IndexedSequence>, String> {
303 let mut local_records: HashMap<String, IndexedSequence> = HashMap::new();
304 let mut current_header = String::new();
305 let mut current_sequence = String::new();
306 let mut current_max_length: usize = 0;
307 let file = File::open(fasta_path)
308 .map_err( |error| error.to_string() )?;
309 let mut record_idx: u32 = 0;
310 for line in BufReader::new(file).lines() {
311 let line = line.map_err( |error| error.to_string() )?;
312 if line.starts_with('>') {
313 if !current_header.is_empty() && !current_sequence.is_empty() {
314 local_records.insert( current_header.clone(), IndexedSequence::new(record_idx, ¤t_sequence) );
315 current_max_length = current_max_length.max( current_sequence.len() );
316 record_idx += 1;
317 }
318 current_header = line;
319 current_sequence.clear();
320 continue;
321 }
322 current_sequence.push_str(&line);
323 }
324 if !current_header.is_empty() && !current_sequence.is_empty() {
325 local_records.insert(
326 current_header.clone(),
327 IndexedSequence::new(record_idx, ¤t_sequence)
328 );
329 current_max_length = current_max_length.max( current_sequence.len() );
330 }
331 if local_records.is_empty() {
332 return Err( format!("No valid FASTA records in {fasta_path} file") )
333 }
334 Ok( FastxRecords { records_: local_records, max_sequence_length_: current_max_length } )
335 }
336
337 pub fn read_fastq(fastq_path: &str) -> Result<FastxRecords<IndexedSequenceWithQuality>, String> {
350 let mut local_records: HashMap<String, IndexedSequenceWithQuality> = HashMap::new();
351 let file = File::open(fastq_path)
352 .map_err( |error| error.to_string() )?;
353 let mut record_idx: u32 = 0;
354 let mut current_max_length: usize = 0;
355 let mut lines = BufReader::new(file).lines();
356 loop {
357 let header = match lines.next() {
358 None => break,
359 Some(l) => l.map_err( |error| error.to_string() )?,
360 };
361 if header.is_empty() { continue; }
362 if !header.starts_with('@') {
363 return Err( format!("Expected '@' header line, got: {header}") );
364 }
365 let sequence = lines.next()
366 .ok_or_else( || format!("Missing sequence line after header: {header}") )?
367 .map_err( |error| error.to_string() )?;
368 let plus = lines.next()
369 .ok_or_else( || format!("Missing '+' line after sequence in record: {header}") )?
370 .map_err( |error| error.to_string() )?;
371 if !plus.starts_with('+') {
372 return Err( format!("Expected '+' separator line, got: {plus} in record: {header}") );
373 }
374 let quality = lines.next()
375 .ok_or_else( || format!("Missing quality line after '+' in record: {header}") )?
376 .map_err( |error| error.to_string() )?;
377 current_max_length = current_max_length.max( sequence.len() );
378 local_records.insert(
379 header.clone(),
380 IndexedSequenceWithQuality::new(record_idx, &quality, &sequence)
381 .map_err( |error| format!("{error} in record: {header}") )?
382 );
383 record_idx += 1;
384 }
385 if local_records.is_empty() {
386 return Err( format!("No valid FASTQ records in {fastq_path} file") );
387 }
388 Ok( FastxRecords { records_: local_records, max_sequence_length_: current_max_length } )
389 }
390}
391
392#[cfg(test)]
393mod tests {
394 use super::fastx::*;
395 use std::io::Write;
396 use tempfile::NamedTempFile;
397
398 fn make_standard_fasta() -> NamedTempFile {
399 let mut tmp = NamedTempFile::new().unwrap();
400 writeln!(tmp, ">seq1").unwrap();
401 writeln!(tmp, "ACGTACGT").unwrap();
402 writeln!(tmp, ">seq2").unwrap();
403 writeln!(tmp, "TTGGCCAA").unwrap();
404 writeln!(tmp, ">seq3").unwrap();
405 writeln!(tmp, "GCGCGCGC").unwrap();
406 tmp
407 }
408
409 fn make_standard_fastq() -> NamedTempFile {
410 let mut tmp = NamedTempFile::new().unwrap();
411 writeln!(tmp, "@record1").unwrap();
412 writeln!(tmp, "ACGTACGTACGT").unwrap();
413 writeln!(tmp, "+").unwrap();
414 writeln!(tmp, "IIIIIIIIIIII").unwrap();
415 writeln!(tmp, "@record2").unwrap();
416 writeln!(tmp, "TTGGCCAATTGG").unwrap();
417 writeln!(tmp, "+").unwrap();
418 writeln!(tmp, "HHHHHHHHHHHH").unwrap();
419 writeln!(tmp, "@record3").unwrap();
420 writeln!(tmp, "GCGCGCGCGCGC").unwrap();
421 writeln!(tmp, "+").unwrap();
422 writeln!(tmp, "????????????").unwrap();
423 tmp
424 }
425
/// Loading the standard FASTA fixture yields its three records.
#[test]
fn test_fasta_records_new_loads_all_records() {
    let fixture = make_standard_fasta();
    let fixture_path = fixture.path().to_str().unwrap();
    let loaded = read_fasta(fixture_path).unwrap();
    assert_eq!(loaded.num_records(), 3);
}
434
/// Requesting two existing FASTA names returns both and reports nothing absent.
#[test]
fn test_fasta_records_by_name_returns_present_records() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let wanted: Vec<String> = [">seq1", ">seq2"].iter().map(|name| name.to_string()).collect();
    let (found, missing) = loaded.records_by_name(wanted);
    assert_eq!(found.num_records(), 2);
    assert_eq!(missing, "");
}
444
/// Requesting one existing FASTA name returns exactly that record.
#[test]
fn test_fasta_records_by_name_single_record() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let (found, missing) = loaded.records_by_name(vec![String::from(">seq1")]);
    assert_eq!(found.num_records(), 1);
    assert_eq!(missing, "");
}
454
/// An unknown FASTA name yields an empty subset and is listed as absent.
#[test]
fn test_fasta_records_by_name_absent_names_reported() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let (found, missing) = loaded.records_by_name(vec![String::from("not_a_real_record")]);
    assert_eq!(found.num_records(), 0);
    assert!(missing.contains("not_a_real_record"));
}
464
/// An empty FASTA name list yields an empty subset and no absent names.
#[test]
fn test_fasta_records_by_name_empty_input_returns_empty() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let (found, missing) = loaded.records_by_name(Vec::new());
    assert_eq!(found.num_records(), 0);
    assert_eq!(missing, "");
}
473
/// A FASTA name requested twice is stored only once in the subset.
#[test]
fn test_fasta_records_by_name_duplicate_present_name_deduplicated() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let wanted = vec![String::from(">seq1"); 2];
    let (found, missing) = loaded.records_by_name(wanted);
    assert_eq!(found.num_records(), 1);
    assert_eq!(missing, "");
}
483
/// An unknown FASTA name requested twice is listed twice in the absent names.
#[test]
fn test_fasta_records_by_name_duplicate_absent_name_repeated_in_output() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let wanted = vec![String::from("missing"); 2];
    let (found, missing) = loaded.records_by_name(wanted);
    assert_eq!(found.num_records(), 0);
    assert_eq!(missing, "missing\nmissing");
}
493
/// An empty FASTA file is rejected with the "no valid records" error.
#[test]
fn test_fasta_records_new_empty_file_returns_error() {
    let empty = NamedTempFile::new().unwrap();
    let message = read_fasta(empty.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("No valid FASTA records in"));
}
500
/// Sequence lines without any header do not form a FASTA record.
#[test]
fn test_fasta_records_new_no_headers_returns_error() {
    let mut input = NamedTempFile::new().unwrap();
    for line in ["ACGTACGT", "TTGGCCAA"] {
        writeln!(input, "{line}").unwrap();
    }
    let message = read_fasta(input.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("No valid FASTA records in"));
}
509
/// A FASTA header with no sequence lines does not form a record.
#[test]
fn test_fasta_records_new_header_without_sequence_returns_error() {
    let mut input = NamedTempFile::new().unwrap();
    writeln!(input, ">solo_header").unwrap();
    let message = read_fasta(input.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("No valid FASTA records in"));
}
517
/// Reading a FASTA path whose file was deleted reports the OS "not found" error.
#[test]
fn test_fasta_records_new_nonexistent_file_returns_error() {
    let scratch = NamedTempFile::new().unwrap();
    let vanished_path = String::from(scratch.path().to_str().unwrap());
    // Dropping the handle deletes the file while keeping its former path.
    drop(scratch);
    let message = read_fasta(&vanished_path).unwrap_err();
    assert!(message.contains("No such file or directory"));
}
526
/// Sequence lines under one FASTA header are joined into a single sequence.
#[test]
fn test_fasta_multiline_sequence_is_concatenated() {
    let mut input = NamedTempFile::new().unwrap();
    for line in [">record1", "ACGT", "GCGC", "TTTT"] {
        writeln!(input, "{line}").unwrap();
    }
    let loaded = read_fasta(input.path().to_str().unwrap()).unwrap();
    assert_eq!(loaded.num_records(), 1);
    let output = NamedTempFile::new().unwrap();
    loaded.save_sorted_records(output.path().to_str().unwrap()).unwrap();
    let written = std::fs::read_to_string(output.path()).unwrap();
    // Line 0 is the header; line 1 is the joined sequence.
    assert_eq!(written.lines().nth(1).unwrap(), "ACGTGCGCTTTT");
}
543
/// Two multi-line FASTA records are read as two records.
#[test]
fn test_fasta_multiline_sequence_multiple_records() {
    let mut input = NamedTempFile::new().unwrap();
    for line in [">record1", "ACGT", "GCGC", ">record2", "TTTT", "AAAA"] {
        writeln!(input, "{line}").unwrap();
    }
    let loaded = read_fasta(input.path().to_str().unwrap()).unwrap();
    assert_eq!(loaded.num_records(), 2);
}
556
/// Taking FASTA subsequences keeps one output record per input record.
#[test]
fn test_fasta_records_subsequences_preserves_record_count() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let expected_count = loaded.num_records();
    let trimmed = loaded.subsequences(0, 10);
    assert_eq!(trimmed.num_records(), expected_count);
}
565
/// An in-bounds FASTA range keeps the record and cuts it to the range length.
#[test]
fn test_fasta_records_subsequences_within_bounds() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let (first, _) = records.records_by_name(vec![">seq1".to_string()]);
    let sub = first.subsequences(2, 5);
    let (result, _) = sub.records_by_name(vec![">seq1".to_string()]);
    assert_eq!(result.num_records(), 1);
    // Bytes 2..5 of an 8-base record leave exactly three bases.
    assert_eq!(sub.get_max_length(), 3);
    assert_eq!(result.get_max_length(), 3);
}
575
/// A FASTA range covering everything keeps every record at full length.
#[test]
fn test_fasta_records_subsequences_full_sequence_preserves_length() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let sub = records.subsequences(0, usize::MAX);
    assert_eq!(sub.num_records(), 3);
    // The length promised by the test name: nothing was cut off.
    assert_eq!(sub.get_max_length(), records.get_max_length());
    assert_eq!(sub.get_max_length(), 8);
}
583
/// A zero-width FASTA range keeps every record but empties its sequence.
#[test]
fn test_fasta_records_subsequences_start_equals_end_returns_empty_sequences() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let sub = records.subsequences(5, 5);
    assert_eq!(sub.num_records(), 3);
    // A maximum length of zero means every sequence is empty.
    assert_eq!(sub.get_max_length(), 0);
}
591
/// An inverted FASTA range keeps every record but empties its sequence.
#[test]
fn test_fasta_records_subsequences_start_beyond_end_returns_empty_sequences() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let sub = records.subsequences(10, 2);
    assert_eq!(sub.num_records(), 3);
    // A maximum length of zero means every sequence is empty.
    assert_eq!(sub.get_max_length(), 0);
}
599
/// A FASTA range starting past the sequence end empties every sequence.
#[test]
fn test_fasta_records_subsequences_start_beyond_sequence_returns_empty_sequences() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let sub = records.subsequences(usize::MAX - 1, usize::MAX);
    assert_eq!(sub.num_records(), 3);
    // A maximum length of zero means every sequence is empty.
    assert_eq!(sub.get_max_length(), 0);
}
607
/// Ranged requests for two existing FASTA names return both records.
#[test]
fn test_subsequences_by_name_returns_present_records() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let requests: Vec<NameWithRange> = [">seq1", ">seq2"]
        .iter()
        .map(|name| NameWithRange { name: name.to_string(), start: 0, end: 5 })
        .collect();
    let (found, missing) = loaded.subsequences_by_name(requests);
    assert_eq!(found.num_records(), 2);
    assert_eq!(missing, "");
}
621
/// A ranged request for one existing FASTA name returns that record.
#[test]
fn test_subsequences_by_name_single_record() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let request = NameWithRange { name: String::from(">seq1"), start: 2, end: 7 };
    let (found, missing) = loaded.subsequences_by_name(vec![request]);
    assert_eq!(found.num_records(), 1);
    assert_eq!(missing, "");
}
633
/// A ranged request for an unknown FASTA name is reported as absent.
#[test]
fn test_subsequences_by_name_absent_names_reported() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let request = NameWithRange { name: String::from("not_a_real_record"), start: 0, end: 5 };
    let (found, missing) = loaded.subsequences_by_name(vec![request]);
    assert_eq!(found.num_records(), 0);
    assert_eq!(missing, "not_a_real_record");
}
645
/// Several unknown FASTA names are reported in request order, newline-separated.
#[test]
fn test_subsequences_by_name_multiple_absent_names_separated_by_newline() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let requests: Vec<NameWithRange> = ["missing_one", "missing_two"]
        .iter()
        .map(|name| NameWithRange { name: name.to_string(), start: 0, end: 5 })
        .collect();
    let (_, missing) = loaded.subsequences_by_name(requests);
    assert_eq!(missing, "missing_one\nmissing_two");
}
657
/// A mix of known and unknown FASTA names returns the known one and reports the other.
#[test]
fn test_subsequences_by_name_mixed_present_and_absent() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let requests: Vec<NameWithRange> = [">seq1", "missing_record"]
        .iter()
        .map(|name| NameWithRange { name: name.to_string(), start: 0, end: 5 })
        .collect();
    let (found, missing) = loaded.subsequences_by_name(requests);
    assert_eq!(found.num_records(), 1);
    assert_eq!(missing, "missing_record");
}
670
/// An empty FASTA request list yields an empty subset and no absent names.
#[test]
fn test_subsequences_by_name_empty_input_returns_empty() {
    let fixture = make_standard_fasta();
    let loaded = read_fasta(fixture.path().to_str().unwrap()).unwrap();
    let (found, missing) = loaded.subsequences_by_name(Vec::new());
    assert_eq!(found.num_records(), 0);
    assert_eq!(missing, "");
}
679
/// A zero-width named FASTA range keeps the record with an empty sequence.
#[test]
fn test_subsequences_by_name_start_equals_end() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let names_ranges = vec![
        NameWithRange { name: ">seq1".to_string(), start: 5, end: 5 },
    ];
    let (subset, absent) = records.subsequences_by_name(names_ranges);
    assert_eq!(subset.num_records(), 1);
    assert!(absent.is_empty());
    // Without this check the test cannot tell an empty slice from a full one.
    assert_eq!(subset.get_max_length(), 0);
}
691
/// An inverted named FASTA range keeps the record with an empty sequence.
#[test]
fn test_subsequences_by_name_start_beyond_end() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let names_ranges = vec![
        NameWithRange { name: ">seq1".to_string(), start: 10, end: 2 },
    ];
    let (subset, absent) = records.subsequences_by_name(names_ranges);
    assert_eq!(subset.num_records(), 1);
    assert!(absent.is_empty());
    // Without this check the test cannot tell an empty slice from a full one.
    assert_eq!(subset.get_max_length(), 0);
}
703
/// A named FASTA range starting past the sequence end yields an empty sequence.
#[test]
fn test_subsequences_by_name_start_beyond_sequence() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let names_ranges = vec![
        NameWithRange { name: ">seq1".to_string(), start: usize::MAX - 1, end: usize::MAX },
    ];
    let (subset, absent) = records.subsequences_by_name(names_ranges);
    assert_eq!(subset.num_records(), 1);
    assert!(absent.is_empty());
    // Without this check the test cannot tell an empty slice from a full one.
    assert_eq!(subset.get_max_length(), 0);
}
715
/// FASTA records written by `save_records` can be read back unchanged in count and length.
#[test]
fn test_fasta_save_records_roundtrip() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let tmp = NamedTempFile::new().unwrap();
    records.save_records(tmp.path().to_str().unwrap()).unwrap();
    let saved = read_fasta(tmp.path().to_str().unwrap()).unwrap();
    assert_eq!(saved.num_records(), records.num_records());
    // The count alone would not notice truncated sequences.
    assert_eq!(saved.get_max_length(), records.get_max_length());
}
726
/// FASTA records written by `save_sorted_records` can be read back unchanged in count and length.
#[test]
fn test_fasta_save_sorted_records_roundtrip() {
    let fasta = make_standard_fasta();
    let records = read_fasta(fasta.path().to_str().unwrap()).unwrap();
    let tmp = NamedTempFile::new().unwrap();
    records.save_sorted_records(tmp.path().to_str().unwrap()).unwrap();
    let saved = read_fasta(tmp.path().to_str().unwrap()).unwrap();
    assert_eq!(saved.num_records(), records.num_records());
    // The count alone would not notice truncated sequences.
    assert_eq!(saved.get_max_length(), records.get_max_length());
}
736
/// `save_sorted_records` writes FASTA headers in their original file order.
#[test]
fn test_fasta_save_sorted_records_order() {
    let mut input = NamedTempFile::new().unwrap();
    for (header, bases) in [(">first", "AAAA"), (">second", "CCCC"), (">third", "GGGG")] {
        writeln!(input, "{header}").unwrap();
        writeln!(input, "{bases}").unwrap();
    }
    let loaded = read_fasta(input.path().to_str().unwrap()).unwrap();
    let output = NamedTempFile::new().unwrap();
    loaded.save_sorted_records(output.path().to_str().unwrap()).unwrap();
    let written = std::fs::read_to_string(output.path()).unwrap();
    let headers: Vec<&str> = written
        .lines()
        .filter(|line| line.starts_with('>'))
        .collect();
    assert_eq!(headers, [">first", ">second", ">third"]);
}
753
/// Loading the standard FASTQ fixture yields its three records.
#[test]
fn test_fastq_records_new_loads_all_records() {
    let fixture = make_standard_fastq();
    let fixture_path = fixture.path().to_str().unwrap();
    let loaded = read_fastq(fixture_path).unwrap();
    assert_eq!(loaded.num_records(), 3);
}
762
/// Requesting two existing FASTQ names returns both and reports nothing absent.
#[test]
fn test_fastq_records_by_name_returns_present_records() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let wanted: Vec<String> = ["@record1", "@record2"].iter().map(|name| name.to_string()).collect();
    let (found, missing) = loaded.records_by_name(wanted);
    assert_eq!(found.num_records(), 2);
    assert_eq!(missing, "");
}
772
/// Requesting one existing FASTQ name returns exactly that record.
#[test]
fn test_fastq_records_by_name_single_record() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let (found, missing) = loaded.records_by_name(vec![String::from("@record3")]);
    assert_eq!(found.num_records(), 1);
    assert_eq!(missing, "");
}
782
/// An unknown FASTQ name yields an empty subset and is listed as absent.
#[test]
fn test_fastq_records_by_name_absent_names_reported() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let (found, missing) = loaded.records_by_name(vec![String::from("not_a_real_record")]);
    assert_eq!(found.num_records(), 0);
    assert!(missing.contains("not_a_real_record"));
}
792
/// An empty FASTQ name list yields an empty subset and no absent names.
#[test]
fn test_fastq_records_by_name_empty_input_returns_empty() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let (found, missing) = loaded.records_by_name(Vec::new());
    assert_eq!(found.num_records(), 0);
    assert_eq!(missing, "");
}
801
/// A FASTQ name requested twice is stored only once in the subset.
#[test]
fn test_fastq_records_by_name_duplicate_present_name_deduplicated() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let wanted = vec![String::from("@record1"); 2];
    let (found, missing) = loaded.records_by_name(wanted);
    assert_eq!(found.num_records(), 1);
    assert_eq!(missing, "");
}
811
/// An unknown FASTQ name requested twice is listed twice in the absent names.
#[test]
fn test_fastq_records_by_name_duplicate_absent_name_repeated_in_output() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let wanted = vec![String::from("missing"); 2];
    let (found, missing) = loaded.records_by_name(wanted);
    assert_eq!(found.num_records(), 0);
    assert_eq!(missing, "missing\nmissing");
}
821
/// An empty FASTQ file is rejected with the "no valid records" error.
#[test]
fn test_fastq_records_new_empty_file_returns_error() {
    let empty = NamedTempFile::new().unwrap();
    let message = read_fastq(empty.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("No valid FASTQ records in"));
}
828
/// A FASTQ record whose header lacks the leading `@` is rejected.
#[test]
fn test_fastq_records_new_no_at_header_returns_error() {
    let mut input = NamedTempFile::new().unwrap();
    for line in ["record1", "ACGTACGT", "+", "IIIIIIII"] {
        writeln!(input, "{line}").unwrap();
    }
    let message = read_fastq(input.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("Expected '@' header line"));
}
839
/// A FASTQ file ending right after a header reports the missing sequence line.
#[test]
fn test_fastq_records_new_missing_sequence_line_returns_error() {
    let mut input = NamedTempFile::new().unwrap();
    writeln!(input, "@record1").unwrap();
    let message = read_fastq(input.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("Missing sequence line after header"));
}
847
/// A FASTQ file ending after the sequence line reports the missing `+` line.
#[test]
fn test_fastq_records_new_truncated_record_returns_error() {
    let mut input = NamedTempFile::new().unwrap();
    for line in ["@record1", "ACGTACGT"] {
        writeln!(input, "{line}").unwrap();
    }
    let message = read_fastq(input.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("Missing '+' line"));
}
856
/// A blank line between two FASTQ records does not break parsing.
#[test]
fn test_fastq_records_new_blank_lines_between_records_are_skipped() {
    let mut input = NamedTempFile::new().unwrap();
    let lines = [
        "@record1", "ACGTACGT", "+", "IIIIIIII",
        "",
        "@record2", "GCGCGCGC", "+", "HHHHHHHH",
    ];
    for line in lines {
        writeln!(input, "{line}").unwrap();
    }
    let loaded = read_fastq(input.path().to_str().unwrap()).unwrap();
    assert_eq!(loaded.num_records(), 2);
}
872
/// A FASTQ record whose third line does not start with `+` is rejected.
#[test]
fn test_fastq_records_new_invalid_separator_returns_error() {
    let mut input = NamedTempFile::new().unwrap();
    for line in ["@record1", "ACGTACGT", "GCGCGCGC", "IIIIIIII"] {
        writeln!(input, "{line}").unwrap();
    }
    let message = read_fastq(input.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("Expected '+' separator line"));
}
883
/// A FASTQ file ending after the `+` line reports the missing quality line.
#[test]
fn test_fastq_records_new_missing_quality_line_returns_error() {
    let mut input = NamedTempFile::new().unwrap();
    for line in ["@record1", "ACGTACGT", "+"] {
        writeln!(input, "{line}").unwrap();
    }
    let message = read_fastq(input.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("Missing quality line after '+'"));
}
893
/// A FASTQ quality string shorter than its sequence is rejected.
#[test]
fn test_fastq_records_new_mismatched_quality_length_returns_error() {
    let mut input = NamedTempFile::new().unwrap();
    for line in ["@record1", "ACGTACGT", "+", "IIII"] {
        writeln!(input, "{line}").unwrap();
    }
    let message = read_fastq(input.path().to_str().unwrap()).unwrap_err();
    assert!(message.contains("Quality scores must be the same length"));
}
904
/// Reading a FASTQ path whose file was deleted reports the OS "not found" error.
#[test]
fn test_fastq_records_new_nonexistent_file_returns_error() {
    let scratch = NamedTempFile::new().unwrap();
    let vanished_path = String::from(scratch.path().to_str().unwrap());
    // Dropping the handle deletes the file while keeping its former path.
    drop(scratch);
    let message = read_fastq(&vanished_path).unwrap_err();
    assert!(message.contains("No such file or directory"));
}
913
/// Taking FASTQ subsequences keeps one output record per input record.
#[test]
fn test_fastq_records_subsequences_preserves_record_count() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let expected_count = loaded.num_records();
    let trimmed = loaded.subsequences(0, 5);
    assert_eq!(trimmed.num_records(), expected_count);
}
922
/// An in-bounds FASTQ range keeps the record and cuts it to the range length.
#[test]
fn test_fastq_records_subsequences_within_bounds() {
    let fastq = make_standard_fastq();
    let records = read_fastq(fastq.path().to_str().unwrap()).unwrap();
    let (first, _) = records.records_by_name(vec!["@record1".to_string()]);
    let sub = first.subsequences(2, 5);
    let (result, _) = sub.records_by_name(vec!["@record1".to_string()]);
    assert_eq!(result.num_records(), 1);
    // Bytes 2..5 of a 12-base record leave exactly three bases.
    assert_eq!(sub.get_max_length(), 3);
    assert_eq!(result.get_max_length(), 3);
}
932
/// A FASTQ range covering everything keeps all three records.
#[test]
fn test_fastq_records_subsequences_full_sequence_preserves_count() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let everything = loaded.subsequences(0, usize::MAX);
    assert_eq!(everything.num_records(), 3);
}
940
/// A zero-width FASTQ range keeps every record but empties its sequence.
#[test]
fn test_fastq_records_subsequences_start_equals_end_returns_empty_sequences() {
    let fastq = make_standard_fastq();
    let records = read_fastq(fastq.path().to_str().unwrap()).unwrap();
    let sub = records.subsequences(5, 5);
    assert_eq!(sub.num_records(), 3);
    // A maximum length of zero means every sequence is empty.
    assert_eq!(sub.get_max_length(), 0);
}
948
/// An inverted FASTQ range keeps every record but empties its sequence.
#[test]
fn test_fastq_records_subsequences_start_beyond_end_returns_empty_sequences() {
    let fastq = make_standard_fastq();
    let records = read_fastq(fastq.path().to_str().unwrap()).unwrap();
    let sub = records.subsequences(10, 2);
    assert_eq!(sub.num_records(), 3);
    // A maximum length of zero means every sequence is empty.
    assert_eq!(sub.get_max_length(), 0);
}
956
/// A FASTQ range starting past the sequence end empties every sequence.
#[test]
fn test_fastq_records_subsequences_start_beyond_sequence_returns_empty_sequences() {
    let fastq = make_standard_fastq();
    let records = read_fastq(fastq.path().to_str().unwrap()).unwrap();
    let sub = records.subsequences(usize::MAX - 1, usize::MAX);
    assert_eq!(sub.num_records(), 3);
    // A maximum length of zero means every sequence is empty.
    assert_eq!(sub.get_max_length(), 0);
}
964
/// Ranged requests for two existing FASTQ names return both records.
#[test]
fn test_fastq_subsequences_by_name_returns_present_records() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let requests: Vec<NameWithRange> = ["@record1", "@record2"]
        .iter()
        .map(|name| NameWithRange { name: name.to_string(), start: 0, end: 5 })
        .collect();
    let (found, missing) = loaded.subsequences_by_name(requests);
    assert_eq!(found.num_records(), 2);
    assert_eq!(missing, "");
}
978
/// A ranged request for one existing FASTQ name returns that record.
#[test]
fn test_fastq_subsequences_by_name_single_record() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let request = NameWithRange { name: String::from("@record3"), start: 2, end: 7 };
    let (found, missing) = loaded.subsequences_by_name(vec![request]);
    assert_eq!(found.num_records(), 1);
    assert_eq!(missing, "");
}
990
/// A ranged request for an unknown FASTQ name is reported as absent.
#[test]
fn test_fastq_subsequences_by_name_absent_names_reported() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let request = NameWithRange { name: String::from("not_a_real_record"), start: 0, end: 5 };
    let (found, missing) = loaded.subsequences_by_name(vec![request]);
    assert_eq!(found.num_records(), 0);
    assert_eq!(missing, "not_a_real_record");
}
1002
/// Two unknown names must be reported in request order, joined by a single
/// newline with no trailing separator.
#[test]
fn test_fastq_subsequences_by_name_multiple_absent_names_separated_by_newline() {
    let fixture = make_standard_fastq();
    let parsed = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let requests: Vec<NameWithRange> = ["missing_one", "missing_two"]
        .iter()
        .map(|&label| NameWithRange { name: label.to_string(), start: 0, end: 5 })
        .collect();
    let (_, missing) = parsed.subsequences_by_name(requests);
    assert_eq!(missing, "missing_one\nmissing_two");
}
1014
/// One known name (`@record1`) and one unknown name in the same request:
/// the known one lands in the subset, the unknown one in the absent report.
#[test]
fn test_fastq_subsequences_by_name_mixed_present_and_absent() {
    let fixture = make_standard_fastq();
    let parsed = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let requests: Vec<NameWithRange> = ["@record1", "missing_record"]
        .iter()
        .map(|&label| NameWithRange { name: label.to_string(), start: 0, end: 5 })
        .collect();
    let (found, missing) = parsed.subsequences_by_name(requests);
    assert_eq!(found.num_records(), 1);
    assert_eq!(missing, "missing_record");
}
1027
/// An empty request list must produce an empty subset and an empty
/// absent-names report.
#[test]
fn test_fastq_subsequences_by_name_empty_input_returns_empty() {
    let fixture = make_standard_fastq();
    let fastq_path = fixture.path().to_str().unwrap();
    let parsed = read_fastq(fastq_path).unwrap();
    let (found, missing) = parsed.subsequences_by_name(Vec::new());
    assert_eq!(found.num_records(), 0);
    assert!(missing.is_empty());
}
1036
/// A request with `start == end` (5, 5) for `@record1` must still count as a
/// found record and must not be reported as absent.
#[test]
fn test_fastq_subsequences_by_name_start_equals_end() {
    let fixture = make_standard_fastq();
    let parsed = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let request = NameWithRange { name: String::from("@record1"), start: 5, end: 5 };
    let (found, missing) = parsed.subsequences_by_name(vec![request]);
    assert_eq!(found.num_records(), 1);
    assert!(missing.is_empty());
}
1048
/// A request with `start > end` (10, 2) for `@record1` must still count as a
/// found record and must not be reported as absent.
#[test]
fn test_fastq_subsequences_by_name_start_beyond_end() {
    let fixture = make_standard_fastq();
    let parsed = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let request = NameWithRange { name: String::from("@record1"), start: 10, end: 2 };
    let (found, missing) = parsed.subsequences_by_name(vec![request]);
    assert_eq!(found.num_records(), 1);
    assert!(missing.is_empty());
}
1060
/// A request whose range sits at the very top of `usize` must not panic:
/// `@record1` is still found and nothing is reported as absent.
#[test]
fn test_fastq_subsequences_by_name_start_beyond_sequence() {
    let fixture = make_standard_fastq();
    let parsed = read_fastq(fixture.path().to_str().unwrap()).unwrap();
    let request = NameWithRange {
        name: String::from("@record1"),
        start: usize::MAX - 1,
        end: usize::MAX,
    };
    let (found, missing) = parsed.subsequences_by_name(vec![request]);
    assert_eq!(found.num_records(), 1);
    assert!(missing.is_empty());
}
1072
/// Writes the fixture records with `save_records`, reads the file back, and
/// checks that the record count survived the round trip.
#[test]
fn test_fastq_save_records_roundtrip() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();

    let destination = NamedTempFile::new().unwrap();
    let destination_path = destination.path().to_str().unwrap();
    loaded.save_records(destination_path).unwrap();

    let reloaded = read_fastq(destination_path).unwrap();
    assert_eq!(reloaded.num_records(), loaded.num_records());
}
1083
/// Writes the fixture records with `save_sorted_records`, reads the file
/// back, and checks that the record count survived the round trip.
#[test]
fn test_fastq_save_sorted_records_roundtrip() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();

    let destination = NamedTempFile::new().unwrap();
    let destination_path = destination.path().to_str().unwrap();
    loaded.save_sorted_records(destination_path).unwrap();

    let reloaded = read_fastq(destination_path).unwrap();
    assert_eq!(reloaded.num_records(), loaded.num_records());
}
1093
/// After `save_sorted_records`, the lines of the output file that start with
/// `@` must be `@record1`, `@record2`, `@record3`, in that order.
#[test]
fn test_fastq_save_sorted_records_order() {
    let fixture = make_standard_fastq();
    let loaded = read_fastq(fixture.path().to_str().unwrap()).unwrap();

    let destination = NamedTempFile::new().unwrap();
    loaded
        .save_sorted_records(destination.path().to_str().unwrap())
        .unwrap();

    let written = std::fs::read_to_string(destination.path()).unwrap();
    // Collect every line beginning with '@', preserving file order.
    let mut header_lines: Vec<&str> = Vec::new();
    for line in written.lines() {
        if line.starts_with('@') {
            header_lines.push(line);
        }
    }
    assert_eq!(header_lines, vec!["@record1", "@record2", "@record3"]);
}
1104
/// Merging two single-record FASTA collections with different names (`>r1`,
/// `>r2`) must leave the target with 2 records.
#[test]
fn test_merge_disjoint_sets_combines_all_records() {
    let mut first_file = NamedTempFile::new().unwrap();
    for line in [">r1", "AAAA"] {
        writeln!(first_file, "{line}").unwrap();
    }
    let mut second_file = NamedTempFile::new().unwrap();
    for line in [">r2", "CCCC"] {
        writeln!(second_file, "{line}").unwrap();
    }

    let mut target = read_fasta(first_file.path().to_str().unwrap()).unwrap();
    let incoming = read_fasta(second_file.path().to_str().unwrap()).unwrap();
    target.merge(incoming);

    assert_eq!(target.num_records(), 2);
}
1119
/// When both collections contain `>r1`, the merged collection must keep a
/// single record whose sequence is the incoming one (`CCCC`), not the
/// original (`AAAA`). The check is done on the saved file's contents.
#[test]
fn test_merge_duplicate_key_overwrites_existing_record() {
    let mut first_file = NamedTempFile::new().unwrap();
    for line in [">r1", "AAAA"] {
        writeln!(first_file, "{line}").unwrap();
    }
    let mut second_file = NamedTempFile::new().unwrap();
    for line in [">r1", "CCCC"] {
        writeln!(second_file, "{line}").unwrap();
    }

    let mut target = read_fasta(first_file.path().to_str().unwrap()).unwrap();
    let incoming = read_fasta(second_file.path().to_str().unwrap()).unwrap();
    target.merge(incoming);
    assert_eq!(target.num_records(), 1);

    let destination = NamedTempFile::new().unwrap();
    target
        .save_records(destination.path().to_str().unwrap())
        .unwrap();
    let written = std::fs::read_to_string(destination.path()).unwrap();
    assert!(written.contains("CCCC"));
    assert!(!written.contains("AAAA"));
}
1138
/// Merging a single-record collection into an empty one must leave the
/// target with 1 record. The empty collection is obtained by calling
/// `records_by_name` with no names.
#[test]
fn test_merge_into_empty_collection() {
    let mut first_file = NamedTempFile::new().unwrap();
    for line in [">r1", "AAAA"] {
        writeln!(first_file, "{line}").unwrap();
    }
    let mut second_file = NamedTempFile::new().unwrap();
    for line in [">r2", "CCCC"] {
        writeln!(second_file, "{line}").unwrap();
    }

    let seed = read_fasta(first_file.path().to_str().unwrap()).unwrap();
    let incoming = read_fasta(second_file.path().to_str().unwrap()).unwrap();
    let (mut target, _) = seed.records_by_name(Vec::new());
    target.merge(incoming);

    assert_eq!(target.num_records(), 1);
}
1153
/// Merging an empty collection into a single-record collection must leave
/// the record count at 1.
#[test]
fn test_merge_empty_collection_into_existing() {
    let mut fasta_file = NamedTempFile::new().unwrap();
    for line in [">r1", "AAAA"] {
        writeln!(fasta_file, "{line}").unwrap();
    }

    let mut target = read_fasta(fasta_file.path().to_str().unwrap()).unwrap();
    let (nothing, _) = target.records_by_name(Vec::new());
    target.merge(nothing);

    assert_eq!(target.num_records(), 1);
}
1164
/// Merging an 8-character sequence into a collection whose only sequence has
/// 4 characters must raise `get_max_length` to 8.
#[test]
fn test_merge_updates_max_length_when_incoming_is_longer() {
    let mut short_file = NamedTempFile::new().unwrap();
    for line in [">r1", "AAAA"] {
        writeln!(short_file, "{line}").unwrap();
    }
    let mut long_file = NamedTempFile::new().unwrap();
    for line in [">r2", "CCCCCCCC"] {
        writeln!(long_file, "{line}").unwrap();
    }

    let mut target = read_fasta(short_file.path().to_str().unwrap()).unwrap();
    let incoming = read_fasta(long_file.path().to_str().unwrap()).unwrap();
    target.merge(incoming);

    assert_eq!(target.get_max_length(), 8);
}
1178
/// Merging a 4-character sequence into a collection whose only sequence has
/// 8 characters must leave `get_max_length` at 8.
#[test]
fn test_merge_max_length_unchanged_when_incoming_is_shorter() {
    let mut long_file = NamedTempFile::new().unwrap();
    for line in [">r1", "CCCCCCCC"] {
        writeln!(long_file, "{line}").unwrap();
    }
    let mut short_file = NamedTempFile::new().unwrap();
    for line in [">r2", "AAAA"] {
        writeln!(short_file, "{line}").unwrap();
    }

    let mut target = read_fasta(long_file.path().to_str().unwrap()).unwrap();
    let incoming = read_fasta(short_file.path().to_str().unwrap()).unwrap();
    target.merge(incoming);

    assert_eq!(target.get_max_length(), 8);
}
1192
/// Reading a FASTA file with sequences of 4, 8 and 3 characters must give a
/// `get_max_length` of 8.
#[test]
fn test_fasta_get_max_length_after_read() {
    let mut fasta_file = NamedTempFile::new().unwrap();
    for line in [">r1", "AAAA", ">r2", "CCCCCCCC", ">r3", "GGG"] {
        writeln!(fasta_file, "{line}").unwrap();
    }

    let parsed = read_fasta(fasta_file.path().to_str().unwrap()).unwrap();
    let longest = parsed.get_max_length();
    assert_eq!(longest, 8);
}
1206
/// After reading the standard FASTQ fixture, `get_max_length` is expected to
/// report 12.
#[test]
fn test_fastq_get_max_length_after_read() {
    let fixture = make_standard_fastq();
    let fastq_path = fixture.path().to_str().unwrap();
    let parsed = read_fastq(fastq_path).unwrap();
    let longest = parsed.get_max_length();
    assert_eq!(longest, 12);
}
1213
/// Selecting `>r1` (4 chars) and `>r3` (3 chars) while leaving out `>r2`
/// (8 chars) must give the subset a `get_max_length` of 4.
#[test]
fn test_fasta_get_max_length_after_records_by_name() {
    let mut fasta_file = NamedTempFile::new().unwrap();
    for line in [">r1", "AAAA", ">r2", "CCCCCCCC", ">r3", "GGG"] {
        writeln!(fasta_file, "{line}").unwrap();
    }

    let parsed = read_fasta(fasta_file.path().to_str().unwrap()).unwrap();
    let wanted: Vec<String> = vec![String::from(">r1"), String::from(">r3")];
    let (selected, _) = parsed.records_by_name(wanted);
    assert_eq!(selected.get_max_length(), 4);
}
1227
/// After `subsequences(0, 5)` on sequences of 4, 8 and 3 characters,
/// `get_max_length` is expected to be 5.
#[test]
fn test_fasta_get_max_length_after_subsequences() {
    let mut fasta_file = NamedTempFile::new().unwrap();
    for line in [">r1", "AAAA", ">r2", "CCCCCCCC", ">r3", "GGG"] {
        writeln!(fasta_file, "{line}").unwrap();
    }

    let parsed = read_fasta(fasta_file.path().to_str().unwrap()).unwrap();
    let trimmed = parsed.subsequences(0, 5);
    let longest = trimmed.get_max_length();
    assert_eq!(longest, 5);
}
1242
/// Requests end 5 for `>r2` (8 chars) and end 3 for `>r1` (4 chars), both
/// from start 0; the subset's `get_max_length` is expected to be 5.
#[test]
fn test_fasta_get_max_length_after_subsequences_by_name() {
    let mut fasta_file = NamedTempFile::new().unwrap();
    for line in [">r1", "AAAA", ">r2", "CCCCCCCC", ">r3", "GGG"] {
        writeln!(fasta_file, "{line}").unwrap();
    }

    let parsed = read_fasta(fasta_file.path().to_str().unwrap()).unwrap();
    // (record name, range end); every range starts at 0.
    let requests: Vec<NameWithRange> = [(">r2", 5), (">r1", 3)]
        .iter()
        .map(|&(label, end)| NameWithRange { name: label.to_string(), start: 0, end })
        .collect();
    let (selected, _) = parsed.subsequences_by_name(requests);
    assert_eq!(selected.get_max_length(), 5);
}
1260
/// A subset selected with no names must report a `get_max_length` of 0.
#[test]
fn test_fasta_get_max_length_empty_subset_is_zero() {
    let fixture = make_standard_fasta();
    let fasta_path = fixture.path().to_str().unwrap();
    let parsed = read_fasta(fasta_path).unwrap();
    let (selected, _) = parsed.records_by_name(Vec::new());
    assert_eq!(selected.get_max_length(), 0);
}
1268
/// `IndexedSequence::new` must store the index so `get_index` returns it.
#[test]
fn test_new_and_get_index() {
    let record = IndexedSequence::new(3, "ACGT");
    let stored_index = record.get_index();
    assert_eq!(stored_index, 3);
}
1275
/// `subsequence(2, 5)` of `ACGTACGT` must be `GTA`.
#[test]
fn test_slice_within_bounds() {
    let record = IndexedSequence::new(0, "ACGTACGT");
    let window = record.subsequence(2, 5);
    assert_eq!(window.get_sequence(), "GTA");
}
1281
/// A range covering the whole sequence (0, 4) must return it unchanged.
#[test]
fn test_slice_full_sequence() {
    let record = IndexedSequence::new(0, "ACGT");
    let window = record.subsequence(0, 4);
    assert_eq!(window.get_sequence(), "ACGT");
}
1287
/// A range with `start == end` (2, 2) must yield an empty sequence.
#[test]
fn test_slice_equal_start_and_end() {
    let record = IndexedSequence::new(0, "ACGT");
    let window = record.subsequence(2, 2);
    assert_eq!(window.get_sequence(), "");
}
1293
/// An `end` past the sequence length (1, 10) must be clamped: `ACGT` -> `CGT`.
#[test]
fn test_slice_end_beyond_sequence() {
    let record = IndexedSequence::new(0, "ACGT");
    let window = record.subsequence(1, 10);
    assert_eq!(window.get_sequence(), "CGT");
}
1299
/// A range lying entirely past the sequence (10, 20) must yield an empty
/// sequence rather than panic.
#[test]
fn test_slice_start_beyond_sequence() {
    let record = IndexedSequence::new(0, "ACGT");
    let window = record.subsequence(10, 20);
    assert_eq!(window.get_sequence(), "");
}
1305
/// An inverted range (3, 1) must yield an empty sequence rather than panic.
#[test]
fn test_slice_start_greater_than_end() {
    let record = IndexedSequence::new(0, "ACGT");
    let window = record.subsequence(3, 1);
    assert_eq!(window.get_sequence(), "");
}
1311
/// `subsequence(0, 0)` of an empty sequence must be empty.
#[test]
fn test_empty_sequence_zero_indices() {
    let record = IndexedSequence::new(0, "");
    let window = record.subsequence(0, 0);
    assert_eq!(window.get_sequence(), "");
}
1317
/// `subsequence(2, 5)` of an empty sequence must be empty rather than panic.
#[test]
fn test_empty_sequence_nonzero_indices() {
    let record = IndexedSequence::new(0, "");
    let window = record.subsequence(2, 5);
    assert_eq!(window.get_sequence(), "");
}
1323
/// An inverted range (5, 2) on an empty sequence must be empty rather than
/// panic.
#[test]
fn test_empty_sequence_start_greater_than_end() {
    let record = IndexedSequence::new(0, "");
    let window = record.subsequence(5, 2);
    assert_eq!(window.get_sequence(), "");
}
1329
/// A clone must report the same index and the same sequence as its source.
#[test]
fn test_clone_indexed_sequence() {
    let source = IndexedSequence::new(7, "ACGT");
    let copy = source.clone();
    let (copy_index, source_index) = (copy.get_index(), source.get_index());
    assert_eq!(copy_index, source_index);
    let (copy_sequence, source_sequence) = (copy.get_sequence(), source.get_sequence());
    assert_eq!(copy_sequence, source_sequence);
}
1337
/// A clone must own its data: it stays fully usable after the value it was
/// cloned from has been dropped.
#[test]
fn test_clone_indexed_sequence_is_independent() {
    let original = IndexedSequence::new(2, "ACGT");
    let cloned = original.clone();
    // Destroy the source first. Without this step the test never exercises
    // the independence its name promises; with it, the assertions below only
    // compile and pass if the clone holds its own copy of the data.
    drop(original);
    assert_eq!(cloned.get_index(), 2);
    assert_eq!(cloned.get_sequence(), "ACGT");
}
1345
/// `IndexedSequenceWithQuality::new` must succeed for equal-length inputs and
/// store the index so `get_index` returns it.
#[test]
fn test_quality_new_and_get_index() {
    let record = IndexedSequenceWithQuality::new(5, "IIII", "ACGT").unwrap();
    let stored_index = record.get_index();
    assert_eq!(stored_index, 5);
}
1352
/// Quality scores shorter than the sequence (2 vs 4 characters) must make the
/// constructor return an error.
#[test]
fn test_quality_new_mismatched_lengths_returns_error() {
    let outcome = IndexedSequenceWithQuality::new(0, "II", "ACGT");
    let rejected = outcome.is_err();
    assert!(rejected);
}
1358
/// `subsequence(2, 5)` must cut the sequence and the quality scores with the
/// same range: `ACGTACGT` -> `GTA`, `IIHH????` -> `HH?`.
#[test]
fn test_quality_subsequence_within_bounds() {
    let record = IndexedSequenceWithQuality::new(0, "IIHH????", "ACGTACGT").unwrap();
    let window = record.subsequence(2, 5);
    let (bases, scores) = (window.get_sequence(), window.get_quality_scores());
    assert_eq!(bases, "GTA");
    assert_eq!(scores, "HH?");
}
1366
/// A range covering the whole record (0, 4) must return both the sequence
/// and the quality scores unchanged.
#[test]
fn test_quality_subsequence_full_sequence() {
    let record = IndexedSequenceWithQuality::new(0, "IIII", "ACGT").unwrap();
    let window = record.subsequence(0, 4);
    let (bases, scores) = (window.get_sequence(), window.get_quality_scores());
    assert_eq!(bases, "ACGT");
    assert_eq!(scores, "IIII");
}
1374
/// A range with `start == end` (2, 2) must yield an empty sequence and empty
/// quality scores.
#[test]
fn test_quality_subsequence_equal_start_and_end() {
    let record = IndexedSequenceWithQuality::new(0, "IIII", "ACGT").unwrap();
    let window = record.subsequence(2, 2);
    let (bases, scores) = (window.get_sequence(), window.get_quality_scores());
    assert_eq!(bases, "");
    assert_eq!(scores, "");
}
1382
/// An `end` past the record length (1, 10) must be clamped for both fields:
/// `ACGT` -> `CGT`, `IIII` -> `III`.
#[test]
fn test_quality_subsequence_end_beyond_sequence() {
    let record = IndexedSequenceWithQuality::new(0, "IIII", "ACGT").unwrap();
    let window = record.subsequence(1, 10);
    let (bases, scores) = (window.get_sequence(), window.get_quality_scores());
    assert_eq!(bases, "CGT");
    assert_eq!(scores, "III");
}
1390
/// A range lying entirely past the record (10, 20) must yield an empty
/// sequence and empty quality scores rather than panic.
#[test]
fn test_quality_subsequence_start_beyond_sequence() {
    let record = IndexedSequenceWithQuality::new(0, "IIII", "ACGT").unwrap();
    let window = record.subsequence(10, 20);
    let (bases, scores) = (window.get_sequence(), window.get_quality_scores());
    assert_eq!(bases, "");
    assert_eq!(scores, "");
}
1398
/// An inverted range (3, 1) must yield an empty sequence and empty quality
/// scores rather than panic.
#[test]
fn test_quality_subsequence_start_greater_than_end() {
    let record = IndexedSequenceWithQuality::new(0, "IIII", "ACGT").unwrap();
    let window = record.subsequence(3, 1);
    let (bases, scores) = (window.get_sequence(), window.get_quality_scores());
    assert_eq!(bases, "");
    assert_eq!(scores, "");
}
1406
/// `subsequence(0, 0)` of an empty record must leave both fields empty.
#[test]
fn test_quality_empty_sequence_zero_indices() {
    let record = IndexedSequenceWithQuality::new(0, "", "").unwrap();
    let window = record.subsequence(0, 0);
    let (bases, scores) = (window.get_sequence(), window.get_quality_scores());
    assert_eq!(bases, "");
    assert_eq!(scores, "");
}
1414
/// `subsequence(2, 5)` of an empty record must leave both fields empty rather
/// than panic.
#[test]
fn test_quality_empty_sequence_nonzero_indices() {
    let record = IndexedSequenceWithQuality::new(0, "", "").unwrap();
    let window = record.subsequence(2, 5);
    let (bases, scores) = (window.get_sequence(), window.get_quality_scores());
    assert_eq!(bases, "");
    assert_eq!(scores, "");
}
1422
/// An inverted range (5, 2) on an empty record must leave both fields empty
/// rather than panic.
#[test]
fn test_quality_empty_sequence_start_greater_than_end() {
    let record = IndexedSequenceWithQuality::new(0, "", "").unwrap();
    let window = record.subsequence(5, 2);
    let (bases, scores) = (window.get_sequence(), window.get_quality_scores());
    assert_eq!(bases, "");
    assert_eq!(scores, "");
}
1430
/// A clone must report the same index, sequence and quality scores as its
/// source.
#[test]
fn test_clone_indexed_sequence_with_quality() {
    let source = IndexedSequenceWithQuality::new(3, "IIII", "ACGT").unwrap();
    let copy = source.clone();
    let (copy_index, source_index) = (copy.get_index(), source.get_index());
    assert_eq!(copy_index, source_index);
    let (copy_bases, source_bases) = (copy.get_sequence(), source.get_sequence());
    assert_eq!(copy_bases, source_bases);
    let (copy_scores, source_scores) = (copy.get_quality_scores(), source.get_quality_scores());
    assert_eq!(copy_scores, source_scores);
}
1439
/// A clone must own its data: index, sequence and quality scores all stay
/// readable after the value it was cloned from has been dropped.
#[test]
fn test_clone_indexed_sequence_with_quality_is_independent() {
    let original = IndexedSequenceWithQuality::new(1, "IIII", "ACGT").unwrap();
    let cloned = original.clone();
    // Destroy the source first. Without this step the test never exercises
    // the independence its name promises; with it, the assertions below only
    // compile and pass if the clone holds its own copy of the data.
    drop(original);
    assert_eq!(cloned.get_index(), 1);
    assert_eq!(cloned.get_sequence(), "ACGT");
    assert_eq!(cloned.get_quality_scores(), "IIII");
}
1448
1449}