diff --git a/pybedtools/bedtool.py b/pybedtools/bedtool.py index 1b8b491d..96ada4b0 100644 --- a/pybedtools/bedtool.py +++ b/pybedtools/bedtool.py @@ -700,7 +700,11 @@ def tabix_intervals(self, interval_or_string, check_coordinates=False): # tabix expects 1-based coords, but BEDTools works with # zero-based. pybedtools and pysam also work with zero-based. So we can # pass zero-based directly to the pysam tabix interface. - tbx = pysam.TabixFile(self.fn) + try: + tbx = pysam.TabixFile(self.fn) + except OSError: + # if the file is indexed using csi, we need to specify the path for index + tbx = pysam.TabixFile(self.fn, index=self.fn+".csi") # If an interval is passed, use its coordinates directly if isinstance(interval_or_string, Interval): @@ -749,10 +753,14 @@ def tabix_contigs(self): "-- please use the .tabix() method" ) - tbx = pysam.TabixFile(self.fn) + try: + tbx = pysam.TabixFile(self.fn) + except OSError: + # if the file is indexed using csi, we need to specify the path for index + tbx = pysam.TabixFile(self.fn, index=self.fn+".csi") return tbx.contigs - def tabix(self, in_place=True, force=False, is_sorted=False): + def tabix(self, in_place=True, force=False, is_sorted=False, use_csi=False): """ Prepare a BedTool for use with Tabix. @@ -773,6 +781,10 @@ def tabix(self, in_place=True, force=False, is_sorted=False): is_sorted : bool If True (default is False), then assume the file is already sorted so that BedTool.bgzip() doesn't have to do that work. + + use_csi : bool + If True (default is False), then generate a csi instead of tbi index. + This can be useful when working with chromosomes larger than 512 Mbp, such as barley """ # Return quickly if nothing to do if self._tabixed() and not force: @@ -781,18 +793,18 @@ def tabix(self, in_place=True, force=False, is_sorted=False): # Make sure it's BGZIPed fn = self.bgzip(in_place=in_place, force=force) - pysam.tabix_index(fn, force=force, preset=self.file_type) + pysam.tabix_index(fn, force=force, preset=self.file_type, csi=use_csi) return BedTool(fn) def _tabixed(self): """ Verifies that we're working with a tabixed file: a string filename - pointing to a BGZIPed file with a .tbi file in the same dir. + pointing to a BGZIPed file with a .tbi or .csi file in the same dir. """ if ( isinstance(self.fn, str) and isBGZIP(self.fn) - and os.path.exists(self.fn + ".tbi") + and (os.path.exists(self.fn + ".tbi") or os.path.exists(self.fn + ".csi")) ): return True diff --git a/pybedtools/test/test_1.py b/pybedtools/test/test_1.py index b9d67fe5..20a68393 100644 --- a/pybedtools/test/test_1.py +++ b/pybedtools/test/test_1.py @@ -133,35 +133,36 @@ def test_tuple_creation(): def test_tabix(): - try: - a = pybedtools.example_bedtool("a.bed") - t = a.tabix(force=True) - assert t._tabixed() - results = t.tabix_intervals("chr1:99-200") - results = str(results) - print(results) - assert results == fix( - """ - chr1 1 100 feature1 0 + - chr1 100 200 feature2 0 + - chr1 150 500 feature3 0 -""" - ) + for idx_type in ("tbi", "csi"): + try: + a = pybedtools.example_bedtool("a.bed") + t = a.tabix(force=True, use_csi=True if idx_type == "csi" else False) + assert t._tabixed() + results = t.tabix_intervals("chr1:99-200") + results = str(results) + print(results) + assert results == fix( + """ + chr1 1 100 feature1 0 + + chr1 100 200 feature2 0 + + chr1 150 500 feature3 0 -""" + ) - assert str(t.tabix_intervals(a[2])) == fix( - """ - chr1 100 200 feature2 0 + - chr1 150 500 feature3 0 -""" - ) + assert str(t.tabix_intervals(a[2])) == fix( + """ + chr1 100 200 feature2 0 + + chr1 150 500 feature3 0 -""" + ) - finally: - # clean up - fns = [ - pybedtools.example_filename("a.bed.gz"), - pybedtools.example_filename("a.bed.gz.tbi"), - ] - for fn in fns: - if os.path.exists(fn): - os.unlink(fn) + finally: + # clean up + fns = [ + pybedtools.example_filename("a.bed.gz"), + pybedtools.example_filename("a.bed.gz." + idx_type), + ] + for fn in fns: + if os.path.exists(fn): + os.unlink(fn) def test_tabix_intervals(): @@ -177,6 +178,12 @@ def test_tabix_intervals(): assert len(a.tabix_intervals("chr1")) == 1 +def test_tabix_contigs_csi(): + a = pybedtools.example_bedtool("a.bed") + a = a.tabix(force=True, use_csi=True) + assert a.tabix_contigs() == ["chr1"] + + # ---------------------------------------------------------------------------- # Streaming and non-file BedTool tests # ----------------------------------------------------------------------------