Skip to content
代码片段 群组 项目
提交 0dc96692 编辑于 作者: Alec Wysoker's avatar Alec Wysoker
浏览文件

Support bgzipped SAM files in SAMFileReader.

上级 37e6d218
无相关合并请求
......@@ -27,6 +27,7 @@ package net.sf.samtools;
import net.sf.samtools.util.*;
import java.io.*;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;
import java.net.URL;
......@@ -133,7 +134,7 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
* @param eagerDecode if true, decode SAM record entirely when reading it.
*/
public SAMFileReader(final InputStream stream, final boolean eagerDecode) {
    // Initialise exactly once through the unified init(stream, file, indexFile, ...) helper.
    // Stream-only input has neither a backing file nor an index file, hence the two nulls.
    // (Calling the former stream-only init as well would probe and wrap the same stream twice.)
    init(stream, null, null, eagerDecode, defaultValidationStringency);
}
/**
......@@ -144,7 +145,7 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
* @param eagerDecode if true, decode SAM record entirely when reading it.
*/
public SAMFileReader(final File file, final boolean eagerDecode) {
    // Delegate to the (file, indexFile, eagerDecode) constructor with no explicit index file.
    // An explicit constructor invocation must be the first statement, so no init(...) call
    // may precede it; the delegate performs the single initialisation.
    this(file, null, eagerDecode);
}
/**
......@@ -156,7 +157,7 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
* @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it.
*/
public SAMFileReader(final File file, final File indexFile, final boolean eagerDecode) {
    // Initialise exactly once through the unified init(stream, file, indexFile, ...) helper.
    // The stream argument is null because the input is a file; init rejects the case where
    // both a stream and a file are supplied.
    init(null, file, indexFile, eagerDecode, defaultValidationStringency);
}
/**
......@@ -446,27 +447,6 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
}
}
/**
 * Sets up this reader from a raw input stream.
 *
 * The stream is wrapped via IOUtil.toBufferedStream and then probed, in order, for
 * BAM, gzipped SAM text and plain SAM text. The first probe that matches decides
 * which underlying reader is created.
 *
 * @param stream input to read from.
 * @param eagerDecode passed through to the BAM reader.
 * @param validationStringency stringency handed to the underlying reader and then applied
 *                             to this reader via setValidationStringency.
 * @throws SAMFormatException if none of the probes recognises the stream.
 * @throws RuntimeIOException wrapping any IOException raised while probing or opening.
 */
private void init(final InputStream stream, final boolean eagerDecode, final ValidationStringency validationStringency) {
    try {
        final BufferedInputStream in = IOUtil.toBufferedStream(stream);
        if (isBAMFile(in)) {
            mIsBinary = true;
            mReader = new BAMFileReader(in, null, eagerDecode, validationStringency);
        } else {
            // Text input: the gzip probe runs first; the plain-SAM probe only if that fails.
            final boolean gzipped = isGzippedSAMFile(in);
            if (!gzipped && !isSAMFile(in)) {
                throw new SAMFormatException("Unrecognized file format");
            }
            mIsBinary = false;
            if (gzipped) {
                mReader = new SAMTextReader(new GZIPInputStream(in), validationStringency);
            } else {
                mReader = new SAMTextReader(in, validationStringency);
            }
        }
        setValidationStringency(validationStringency);
    } catch (final IOException ioe) {
        throw new RuntimeIOException(ioe);
    }
}
private void init(final SeekableStream strm, final File indexFile, final boolean eagerDecode,
......@@ -489,21 +469,26 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
}
private void init(final File file, File indexFile, final boolean eagerDecode, final ValidationStringency validationStringency) {
private void init(final InputStream stream, final File file, File indexFile, final boolean eagerDecode, final ValidationStringency validationStringency) {
if (stream != null && file != null) throw new IllegalArgumentException("stream and file are mutually exclusive");
this.samFile = file;
try {
final BufferedInputStream bufferedStream = new BufferedInputStream(new FileInputStream(file));
final BufferedInputStream bufferedStream;
if (file != null) bufferedStream = new BufferedInputStream(new FileInputStream(file));
else bufferedStream = IOUtil.toBufferedStream(stream);
if (isBAMFile(bufferedStream)) {
mIsBinary = true;
if (!file.isFile()) {
if (file == null || !file.isFile()) {
// Handle case in which file is a named pipe, e.g. /dev/stdin or created by mkfifo
mReader = new BAMFileReader(bufferedStream, indexFile, eagerDecode, validationStringency);
} else {
bufferedStream.close();
final BAMFileReader reader = new BAMFileReader(file, indexFile, eagerDecode, validationStringency);
mReader = reader;
mReader = new BAMFileReader(file, indexFile, eagerDecode, validationStringency);
}
} else if (BlockCompressedInputStream.isValidFile(bufferedStream)) {
mIsBinary = false;
mReader = new SAMTextReader(new BlockCompressedInputStream(bufferedStream), validationStringency);
} else if (isGzippedSAMFile(bufferedStream)) {
mIsBinary = false;
mReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency);
......@@ -531,7 +516,30 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
*/
private boolean isBAMFile(final InputStream stream)
        throws IOException {
    // Being block-compressed is necessary but not sufficient: returning the result of
    // isValidFile directly would leave the magic-number check below unreachable and would
    // report block-compressed SAM text as BAM.
    if (!BlockCompressedInputStream.isValidFile(stream)) {
        return false;
    }

    // Copy up to one maximum-size compressed block, then rewind so the caller still sees
    // the stream from its current position.
    // NOTE(review): relies on the stream supporting mark/reset; the visible callers pass a
    // BufferedInputStream.
    final int buffSize = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
    stream.mark(buffSize);
    final byte[] buffer = new byte[buffSize];
    readBytes(stream, buffer, 0, buffSize);
    stream.reset();

    // Decompress from the in-memory copy and compare the first bytes with the BAM magic.
    final byte[] magicBuf = new byte[4];
    final int magicLength = readBytes(new BlockCompressedInputStream(new ByteArrayInputStream(buffer)), magicBuf, 0, 4);
    return magicLength == BAMFileConstants.BAM_MAGIC.length && Arrays.equals(BAMFileConstants.BAM_MAGIC, magicBuf);
}
/**
 * Reads from the stream until {@code length} bytes have been stored or a read call
 * reports no progress (a return value of zero or less).
 *
 * @param stream source of the bytes.
 * @param buffer destination array.
 * @param offset index in {@code buffer} at which the first byte is stored.
 * @param length maximum number of bytes to read.
 * @return the number of bytes actually stored, between 0 and {@code length}.
 * @throws IOException if the underlying read fails.
 */
private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length)
        throws IOException {
    int total = 0;
    int remaining = length;
    while (remaining > 0) {
        final int n = stream.read(buffer, offset + total, remaining);
        if (n <= 0) {
            // End of stream (or a read that made no progress): report what we have.
            return total;
        }
        total += n;
        remaining -= n;
    }
    return total;
}
/**
......
/*
* The MIT License
*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package net.sf.samtools;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
/**
 * Smoke test: opens each listed input with {@link SAMFileReader} and iterates over
 * every record, failing only if opening or iterating throws.
 */
public class SAMFileReaderTest {
    private static final File TEST_DATA_DIR = new File("testdata/net/sf/samtools");

    /**
     * Opens the named file from the test data directory, iterates over all of its records
     * and closes the reader.
     *
     * @param inputFile file name relative to the test data directory.
     */
    @Test(dataProvider = "variousFormatReaderTestCases")
    public void variousFormatReaderTest(final String inputFile) {
        final File input = new File(TEST_DATA_DIR, inputFile);
        final SAMFileReader reader = new SAMFileReader(input);
        try {
            for (final SAMRecord rec : reader) {
                // Intentionally empty: iterating is the whole test.
            }
        } finally {
            // Close even when iteration throws, so a failing case does not leak the reader.
            reader.close();
        }
    }

    /**
     * Supplies one file name per test invocation.
     *
     * @return one single-element row per input file.
     */
    @DataProvider(name = "variousFormatReaderTestCases")
    public Object[][] variousFormatReaderTestCases() {
        return new Object[][] {
                { "block_compressed.sam.gz" },
                { "uncompressed.sam" },
                { "compressed.sam.gz" },
                { "compressed.bam" },
        };
    }
}
文件已添加
文件已添加
文件已添加
@HD VN:1.0 SO:unsorted
@SQ SN:chr1 LN:101
@SQ SN:chr2 LN:101
@SQ SN:chr3 LN:101
@RG ID:0 SM:Hi,Mom!
A 73 chr2 1 255 10M * 0 0 CAACAGAAGC )'.*.+2,)) RG:Z:0
A 133 * 0 0 * chr2 1 0 CAACAGAAGC )'.*.+2,)) RG:Z:0
B 99 chr1 1 255 10M = 26 35 CAACAGAAGC )'.*.+2,)) RG:Z:0
B 147 chr1 26 255 10M = 1 -35 CAACAGAAGC )'.*.+2,)) RG:Z:0
C 99 chr2 1 255 10M = 26 35 CAACAGAAGC )'.*.+2,)) RG:Z:0
C 147 chr2 26 255 10M = 1 -35 CAACAGAAGC )'.*.+2,)) RG:Z:0
D 99 chr3 1 255 10M = 25 35 CAACAGAAGC )'.*.+2,)) RG:Z:0
D 147 chr3 26 255 10M = 1 -35 CAACAGAAGC )'.*.+2,)) RG:Z:0
E 99 chr1 2 255 10M = 15 30 CAACAGAAGC )'.*.+2,)) RG:Z:0
E 147 chr1 15 255 10M = 2 -30 CAACAGAAGC )'.*.+2,)) RG:Z:0
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册