Skip to content
代码片段 群组 项目
提交 4f1be7c4 编辑于 作者: Martha Borkan's avatar Martha Borkan
浏览文件

Improve interface to bam index metaData - public getMetaData in AbstractBAMFileIndex.

上级 0dc96692
标签
无相关合并请求
......@@ -23,7 +23,6 @@
*/
package net.sf.samtools;
import net.sf.samtools.util.BlockCompressedFilePointerUtil;
import net.sf.samtools.util.RuntimeIOException;
import java.io.*;
......@@ -202,14 +201,31 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
return lastLinearIndexPointer;
}
/**
* Can only be called once all other references have been read, before the file is closed
/**
 * Looks up the meta data for one reference, including information about the number of
 * aligned, unaligned, and noCoordinate records.
 * The reference is queried with start 0 and end -1 and the meta data is taken from the
 * content that query returns.
 *
 * @param reference the reference of interest
 * @return meta data for the reference
 */
public BAMIndexMetaData getMetaData(int reference) {
    // todo: it would be faster just to skip to the last bin
    return query(reference, 0, -1).getMetaData();
}
/**
* Returns the count of records that are not associated with any reference.
* Call before the index file is closed.
* <p>
* The sequence count is read after seeking to offset 4, the reader is moved with
* skipToSequence(sequenceCount), and a long is then read; if that read throws, null is returned.
*
* @return meta data at the end of the bam index that indicates count of records holding no coordinates
* or null if no meta data (old index format)
*/
Long getNoCoordinateCount(){
public Long getNoCoordinateCount() {
// The sequence count is read starting at offset 4 (presumably just past a 4-byte header - confirm).
seek(4);
final int sequenceCount = readInteger();
// presumably positions the reader just past the last reference's entries - verify against skipToSequence
skipToSequence(sequenceCount);
try { // in case of old index file without meta data
return readLong();
} catch (Exception e){
return readLong();
} catch (Exception e) {
// NOTE(review): every exception type is caught here, so any failure of readLong() is reported as "no meta data".
return null;
}
}
......@@ -217,8 +233,6 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
protected BAMIndexContent query(final int referenceSequence, final int startPos, final int endPos) {
seek(4);
Bin[] bins = null;
LinearIndex linearIndex = null;
List<Chunk> metaDataChunks = new ArrayList<Chunk>();
final int sequenceCount = readInteger();
......@@ -236,7 +250,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
int binCount = readInteger();
boolean metaDataSeen = false;
bins = new Bin[getMaxBinNumberForReference(referenceSequence) +1];
Bin[] bins = new Bin[getMaxBinNumberForReference(referenceSequence) +1];
for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger();
final int nChunks = readInteger();
......@@ -267,13 +281,13 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
Bin bin = new Bin(referenceSequence, indexBin);
bin.setChunkList(chunks);
bin.setLastChunk(lastChunk);
bins[bin.getBinNumber()] = bin;
bins[indexBin] = bin;
}
final int nLinearBins = readInteger();
final int regionLinearBinStart = LinearIndex.convertToLinearIndexOffset(startPos);
final int regionLinearBinStop = LinearIndex.convertToLinearIndexOffset(endPos)>0 ? LinearIndex.convertToLinearIndexOffset(endPos) : nLinearBins-1;
final int regionLinearBinStop = endPos > 0 ? LinearIndex.convertToLinearIndexOffset(endPos) : nLinearBins-1;
final int actualStop = Math.min(regionLinearBinStop, nLinearBins -1);
long[] linearIndexEntries = new long[0];
......@@ -284,7 +298,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
linearIndexEntries[linearBin-regionLinearBinStart] = readLong();
}
linearIndex = new LinearIndex(referenceSequence,regionLinearBinStart,linearIndexEntries);
final LinearIndex linearIndex = new LinearIndex(referenceSequence,regionLinearBinStart,linearIndexEntries);
return new BAMIndexContent(referenceSequence, bins, binCount - (metaDataSeen? 1 : 0), new BAMIndexMetaData(metaDataChunks), linearIndex);
}
......@@ -351,7 +365,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
final List<Chunk> result = new ArrayList<Chunk>();
for (final Chunk chunk : chunks) {
if (chunk.getChunkEnd() <= minimumOffset) {
continue;
continue; // linear index optimization
}
if (result.isEmpty()) {
result.add(chunk);
......@@ -408,4 +422,4 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
/**
* Sets the position of {@code mFileBuffer} to the given value.
*
* @param position value passed to {@code mFileBuffer.position}; presumably a byte offset into the index data - confirm
*/
private void seek(final int position) {
mFileBuffer.position(position);
}
}
\ No newline at end of file
}
......@@ -49,5 +49,12 @@ public interface BAMIndex {
* Gets the start of the last linear bin in the index.
* @return The chunk indicating the start of the last bin in the linear index.
*/
long getStartOfLastLinearBin();
long getStartOfLastLinearBin();
/**
 * Gets meta data for the given reference including information about number of aligned, unaligned, and noCoordinate records.
 *
 * @param reference the reference of interest
 * @return meta data for the reference
 */
BAMIndexMetaData getMetaData(int reference);
}
......@@ -101,13 +101,6 @@ public class BAMIndexMetaData {
return unAlignedRecords;
}
/**
* Reports the value currently held in {@code noCoordinateRecords}.
*
* @return the count of records with no coordinate information in the bam file
*/
public long getNoCoordinateRecordCount() {
return noCoordinateRecords;
}
/**
* Call for each new reference sequence encountered
*/
......@@ -160,12 +153,22 @@ public class BAMIndexMetaData {
}
/**
* Stores the given count of records with no coordinate information in {@code noCoordinateRecords}.
* Call whenever a reference with no coordinate information is encountered in the bam file.
* Normally the noCoordinateRecord count is accessed from AbstractBAMFileIndex when reading.
*
* @param count the value to store
*/
void setNoCoordinateRecordCount(long count) {
private void setNoCoordinateRecordCount(long count) {
noCoordinateRecords = count;
}
/**
* Reports the count of records with no coordinate information in the bam file.
* Not public, since only used by BAMIndexer when writing bam index.
* Readers of bam index should use AbstractBAMFileIndex.getNoCoordinateCount.
*
* @return the value held in {@code noCoordinateRecords}
*/
long getNoCoordinateRecordCount() {
return noCoordinateRecords;
}
/**
* @return the first virtual file offset used by this reference
*/
......@@ -218,21 +221,22 @@ public class BAMIndexMetaData {
* Statistics include count of aligned and unaligned reads for each reference sequence
* and a count of all records with no start coordinate
*/
static public BAMIndexMetaData[] getIndexStats(final BAMFileReader bam) {
static public BAMIndexMetaData[] getIndexStats(final BAMFileReader bam){
// NOTE(review): unchecked downcast - throws ClassCastException if bam.getIndex() is not an AbstractBAMFileIndex.
AbstractBAMFileIndex index = (AbstractBAMFileIndex) bam.getIndex();
// read through all the bins of every reference.
int nRefs = index.getNumberOfReferences();
// Allocate at least one slot so that result[0] can carry the no-coordinate count even when there are no references.
BAMIndexMetaData[] result = new BAMIndexMetaData[nRefs == 0 ? 1 : nRefs];
for (int i = 0; i < nRefs; i++) {
BAMIndexContent content = index.query(i, 0, -1); // todo: it would be faster just to skip to the last bin
result[i] = content.getMetaData();
result[i] = index.getMetaData(i);
}
// result[0] is still null when the loop did not fill it (e.g. nRefs == 0); replace it with a default-constructed object.
if (result[0] == null){
result[0] = new BAMIndexMetaData();
}
result[0].setNoCoordinateRecordCount(index.getNoCoordinateCount());
final Long noCoordCount = index.getNoCoordinateCount();
if (noCoordCount != null) // null in old index files without metadata
result[0].setNoCoordinateRecordCount(noCoordCount);
return result;
}
......
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册