Skip to content
GitLab
探索
登录
主导航
搜索或转到…
项目
A
allhic
管理
动态
成员
标记
计划
议题
0
议题看板
里程碑
迭代
Wiki
代码
合并请求
0
仓库
分支
提交
标签
仓库图
比较修订版本
代码片段
锁定的文件
构建
流水线
作业
流水线计划
产物
部署
发布
软件包库
运维
环境
Terraform 模块
监控
事件
服务台
分析
价值流分析
Contributor analytics
CI/CD 分析
仓库分析
代码评审分析
议题分析
模型实验
帮助
帮助
支持
GitLab 文档
比较 GitLab 各版本
社区论坛
为极狐GitLab 提交贡献
提交反馈
快捷键
?
支持
扫描加入微信群:
1. 获取企业级DevOps解决方案支持
2. 免费或折扣极狐GitLab 官方培训认证
代码片段
群组
项目
HPCSource
allhic
提交
e16bbfa2
提交
e16bbfa2
编辑于
6年前
作者:
Haibao Tang
浏览文件
操作
下载
补丁
差异文件
[optimize] Simplify optimize interface
上级
82abd1b5
无相关合并请求
变更
6
隐藏空白变更内容
行内
左右并排
显示
6 个更改的文件
README.md
+4
-2
4 个添加, 2 个删除
README.md
clm.go
+11
-11
11 个添加, 11 个删除
clm.go
cmd/allhic.go
+2
-5
2 个添加, 5 个删除
cmd/allhic.go
extract.go
+24
-13
24 个添加, 13 个删除
extract.go
optimize.go
+11
-14
11 个添加, 14 个删除
optimize.go
partition.go
+14
-0
14 个添加, 0 个删除
partition.go
有
66 个添加
和
45 个删除
README.md
+
4
−
2
浏览文件 @
e16bbfa2
...
...
@@ -65,7 +65,8 @@ Optimize uses Genetic Algorithm (GA) to search for the best scoring solution. GA

```
console
allhic optimize tests/test.counts_GATC.txt tests/test.clm
allhic optimize tests/test.counts_GATC.g0.txt tests/test.clm
allhic optimize tests/test.counts_GATC.g1.txt tests/test.clm
```
### <kbd>Build</kbd>
...
...
@@ -83,7 +84,8 @@ Following the 4 steps of `prune`, `extract`, `partition`, `optimize`
```
console
allhic extract T4_Chr1/{prunning.sub.bam,seq.fasta}
allhic partition T4_Chr1/{prunning.sub.counts_GATC.txt,prunning.sub.pairs.txt} 2
allhic optimize T4_Chr1/{prunning.sub.counts_GATC.txt,prunning.sub.clm}
allhic optimize T4_Chr1/{prunning.sub.counts_GATC.g0.txt,prunning.sub.clm}
allhic optimize T4_Chr1/{prunning.sub.counts_GATC.g1.txt,prunning.sub.clm}
allhic build T4_Chr/{prunning.sub.tour,seq.fasta}
```
...
...
This diff is collapsed.
Click to expand it.
clm.go
+
11
−
11
浏览文件 @
e16bbfa2
...
...
@@ -99,28 +99,28 @@ func NewCLM(Clmfile, REfile string) *CLM {
p
.
contacts
=
make
(
map
[
Pair
]
Contact
)
p
.
orientedContacts
=
make
(
map
[
OrientedPair
]
GArray
)
p
.
ParseIds
()
p
.
Parse
Clm
()
p
.
readRE
()
p
.
read
Clm
()
return
p
}
//
ParseIds
parses the idsfile into data stored in CLM.
//
readRE
parses the idsfile into data stored in CLM.
// IDS file has a list of contigs that need to be ordered. 'recover',
// keyword, if available in the third column, is less confident.
// tig00015093 46912
// tig00035238 46779 recover
// tig00030900 119291
func
(
r
*
CLM
)
ParseIds
()
{
func
(
r
*
CLM
)
readRE
()
{
file
,
_
:=
os
.
Open
(
r
.
REfile
)
log
.
Noticef
(
"Parse
ids
file `%s`"
,
r
.
REfile
)
log
.
Noticef
(
"Parse
RE
file `%s`"
,
r
.
REfile
)
scanner
:=
bufio
.
NewScanner
(
file
)
idx
:=
0
for
scanner
.
Scan
()
{
words
:=
strings
.
Fields
(
scanner
.
Text
())
tig
:=
words
[
0
]
size
,
_
:=
strconv
.
Atoi
(
words
[
len
(
words
)
-
1
])
r
.
Tigs
=
append
(
r
.
Tigs
,
&
TigF
{
idx
,
tig
,
size
,
fals
e
})
r
.
Tigs
=
append
(
r
.
Tigs
,
&
TigF
{
idx
,
tig
,
size
,
tru
e
})
r
.
tigToIdx
[
tig
]
=
idx
idx
++
}
...
...
@@ -134,8 +134,8 @@ func rr(b byte) byte {
return
'-'
}
//
Parse
ClmLines parses the clmfile into a slice of CLMLine
func
Parse
ClmLines
(
clmfile
string
)
[]
CLMLine
{
//
read
ClmLines parses the clmfile into a slice of CLMLine
func
read
ClmLines
(
clmfile
string
)
[]
CLMLine
{
file
,
_
:=
os
.
Open
(
clmfile
)
log
.
Noticef
(
"Parse clmfile `%s`"
,
clmfile
)
reader
:=
bufio
.
NewReader
(
file
)
...
...
@@ -172,9 +172,9 @@ func ParseClmLines(clmfile string) []CLMLine {
return
lines
}
//
Parse
Clm parses the clmfile into data stored in CLM.
func
(
r
*
CLM
)
Parse
Clm
()
{
lines
:=
Parse
ClmLines
(
r
.
Clmfile
)
//
read
Clm parses the clmfile into data stored in CLM.
func
(
r
*
CLM
)
read
Clm
()
{
lines
:=
read
ClmLines
(
r
.
Clmfile
)
for
_
,
line
:=
range
lines
{
// Make sure both contigs are in the ids file
ai
,
aok
:=
r
.
tigToIdx
[
line
.
at
]
...
...
This diff is collapsed.
Click to expand it.
cmd/allhic.go
+
2
−
5
浏览文件 @
e16bbfa2
...
...
@@ -154,7 +154,7 @@ can be generated with the "extract" sub-command.
Name
:
"optimize"
,
Usage
:
"Order-and-orient tigs in a group"
,
UsageText
:
`
allhic optimize counts_RE.txt clmfile
clusters.txt group_number
[options]
allhic optimize counts_RE.txt clmfile [options]
Optimize function:
Given a set of Hi-C contacts between contigs, as specified in the
...
...
@@ -196,15 +196,13 @@ on a cluster).
},
},
Action
:
func
(
c
*
cli
.
Context
)
error
{
if
len
(
c
.
Args
())
<
4
{
if
len
(
c
.
Args
())
<
2
{
cli
.
ShowSubcommandHelp
(
c
)
return
cli
.
NewExitError
(
"Must specify clmfile"
,
1
)
}
refile
:=
c
.
Args
()
.
Get
(
0
)
clmfile
:=
c
.
Args
()
.
Get
(
1
)
clustersfile
:=
c
.
Args
()
.
Get
(
2
)
group
,
_
:=
strconv
.
Atoi
(
c
.
Args
()
.
Get
(
3
))
runGA
:=
!
c
.
Bool
(
"skipGA"
)
resume
:=
c
.
Bool
(
"resume"
)
seed
:=
c
.
Int64
(
"seed"
)
...
...
@@ -212,7 +210,6 @@ on a cluster).
ngen
:=
c
.
Int
(
"ngen"
)
mutpb
:=
c
.
Float64
(
"mutpb"
)
p
:=
allhic
.
Optimizer
{
REfile
:
refile
,
Clmfile
:
clmfile
,
Clustersfile
:
clustersfile
,
Group
:
group
,
RunGA
:
runGA
,
Resume
:
resume
,
Seed
:
seed
,
NPop
:
npop
,
NGen
:
ngen
,
MutProb
:
mutpb
}
p
.
Run
()
...
...
This diff is collapsed.
Click to expand it.
extract.go
+
24
−
13
浏览文件 @
e16bbfa2
...
...
@@ -151,22 +151,35 @@ func (r *Extracter) makeModel(outfile string) {
r
.
model
=
m
}
// readFastaAndWriteRE writes out the number of restriction fragments, one per line
func
(
r
*
Extracter
)
readFastaAndWriteRE
()
{
outfile
:=
RemoveExt
(
r
.
Bamfile
)
+
".counts_"
+
r
.
RE
+
".txt"
reader
,
_
:=
fastx
.
NewDefaultReader
(
r
.
Fastafile
)
// writeRE write a RE file
func
writeRE
(
outfile
string
,
contigs
[]
*
ContigInfo
)
{
f
,
_
:=
os
.
Create
(
outfile
)
w
:=
bufio
.
NewWriter
(
f
)
defer
f
.
Close
()
totalCounts
:=
0
totalBp
:=
int64
(
0
)
r
.
contigs
=
[]
*
ContigInfo
{}
r
.
contigToIdx
=
map
[
string
]
int
{}
fmt
.
Fprintf
(
w
,
"#Contig
\t
RECounts
\t
Length
\n
"
)
for
_
,
contig
:=
range
contigs
{
totalCounts
+=
contig
.
recounts
totalBp
+=
int64
(
contig
.
length
)
fmt
.
Fprintf
(
w
,
"%s
\n
"
,
contig
)
}
w
.
Flush
()
log
.
Noticef
(
"RE counts (total: %d, avg 1 per %d bp) written to `%s`"
,
totalCounts
,
totalBp
/
int64
(
totalCounts
),
outfile
)
}
// readFastaAndWriteRE writes out the number of restriction fragments, one per line
func
(
r
*
Extracter
)
readFastaAndWriteRE
()
{
outfile
:=
RemoveExt
(
r
.
Bamfile
)
+
".counts_"
+
r
.
RE
+
".txt"
reader
,
_
:=
fastx
.
NewDefaultReader
(
r
.
Fastafile
)
seq
.
ValidateSeq
=
false
// This flag makes parsing FASTA much faster
r
.
contigs
=
[]
*
ContigInfo
{}
r
.
contigToIdx
=
map
[
string
]
int
{}
totalCounts
:=
0
totalBp
:=
int64
(
0
)
for
{
rec
,
err
:=
reader
.
Read
()
if
err
==
io
.
EOF
{
...
...
@@ -178,18 +191,16 @@ func (r *Extracter) readFastaAndWriteRE() {
length
:=
rec
.
Seq
.
Length
()
totalCounts
+=
count
totalBp
+=
int64
(
length
)
fmt
.
Fprintf
(
w
,
"%s
\t
%d
\t
%d
\n
"
,
name
,
count
,
length
)
contig
:=
&
ContigInfo
{
name
:
name
,
recounts
:
count
,
// To account for contigs with 0 RE sites
length
:
length
,
}
r
.
contigToIdx
[
name
]
=
len
(
r
.
contigs
)
r
.
contigs
=
append
(
r
.
contigs
,
contig
)
}
w
.
Flush
()
log
.
Noticef
(
"RE counts (total: %d, avg 1 per %d bp) written to `%s`"
,
totalCounts
,
totalBp
/
int64
(
totalCounts
),
outfile
)
writeRE
(
outfile
,
r
.
contigs
)
}
// calcIntraContigs determine the local enrichment of links on this contig.
...
...
@@ -207,7 +218,7 @@ func (r *Extracter) calcIntraContigs() {
// calcInterContigs calculates the MLE of distance between all contigs
func
(
r
*
Extracter
)
calcInterContigs
()
{
clmfile
:=
RemoveExt
(
r
.
Bamfile
)
+
".clm"
lines
:=
Parse
ClmLines
(
clmfile
)
lines
:=
read
ClmLines
(
clmfile
)
contigPairs
:=
make
(
map
[[
2
]
int
]
*
ContigPair
)
for
i
:=
0
;
i
<
len
(
lines
);
i
++
{
...
...
This diff is collapsed.
Click to expand it.
optimize.go
+
11
−
14
浏览文件 @
e16bbfa2
...
...
@@ -19,23 +19,21 @@ import (
// Optimizer runs the order-and-orientation procedure, given a clmfile
type
Optimizer
struct
{
Clmfile
string
REfile
string
Clustersfile
string
Group
int
RunGA
bool
Resume
bool
Seed
int64
NPop
int
NGen
int
MutProb
float64
CrossProb
float64
REfile
string
Clmfile
string
RunGA
bool
Resume
bool
Seed
int64
NPop
int
NGen
int
MutProb
float64
CrossProb
float64
}
// Run kicks off the Optimizer
func
(
r
*
Optimizer
)
Run
()
{
clm
:=
NewCLM
(
r
.
Clmfile
,
r
.
REfile
)
tourfile
:=
fmt
.
Sprintf
(
"%s.g%d.tour"
,
RemoveExt
(
r
.
Clm
file
)
,
r
.
Group
)
tourfile
:=
RemoveExt
(
r
.
RE
file
)
+
".tour"
// Load tourfile if it exists
if
_
,
err
:=
os
.
Stat
(
tourfile
);
r
.
Resume
&&
err
==
nil
{
...
...
@@ -45,8 +43,6 @@ func (r *Optimizer) Run() {
backupTourFile
:=
tourfile
+
".sav"
os
.
Rename
(
tourfile
,
backupTourFile
)
log
.
Noticef
(
"Backup `%s` to `%s`"
,
tourfile
,
backupTourFile
)
}
else
{
clm
.
parseClustersFile
(
r
.
Clustersfile
,
r
.
Group
)
}
shuffle
:=
false
// If one wants randomized initialization, set this to true
...
...
@@ -71,6 +67,7 @@ func (r *Optimizer) Run() {
break
}
}
clm
.
printTour
(
os
.
Stdout
,
clm
.
Tour
,
"FINAL"
)
log
.
Notice
(
"Success"
)
}
...
...
This diff is collapsed.
Click to expand it.
partition.go
+
14
−
0
浏览文件 @
e16bbfa2
...
...
@@ -39,6 +39,7 @@ func (r *Partitioner) Run() {
r
.
Cluster
()
}
r
.
printClusters
()
r
.
splitRE
()
log
.
Notice
(
"Success"
)
}
...
...
@@ -183,6 +184,19 @@ func (r *Partitioner) readRE() {
len
(
r
.
contigs
),
r
.
Contigsfile
)
}
// splitRE reads in a three-column tab-separated file
// #Contig REcounts Length
func
(
r
*
Partitioner
)
splitRE
()
{
for
j
,
cl
:=
range
r
.
clusters
{
contigs
:=
[]
*
ContigInfo
{}
for
_
,
idx
:=
range
cl
{
contigs
=
append
(
contigs
,
r
.
contigs
[
idx
])
}
outfile
:=
fmt
.
Sprintf
(
"%s.g%d.txt"
,
RemoveExt
(
r
.
Contigsfile
),
j
)
writeRE
(
outfile
,
contigs
)
}
}
// parseDist imports the edges of the contig into a slice of DistLine
// DistLine stores the data structure of the distfile
// #X Y Contig1 Contig2 RE1 RE2 ObservedLinks ExpectedLinksIfAdjacent
...
...
This diff is collapsed.
Click to expand it.
预览
0%
请重试
或
添加新附件
.
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
保存评论
取消
想要评论请
注册
或
登录