Skip to content

Commit 8242189

Browse files
authored
Implemented knucleotide in D (hanabi1224#355)
Code based on 8-m.rs and 1.zig codes with D analogues.
1 parent d454527 commit 8242189

File tree

2 files changed

+212
-0
lines changed

2 files changed

+212
-0
lines changed

bench/algorithm/knucleotide/1-m.d

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
// port from 8-m.rs and 1.zig
2+
import std;
3+
import std.outbuffer : OutBuffer;
4+
5+
// Occurrence counts keyed by the packed 2-bit-per-nucleotide k-mer code.
alias Map = uint[Nullable!Code];
6+
// Factor applied in printMap to turn a count/total ratio into a percentage.
static double hundreed = 100.0;
7+
8+
/// A k-mer packed into a 64-bit word, two bits per nucleotide
/// (A = 0, C = 1, T = 2, G = 3, as produced by `encodeByte`).
static struct Code
{
    /// Packed nucleotides; the most recently pushed one sits in the low two bits.
    ulong data;

    /// Shifts the 2-bit value `c` into the low end of the code and truncates
    /// the result to the window described by `mask`.
    void push(ubyte c, ulong mask)
    {
        data = ((data << 2) | cast(ulong) c) & mask;
    }

    /// Builds a code from raw ASCII nucleotides (each byte is passed through
    /// `encodeByte`). The frame length is `s.length`.
    static Nullable!Code fromStr(ubyte[] s)
    {
        auto mask = Code.makeMask(s.length);
        auto res = Code(0);
        foreach (c; s)
            res.push(Code.encodeByte(c), mask);
        return nullable(res);
    }

    /// Decodes the low `frame` nucleotides back into an upper-case string,
    /// oldest nucleotide first.
    string toStr(size_t frame)
    {
        // Index is the 2-bit value produced by encodeByte.
        static immutable char[4] letters = "ACTG";
        auto res = new char[frame];
        auto code = this.data;
        // The newest nucleotide is in the low bits, so fill from the back.
        foreach_reverse (ref ch; res)
        {
            ch = letters[cast(size_t)(code & 0b11)];
            code >>= 2;
        }
        return cast(string) res;
    }

    /// Returns a mask with the low `2 * frame` bits set.
    /// A frame of 32 or more covers the whole 64-bit word.
    pragma(inline, true)
    static ulong makeMask(size_t frame)
    {
        // Shifting a 64-bit value by 64 or more is not defined, so handle
        // the full-width case explicitly.
        if (frame >= 32)
            return ulong.max;
        return (1UL << (2 * frame)) - 1UL;
    }

    /// Maps an ASCII nucleotide to its 2-bit value using bits 1..2 of the byte.
    pragma(inline, true)
    static ubyte encodeByte(ubyte c)
    {
        return (c >> 1) & 0b11;
    }
}
68+
69+
/// Input range that yields the packed code of every `frame`-long window of
/// an already 2-bit-encoded nucleotide sequence.
static struct CodeRange
{
    /// Index into `input` of the last nucleotide of the current window.
    size_t i = 0;
    /// Encoded nucleotides, starting at the last element of the first window.
    ubyte[] input;
    /// Code of the current window.
    Nullable!Code code;
    /// Mask that keeps `code` limited to `frame` nucleotides.
    ulong mask;

    /// True once every window has been produced.
    bool empty()
    {
        return this.i >= this.input.length;
    }

    /// Returns the code of the current window. Has no side effects, so it
    /// may be called any number of times between `popFront` calls.
    Nullable!Code front()
    {
        return this.code;
    }

    /// Slides the window one nucleotide to the right.
    void popFront()
    {
        this.i += 1;
        if (this.i < this.input.length)
            this.code.get.push(this.input[this.i], this.mask);
    }

    /// Params:
    ///   input = sequence of 2-bit nucleotide values
    ///   frame = window length in nucleotides
    ///
    /// The range is empty when `frame` is zero or longer than `input`.
    this(ubyte[] input, size_t frame)
    {
        this.mask = Code.makeMask(frame);
        this.code = Code(0);
        if (frame == 0 || input.length < frame)
        {
            // No complete window exists; leave `input` empty.
            this.input = null;
            return;
        }
        // Prime the code with the whole first window.
        foreach (c; input[0 .. frame])
            this.code.get.push(c, this.mask);
        this.input = input[frame - 1 .. $];
    }
}
101+
102+
/// Counts how often each k-mer occurs.
///
/// Params:
///   t = tuple of (encoded nucleotide sequence, frame length)
/// Returns: map from k-mer code to its number of occurrences
Map genMap(Tuple!(ubyte[], size_t) t)
{
    Map counts;
    auto codes = CodeRange(t[0], t[1]);
    for (; !codes.empty; codes.popFront())
    {
        auto key = codes.front();
        if (auto slot = key in counts)
            *slot += 1;
        else
            counts[key] = 1;
    }
    return counts;
}
113+
114+
/// A k-mer code paired with its occurrence count; used for sorting in printMap.
struct CountCode
{
    /// Number of occurrences of `code`.
    ulong count;
    /// The packed k-mer.
    Nullable!Code code;
}
119+
120+
/// Writes every k-mer of `myMap` with its relative frequency in percent,
/// most frequent first (ties ordered by ascending code value), followed by
/// an empty line.
///
/// Params:
///   self  = frame length used to decode the codes back to text
///   myMap = k-mer occurrence counts
///   buf   = output buffer that receives the report
void printMap(size_t self, Map myMap, ref OutBuffer buf)
{
    ulong total = 0;
    CountCode[] entries;
    foreach (pair; myMap.byPair)
    {
        entries ~= CountCode(pair.value, pair.key);
        total += pair.value;
    }

    // Sort straight into output order: descending count, then ascending code.
    entries.sort!((a, b) =>
            b.count < a.count ||
            (a.count == b.count && a.code.get.data < b.code.get.data));

    foreach (entry; entries)
    {
        const percent = cast(double) entry.count / cast(double) total * hundreed;
        buf.writefln("%s %.3f", entry.code.get.toStr(self), percent);
    }
    buf.write("\n");
}
144+
145+
/// Writes "<count>\t<sequence>" for the nucleotide string `s`, where the
/// count is looked up in `myMap` (0 when the sequence is absent).
void printOcc(ubyte[] s, ref Map myMap, ref OutBuffer buf)
{
    auto key = Code.fromStr(s);
    auto hit = key in myMap;
    uint occurrences = hit ? *hit : 0;
    buf.writefln("%d\t%s", occurrences, cast(string) s);
}
150+
151+
/// Reads the third FASTA record of the input file and returns its
/// nucleotides as 2-bit values (see `Code.encodeByte`).
///
/// Params:
///   args = program arguments; `args[1]` is the input file name, falling
///          back to "25000_in" when it is absent
/// Returns: the encoded sequence with line terminators removed
ubyte[] readInput(string[] args)
{
    immutable fileName = args.length > 1 ? args[1] : "25000_in";
    enum char key = '>';
    ubyte[] res;
    auto app = appender(&res);
    app.reserve(65_536);
    // Open the resolved name so the default is honoured when no argument is given.
    auto file = File(fileName);

    // Skip forward until the third header line has been consumed.
    byte x = 3;
    foreach (line; file.byLine())
    {
        // Blank lines are not headers; guard before indexing.
        if (line.length == 0 || line[0] != key)
            continue;
        if (--x == 0)
            break;
    }

    // Everything after the header is sequence data. Drop line terminators
    // explicitly instead of assuming each 61-byte chunk ends in a newline,
    // so a missing final newline does not lose a nucleotide.
    foreach (chunk; file.byChunk(61))
    {
        app ~= chunk
            .filter!(b => b != '\n' && b != '\r')
            .map!(b => Code.encodeByte(b));
    }

    return res;
}
176+
177+
/// Entry point: reads the sequence, counts the five fixed fragments on
/// worker threads, prints 1- and 2-nucleotide frequencies, then the
/// fragment occurrence counts from the shortest fragment to the longest.
void main(string[] args)
{
    static ubyte[][5] occs = [
        cast(ubyte[]) "GGTATTTTAATTTATAGT",
        cast(ubyte[]) "GGTATTTTAATT",
        cast(ubyte[]) "GGTATT",
        cast(ubyte[]) "GGTA",
        cast(ubyte[]) "GGT",
    ];

    auto input = readInput(args);
    auto buf1 = new OutBuffer();
    auto buf2 = new OutBuffer();

    // One background task per fragment length, in the same order as `occs`.
    alias Worker = typeof(task(&genMap, tuple(input, size_t.init)));
    Worker[] workers;
    foreach (fragment; occs)
    {
        auto worker = task(&genMap, tuple(input, fragment.length));
        worker.executeInNewThread();
        workers ~= worker;
    }

    // The short frames are computed on the main thread while the workers run.
    printMap(1, genMap(tuple(input, 1UL)), buf1);
    printMap(2, genMap(tuple(input, 2UL)), buf1);

    // Report fragments from last to first, waiting for each worker's result.
    foreach_reverse (idx, fragment; occs)
    {
        printOcc(fragment, workers[idx].yieldForce(), buf2);
    }

    write(buf1);
    write(buf2);
}

bench/bench_d.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ problems:
5151
- 1-im.d
5252
- 2-i.d
5353
- 2-im.d
54+
- name: knucleotide
55+
source:
56+
- 1-m.d
5457
compiler_version_command:
5558
compiler_version_regex:
5659
runtime_version_parameter:

0 commit comments

Comments
 (0)