Skip to content

Commit d447e9a

Browse files
johnmayegonw
authored andcommitted
Eliminate commons-math3 dependency by reimplementing the MersenneTwister logic. This was only used for a over-complicated PRNG used in the shortest path fingerprinter. A simpler linear-feedback shift PRNG would work just as well here but we want to keep backwards compatbility. Note this fingerprint it's really very good since the fingerprints are non-transativie and can't be used for substructure screening.
1 parent ff49a24 commit d447e9a

File tree

2 files changed

+141
-27
lines changed

2 files changed

+141
-27
lines changed

descriptor/fingerprint/pom.xml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,6 @@
1818
<groupId>javax.vecmath</groupId>
1919
<artifactId>vecmath</artifactId>
2020
</dependency>
21-
<dependency>
22-
<groupId>org.apache.commons</groupId>
23-
<artifactId>commons-math3</artifactId>
24-
<version>3.6.1</version>
25-
</dependency>
2621
<dependency>
2722
<groupId>org.hamcrest</groupId>
2823
<artifactId>hamcrest</artifactId>

descriptor/fingerprint/src/main/java/org/openscience/cdk/fingerprint/RandomNumber.java

Lines changed: 141 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,38 +21,157 @@
2121
* You should have received a copy of the GNU Lesser General Public License
2222
* along with this program; if not, write to the Free Software
2323
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24+
*
25+
* Original algorithm from commons-math3.
26+
*
27+
* Licensed to the Apache Software Foundation (ASF) under one or more
28+
* contributor license agreements. See the NOTICE file distributed with
29+
* this work for additional information regarding copyright ownership.
30+
* The ASF licenses this file to You under the Apache License, Version 2.0
31+
* (the "License"); you may not use this file except in compliance with
32+
* the License. You may obtain a copy of the License at
33+
*
34+
* http://www.apache.org/licenses/LICENSE-2.0
35+
*
36+
* Unless required by applicable law or agreed to in writing, software
37+
* distributed under the License is distributed on an "AS IS" BASIS,
38+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
39+
* See the License for the specific language governing permissions and
40+
* limitations under the License.
2441
*/
2542
package org.openscience.cdk.fingerprint;
2643

2744
import java.io.Serializable;
28-
import org.apache.commons.math3.random.MersenneTwister;
29-
import org.apache.commons.math3.random.RandomAdaptor;
30-
import org.apache.commons.math3.random.RandomGenerator;
3145

3246
/**
33-
* Generates pseudorandom numbers using the MersenneTwister method from commons-math.
47+
* Generates pseudorandom numbers using the MersenneTwister adapted from commons-math3.
48+
* Previously we included commons-math3 as a dependency but since this was the only usage
49+
* we have reimplemented the algorithm here.<br/>
50+
*
51+
* Note - since chemical fingerprints don't need to be secure a very simple PRNG
52+
* like XorShift would be simpler and faster:
53+
*
54+
* <pre>{@code
55+
* long xorshift64(long seed) {
56+
* seed = seed ^ seed << 21;
57+
* seed = seed ^ seed >>> 35;
58+
* return seed ^ seed << 4;
59+
* }
60+
* }</pre>
3461
*
3562
* @author Syed Asad Rahman (2012)
63+
* @author John Mayfield
3664
* @cdk.keyword fingerprint
3765
* @cdk.keyword similarity
38-
* @cdk.module fingerprint
39-
* @cdk.githash
4066
*/
41-
public class RandomNumber implements Serializable {
42-
43-
private static final long serialVersionUID = 23345464573453571L;
44-
45-
private transient final RandomGenerator rg = new RandomAdaptor(new MersenneTwister());
46-
47-
/**
48-
* Mersenne Twister Random Number for a hashcode within a range between 0 to n.
49-
*
50-
* @param n the maximum value the
51-
* @param seed the seed for the next pseudorandom number
52-
* @return next pseudorandom number
53-
*/
54-
public int generateMersenneTwisterRandomNumber(int n, long seed) {
55-
rg.setSeed(seed);
56-
return rg.nextInt(n);
67+
final class RandomNumber implements Serializable {
68+
69+
private static final int N = 624;
70+
private static final int M = 397;
71+
private static final int[] MAG01 = new int[]{0, -1727483681};
72+
private int[] mt = new int[624];
73+
private int mti;
74+
75+
public RandomNumber() {
76+
}
77+
78+
private void setSeed(int seed) {
79+
int[] seeds = new int[]{0,seed};
80+
long longMT = 19650218;
81+
this.mt[0] = (int) longMT;
82+
83+
for (this.mti = 1; this.mti < 624; ++this.mti) {
84+
longMT = 1812433253L * (longMT ^ longMT >> 30) + (long) this.mti & 4294967295L;
85+
this.mt[this.mti] = (int) longMT;
86+
}
87+
88+
int i = 1;
89+
int j = 0;
90+
91+
int k;
92+
long l0;
93+
long l1;
94+
long l;
95+
for (k = 624; k != 0; --k) {
96+
l0 = (long) this.mt[i] & 2147483647L | (this.mt[i] < 0 ? 2147483648L : 0L);
97+
l1 = (long) this.mt[i - 1] & 2147483647L | (this.mt[i - 1] < 0 ? 2147483648L : 0L);
98+
l = (l0 ^ (l1 ^ l1 >> 30) * 1664525L) + (long) seeds[j] + (long) j;
99+
this.mt[i] = (int) (l & 4294967295L);
100+
++i;
101+
++j;
102+
if (i >= 624) {
103+
this.mt[0] = this.mt[623];
104+
i = 1;
105+
}
106+
107+
if (j >= 2) {
108+
j = 0;
109+
}
110+
}
111+
112+
for (k = 623; k != 0; --k) {
113+
l0 = (long) this.mt[i] & 2147483647L | (this.mt[i] < 0 ? 2147483648L : 0L);
114+
l1 = (long) this.mt[i - 1] & 2147483647L | (this.mt[i - 1] < 0 ? 2147483648L : 0L);
115+
l = (l0 ^ (l1 ^ l1 >> 30) * 1566083941L) - (long) i;
116+
this.mt[i] = (int) (l & 4294967295L);
117+
++i;
118+
if (i >= 624) {
119+
this.mt[0] = this.mt[623];
120+
i = 1;
121+
}
122+
}
123+
124+
this.mt[0] = -2147483648;
125+
}
126+
127+
private int next(int bits) {
128+
int y;
129+
if (this.mti >= 624) {
130+
int mtNext = this.mt[0];
131+
132+
int k;
133+
int mtCurr;
134+
for (k = 0; k < 227; ++k) {
135+
mtCurr = mtNext;
136+
mtNext = this.mt[k + 1];
137+
y = mtCurr & -2147483648 | mtNext & 2147483647;
138+
this.mt[k] = this.mt[k + 397] ^ y >>> 1 ^ MAG01[y & 1];
139+
}
140+
141+
for (k = 227; k < 623; ++k) {
142+
mtCurr = mtNext;
143+
mtNext = this.mt[k + 1];
144+
y = mtCurr & -2147483648 | mtNext & 2147483647;
145+
this.mt[k] = this.mt[k + -227] ^ y >>> 1 ^ MAG01[y & 1];
146+
}
147+
148+
y = mtNext & -2147483648 | this.mt[0] & 2147483647;
149+
this.mt[623] = this.mt[396] ^ y >>> 1 ^ MAG01[y & 1];
150+
this.mti = 0;
151+
}
152+
153+
y = this.mt[this.mti++];
154+
y ^= y >>> 11;
155+
y ^= y << 7 & -1658038656;
156+
y ^= y << 15 & -272236544;
157+
y ^= y >>> 18;
158+
return y >>> 32 - bits;
159+
}
160+
161+
public int generateMersenneTwisterRandomNumber(int n, int seed) {
162+
setSeed(seed);
163+
164+
if ((n & -n) == n) {
165+
return (int) ((long) n * (long) this.next(31) >> 31);
166+
} else {
167+
int bits;
168+
int val;
169+
do {
170+
bits = this.next(31);
171+
val = bits % n;
172+
} while (bits - val + (n - 1) < 0);
173+
174+
return val;
175+
}
57176
}
58177
}

0 commit comments

Comments
 (0)