By leonardo maffi
V.1.1, April 26 2009
Keywords: programming, Python, Psyco, D language, C, gcc, llvm
[Go back to the article index]
This is a little problem from the Ebook "Data Structures in Java, for the Principled
Programmer", page 191:
http://www.cs.williams.edu/~bailey/Java

# two_towers1_py.py
from sys import argv
from itertools import combinations
def main(n):
sqrs = [n ** 0.5 for n in xrange(1, n+1)]
half = sum(sqrs) / 2
sol = min((abs(sum(sub)-half), sub)
for nit in xrange(1, n+1)
for sub in combinations(sqrs, nit))[1]
print "err:", abs(sum(sol) - sum(set(sqrs).difference(sol)))
print "n:", n, [int(round(x * x)) for x in sol]
n = int(argv[1]) if len(argv) == 2 else 10
main(n)// two_towers1_d.d
import std.math: sqrt, abs, round;
import std.conv, d.func, d.comb;
import d.sets: set;
void main(string[] argv) {
int n = argv.length == 2 ? toInt(argv[1]) : 10;
auto sqrs = map((double d){ return sqrt(d); }, xrange(1, n+1));
double half = sum(sqrs) / 2;
auto sol = min(xsubsets(sqrs), (double[] s){ return abs(sum(s)-half); });
putr("err: ", abs(sum(sol) - sum(set(sqrs).difference(sol))));
putr("n: ", n, " ", map((double x) { return cast(int)round(x * x); }, sol) );
}>>> from math import sqrt >>> a = [1, 2, 4, 5, 6, 9, 11, 16, 17, 21, 23, 24, 25] >>> b = [3, 7, 8, 10, 12, 13, 14, 15, 18, 19, 20, 22] >>> da = sum(sqrt(x) for x in a) >>> db = sum(sqrt(x) for x in b) >>> da, db (42.816888402464038, 42.816891872614107) >>> abs(da - db) 3.4701500695177856e-06It's not difficult to write a much faster D version (that uses just Phobos), that is lower level compared to the first D one, just a bit higher-level than C code:
// two_towers2_d.d
import std.math: sqrt, abs, round;
import std.conv: toInt;
import std.stdio: writefln;
int[] finder(int n) {
double data[60];
double target = 0.0;
for (int i = 1; i <= n; i++) {
data[i - 1] = sqrt(cast(double)i);
target += data[i - 1];
}
target /= 2;
uint index_min;
double current_min = double.max;
for (uint index = 0; index < (1 << n); index++) {
auto i = index;
double current_tot = 0.0;
uint position;
while (i) {
if (i & 1)
current_tot += data[position];
i >>= 1;
position++;
}
double current_diff = abs(current_tot - target);
if (current_diff < current_min) {
current_min = current_diff;
index_min = index;
}
}
int[] result;
uint position;
while (index_min) {
if (index_min & 1)
result ~= position + 1;
index_min >>= 1;
position++;
}
return result;
}
void main(string[] argv) {
int n = argv.length == 2 ? toInt(argv[1]) : 10;
auto solution = finder(n);
double tot1 = 0.0;
double tot2 = 0.0;
for (int i = 1; i <= n; i++) {
bool found = false;
foreach (j; solution)
if (i == j)
found = true;
if (found)
tot1 += sqrt(cast(double)i);
else
tot2 += sqrt(cast(double)i);
}
double err = abs(tot1 - tot2);
writefln("err: %.16f", err);
writefln("n: ", n, " ", solution);
}// two_towers1_c.c
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
int main(int argc, char** argv) {
int n = argc == 2 ? atoi(argv[1]) : 10;
double data[60];
unsigned int i;
for (i = 0; i < 60; i++)
data[i] = 0.0;
double target = 0.0;
for (i = 0; i < n; i++) {
data[i] = sqrt((double)(i + 1));
target += data[i];
}
target /= 2;
unsigned int index_min = 0;
double min_difference = 1e30;
unsigned int index;
for (index = 0; index < (1 << n); index++) {
i = index;
double current_tot = 0.0;
unsigned int position = 0;
while (i) {
if (i & 1)
current_tot += data[position];
i >>= 1;
position++;
}
double current_diff = fabs(current_tot - target);
if (current_diff < min_difference) {
min_difference = current_diff;
index_min = index;
}
}
double tot1 = 0.0;
double tot2 = 0.0;
for (i = 0; i < n; i++)
if (index_min & (1 << i))
tot1 += sqrt((double)(i + 1));
else
tot2 += sqrt((double)(i + 1));
double err = fabs(tot1 - tot2);
printf("err: %.16f\n", err);
printf("n: %d ", n);
unsigned int position = 0;
while (index_min) {
if (index_min & 1)
printf("%d ", position + 1);
index_min >>= 1;
position++;
}
printf("\n");
return 0;
}LBB1_10: testl %esi, %esi jne LBB1_20 LBB1_11: pxor %xmm0, %xmm0 movapd %xmm0, %xmm2 jmp LBB1_24 LBB1_12: testl $1, %ecx je LBB1_14 LBB1_13: addsd (%edx), %xmm1 LBB1_14: addl $8, %edx shrl %ecx testl %ecx, %ecx jne LBB1_12 LBB1_15: subsd %xmm0, %xmm1 andpd LCPI1_2, %xmm1 ucomisd %xmm1, %xmm2 cmova %eax, %edi incl %eax cmpl %ebx, %eax minsd %xmm2, %xmm1 je LBB1_10
// gcc_approx.c
#include "math.h"
#include "stdio.h"
#define N 20
int main() {
double data[60];
int i;
double target = 0.0;
for (i = 0; i < N; i++) {
data[i] = sqrt((double)i + 1);
target += data[i];
}
target /= 2;
double min_difference = 1e30;
unsigned int index;
for (index = 0; index < (1 << N); index++) {
unsigned int i = index;
double current_tot = 0.0;
unsigned int position = 0;
while (i) {
if (i & 1)
current_tot += data[position];
i >>= 1;
position++;
}
double current_diff = fabs(current_tot - target);
if (current_diff < min_difference) {
min_difference = current_diff;
printf("%.18f\n", min_difference);
}
}
return 0;
}0 1 11 10 110 111 101 100 1100 1101 1111 1110 1010 1011 1001 1000 11000 11001 11011 ...
import sys, psyco
def main(n):
sqrs = [n ** 0.5 for n in xrange(1, n+1)]
half = sum(sqrs) / 2
tot0 = -half
current_min = 1e20
best = None
for i1 in xrange(2):
tot1 = tot0 + 1.0 if i1 else tot0
for i2 in xrange(2):
tot2 = tot1 + 1.4142135623730951 if i2 else tot1
for i3 in xrange(2):
tot3 = tot2 + 1.7320508075688772 if i3 else tot2
for i4 in xrange(2):
tot4 = tot3 + 2.0 if i4 else tot3
for i5 in xrange(2):
tot5 = tot4 + 2.2360679774997898 if i5 else tot4
for i6 in xrange(2):
tot6 = tot5 + 2.4494897427831779 if i6 else tot5
if abs(tot6) < current_min:
current_min = abs(tot6)
best = [i1, i2, i3, i4, i5, i6]
sol = [s for s, ok in zip(sqrs, best) if ok]
print "err: %.16f" % abs(sum(sol) - sum(set(sqrs).difference(sol)))
print "n:", n, [int(round(x * x)) for x in sol]
n = 6
psyco.bind(main)
main(n)# two_towers2_py.py
import sys, psyco
n = int(sys.argv[1]) if len(sys.argv) == 2 else 20
code = []
for i in xrange(1, n+1):
indent = " " * 2 * i
code.append(indent + "for i%d in xrange(2):" % i)
code.append(indent + " tot%d = tot%d + %r if i%d else tot%d" % (i, i-1, i**0.5, i, i-1))
indent = " " * 2 * (n + 1)
code.append(indent + "if abs(tot%d) < current_min:" % n)
code.append(indent + " current_min = abs(tot%d)" % n)
code.append(indent + " best = [%s]" % ", ".join("i%d" % i for i in xrange(1, n+1)))
code = "\n".join(code)
exec """
def main(n):
sqrs = [n ** 0.5 for n in xrange(1, n+1)]
half = sum(sqrs) / 2
tot0 = -half
current_min = 1e20
best = None
XXX
sol = [s for s, ok in zip(sqrs, best) if ok]
print "err: %.16f" % abs(sum(sol) - sum(set(sqrs).difference(sol)))
print "n:", n, [int(round(x * x)) for x in sol]""".replace("XXX", code)
psyco.bind(main)
main(n)
// two_towers3_d.d
import std.math: sqrt, abs;
import std.conv: toInt;
import std.stdio: writefln;
import std.metastrings: Format, ToString;
const int n = 25; // this can be changed
template SeriesGen1(string txt, string separator, int max, int min=0) {
static if (min > max)
const SeriesGen1 = "";
else static if (min == max)
const SeriesGen1 = Format!(txt, ToString!(max));
else
const SeriesGen1 = SeriesGen1!(txt, separator, max-1, min) ~ separator ~
Format!(txt, ToString!(max));
}
template GenCore(int i, string arr) {
const string GenCore = Format!("
if (abs(tot%s) < current_min) {
current_min = abs(tot%s);
best = [%s];
}", i, i, arr);
}
template GenLoop(int i) {
static if (i <= 0)
const string GenLoop = "";
else
const string GenLoop = GenLoop!(i - 1) ~
Format!("for (int i%s; i%s < 2; i%s++) {\n" ~
" double tot%s = i%s ? tot%s + sqrs[%s] : tot%s;\n",
i, i, i, i, i, i-1, i-1, i-1);
}
template GenClosing(int i) {
static if (i <= 0)
const string GenClosing = "";
else
const string GenClosing = GenClosing!(i - 1) ~ "}";
}
template FullGenerator(int i) {
const string FullGenerator = GenLoop!(i) ~
GenCore!(i, SeriesGen1!("i%s", ", ", i, 1)) ~
"\n"
~ GenClosing!(i);
}
int[] finder(int n)() {
double sqrs[60];
double half = 0.0;
for (int i = 1; i <= n; i++) {
sqrs[i - 1] = sqrt(cast(double)i);
half += sqrs[i - 1];
}
half /= 2;
double tot0 = -half;
double current_min = 1e20;
auto best = new int[n];
mixin(FullGenerator!(n));
int[] result;
for (int i; i < n; i++)
if (best[i])
result ~= (i + 1);
return result;
}
void main() {
auto solution = finder!(n)();
double tot1 = 0.0;
double tot2 = 0.0;
for (int i = 1; i <= n; i++) {
bool found = false;
foreach (j; solution)
if (i == j)
found = true;
if (found)
tot1 += sqrt(cast(double)i);
else
tot2 += sqrt(cast(double)i);
}
double err = abs(tot1 - tot2);
writefln("err: %.16f", err);
writefln("n: ", n, " ", solution);
}// two_towers2_c.ct
#include "stdio.h"
#include "math.h"
#include "string.h"
{{
n = 41 # this can be changed
print "#define N %d" % n
}}
int main() {
#define M 60
double sqrs[M];
int i;
for (i = 0; i < M; i++)
sqrs[i] = 0.0;
double half = 0.0;
for (i = 1; i <= N; i++) {
sqrs[i - 1] = sqrt((double)i);
half += sqrs[i - 1];
}
half /= 2;
double tot0 = -half;
double current_min = 1e20;
int best[M];
for (i = 0; i < M; i++)
best[i] = 0;
{{
print " // generated code for n = %d ------------------" % n
print " #if N != %d" % n
print " #error The defined value of N is wrong for the generated code."
print " #endif"
for i in xrange(1, n+1):
indent = " " * 2 * i
print indent + "int i%d; for (i%d = 0; i%d < 2; i%d++) {" % (i, i, i, i)
print indent + " double tot%d = i%d ? tot%d + sqrs[%d] : tot%d;" % (i, i, i-1, i-1, i-1)
indent = " " * 2 * (n + 1)
print indent + "if (fabs(tot%d) < current_min) {" % n
print indent + " current_min = fabs(tot%d);" % n
for i in xrange(0, n):
print indent + " best[%d] = i%d;" % (i, i+1)
print indent + "}"
for i in xrange(n, 0, -1):
indent = " " * 2 * i
print indent + "}"
print " // end generated code ------------------"
}}
double tot1 = 0.0;
double tot2 = 0.0;
for (i = 0; i < N; i++)
if (best[i])
tot1 += sqrt((double)(i + 1));
else
tot2 += sqrt((double)(i + 1));
double err = fabs(tot1 - tot2);
printf("err: %.16f\n", err);
printf("N: %d [", N);
for (i = 0; i < N; i++)
if (best[i])
printf("%d ", i + 1);
printf("]\n");
return 0;
}
Using llvm-gcc the performance is much higher than D code compiled with DMD
(I have not used the LDC D compiler, probably it will lead to much better performance
than DMD):
subs4_c_llvm.exe
err: 0.0000000552507515
N: 31 [2 4 5 6 7 10 11 12 15 18 19 22 26 27 28 29 ]
9.21 seconds
two_towers2_c_llvm.exe
err: 0.0000000011096404
N: 36 [1 3 4 5 7 8 10 12 13 16 20 21 23 24 26 29 33 34 35 ]
5 minutes 49.13 seconds
two_towers2_c_llvm.exe
err: 0.0000000000449774
N: 38 [1 5 6 7 9 10 12 13 14 15 17 18 19 21 24 27 29 30 35 38 ]
20 minutes 12.83 seconds
two_towers2_c_llvm.exe
err: 0.0000000144697481
N: 39 [1 4 6 9 11 13 15 16 17 18 19 21 22 23 25 26 27 31 33 36 ]
6 minutes 34.71 seconds
two_towers2_c_llvm.exe
err: 0.0000000001917329
N: 40 [1 3 4 6 8 9 10 11 12 15 16 20 21 25 28 31 33 35 36 37 40 ]
1 hour 35 minutes 40 seconds
two_towers2_c_llvm.exe
err: 0.0000000001424638
N: 41 [3 7 9 10 12 15 16 17 18 19 22 23 25 26 28 30 31 36 37 41 ]
3 hours 11 minutes 8 seconds
Note that 2^41 = 2_199_023_255_552 :-)