/* hash_switch.d Perfect hashing used to implement string switches in D. By leonardo maffi, version 1.0, Jan 22 2010 --------------- Benchmark constants: NDATA=10_000, NLOOPS=4_000, random seed=10 Compiled with: LDC based on DMD v1.051 and llvm 2.6 (Fri Nov 27 12:54:12 2009) DMD Digital Mars D Compiler v1.042 ldc -O3 -release -inline dmd -O -release -inline Timings, ldc, seconds: test1: 4.48 // normal string switch test2: 2.98 // perfect hash test3: 2.09 test4: 2.07 test5: 5.44 // AA. Tango AA opIn_r is bug-slow Timings, dmd, seconds: test1: 5.55 test2: 5.35 test3: 4.73 test4: 4.79 test5: 3.61 --------------- Original idea from: http://www.reddit.com/r/programming/comments/asqml/ Perfect hash computed with: http://www.ibiblio.org/pub/Linux/devel/lang/c/mph-1.2.tar.gz Another possible solution: a lexicographic comparison tree, it can be implemented as a state machine too, with gotos (where the program counter encodes the state). The number of missed branches can be kept down and adapted to the probability of different keys. */ version (Tango) { import tango.stdc.stdio: printf; import tango.stdc.stdlib: rand, srand; } else { import std.c.stdio: printf; import std.c.stdlib: rand, srand; } int test1(char[][] strings) { int result; foreach (txt; strings) { switch (txt) { case "__alignof": result += 1; break; // do something case "__alignof__": result += 2; break; case "__asm": result += 3; break; case "__asm__": result += 4; break; case "__attribute": result += 5; break; case "__attribute__": result += 6; break; case "__const": result += 7; break; case "__const__": result += 8; break; case "__inline": result += 9; break; case "__inline__": result += 10; break; case "__signed": result += 11; break; case "__signed__": result += 12; break; case "__typeof": result += 13; break; case "__typeof__": result += 14; break; case "__volatile": result += 15; break; case "__volatile__": result += 16; break; case "all": result += 17; break; case "asm": result += 18; break; case "auto": result += 19; break; case "break": result += 20; break; case "case": result += 21; break; case "catch": result += 22; break; case "char": result += 23; break; case "class": result += 24; break; case "const": result += 25; break; case "continue": result += 26; break; case "default": result += 27; break; case "delete": result += 28; break; case "do": result += 29; break; case "double": result += 30; break; case "dynamic": result += 31; break; case "else": result += 32; break; case "enum": result += 33; break; case "except": result += 34; break; case "exception": result += 35; break; case "extern": result += 36; break; case "float": result += 37; break; case "for": result += 38; break; case "friend": result += 39; break; case "goto": result += 40; break; case "if": result += 41; break; case "inline": result += 42; break; case "int": result += 43; break; case "long": result += 44; break; case "new": result += 45; break; case "operator": result += 46; break; case "overload": result += 47; break; case "private": result += 48; break; case "protected": result += 49; break; case "public": result += 50; break; case "raise": result += 51; break; case "raises": result += 52; break; case "register": result += 53; break; case "reraise": result += 54; break; case "return": result += 55; break; case "short": result += 56; break; case "signed": result += 57; break; case "sizeof": result += 58; break; case "static": result += 59; break; case "struct": result += 60; break; case "switch": result += 61; break; case "this": result += 62; break; case "try": result += 63; break; case "typedef": result += 64; break; case "typeof": result += 65; break; case "union": result += 66; break; case "unsigned": result += 67; break; case "virtual": result += 68; break; case "void": result += 69; break; case "volatile": result += 70; break; case "while": result += 71; break; default: result += 72; break; } } return result; } int perfectHash(char[] key) { static const int[] g = [56, 11, 41, 15, 43, 41, 37, 46, -1, 27, 41, 39, 28, 26, 70, 59, 32, 4, 45, 59, 21, 58, 12, 30, 47, -1, 63, 70, 33, 66, 31, 0, 8, -1, 49, 47, 62, 39, 34, 24, -1, 21, 64, 40, 37, 2, 34, 12, 1, 44, 9, 60, 8, 0, 36, 52, 37, 25, 23, 37, 52, 65, 28, 57, 32, 65, 38, 33, 50, 45, 66, 66, -1, 31, 57, 61, 12, 0, 45, 0]; static const int[] T0 = [38, 67, 16, 59, 2, 14, 3, 16, 32, 46, 76, 33, 30, 65, 29, 4, 1, 35, 39, 18, 28, 67, 9, 38, 67, 7, 70, 26]; static const int[] T1 = [74, 33, 4, 1, 46, 32, 15, 32, 71, 58, 70, 19, 28, 15, 31, 13, 11, 6, 79, 32, 39, 47, 44, 6, 73, 16, 49, 29]; uint f0, f1; if (key.length < 2 || key.length > 13) return -1; foreach (c; key) { if (c < 95 || c > 122) return -1; f0 += T0[-95 + c]; f1 += T1[-95 + c]; } f0 %= 80; f1 %= 80; return (g[f0] + g[f1]) % 71; } int test2(char[][] strings) { int result; foreach (txt; strings) { switch (perfectHash(txt)) { case perfectHash("__alignof"): // compile-time run! if (txt == "__alignof") result += 1; // do something else goto default; // hash collision with wrong string break; case perfectHash("__alignof__"): if (txt == "__alignof__") result += 2; else goto default; break; case perfectHash("__asm"): if (txt == "__asm") result += 3; else goto default; break; case perfectHash("__asm__"): if (txt == "__asm__") result += 4; else goto default; break; case perfectHash("__attribute"): if (txt == "__attribute") result += 5; else goto default; break; case perfectHash("__attribute__"): if (txt == "__attribute__") result += 6; else goto default; break; case perfectHash("__const"): if (txt == "__const") result += 7; else goto default; break; case perfectHash("__const__"): if (txt == "__const__") result += 8; else goto default; break; case perfectHash("__inline"): if (txt == "__inline") result += 9; else goto default; break; case perfectHash("__inline__"): if (txt == "__inline__") result += 10; else goto default; break; case perfectHash("__signed"): if (txt == "__signed") result += 11; else goto default; break; case perfectHash("__signed__"): if (txt == "__signed__") result += 12; else goto default; break; case perfectHash("__typeof"): if (txt == "__typeof") result += 13; else goto default; break; case perfectHash("__typeof__"): if (txt == "__typeof__") result += 14; else goto default; break; case perfectHash("__volatile"): if (txt == "__volatile") result += 15; else goto default; break; case perfectHash("__volatile__"): if (txt == "__volatile__") result += 16; else goto default; break; case perfectHash("all"): if (txt == "all") result += 17; else goto default; break; case perfectHash("asm"): if (txt == "asm") result += 18; else goto default; break; case perfectHash("auto"): if (txt == "auto") result += 19; else goto default; break; case perfectHash("break"): if (txt == "break") result += 20; else goto default; break; case perfectHash("case"): if (txt == "case") result += 21; else goto default; break; case perfectHash("catch"): if (txt == "catch") result += 22; else goto default; break; case perfectHash("char"): if (txt == "char") result += 23; else goto default; break; case perfectHash("class"): if (txt == "class") result += 24; else goto default; break; case perfectHash("const"): if (txt == "const") result += 25; else goto default; break; case perfectHash("continue"): if (txt == "continue") result += 26; else goto default; break; case perfectHash("default"): if (txt == "default") result += 27; else goto default; break; case perfectHash("delete"): if (txt == "delete") result += 28; else goto default; break; case perfectHash("do"): if (txt == "do") result += 29; else goto default; break; case perfectHash("double"): if (txt == "double") result += 30; else goto default; break; case perfectHash("dynamic"): if (txt == "dynamic") result += 31; else goto default; break; case perfectHash("else"): if (txt == "else") result += 32; else goto default; break; case perfectHash("enum"): if (txt == "enum") result += 33; else goto default; break; case perfectHash("except"): if (txt == "except") result += 34; else goto default; break; case perfectHash("exception"): if (txt == "exception") result += 35; else goto default; break; case perfectHash("extern"): if (txt == "extern") result += 36; else goto default; break; case perfectHash("float"): if (txt == "float") result += 37; else goto default; break; case perfectHash("for"): if (txt == "for") result += 38; else goto default; break; case perfectHash("friend"): if (txt == "friend") result += 39; else goto default; break; case perfectHash("goto"): if (txt == "goto") result += 40; else goto default; break; case perfectHash("if"): if (txt == "if") result += 41; else goto default; break; case perfectHash("inline"): if (txt == "inline") result += 42; else goto default; break; case perfectHash("int"): if (txt == "int") result += 43; else goto default; break; case perfectHash("long"): if (txt == "long") result += 44; else goto default; break; case perfectHash("new"): if (txt == "new") result += 45; else goto default; break; case perfectHash("operator"): if (txt == "operator") result += 46; else goto default; break; case perfectHash("overload"): if (txt == "overload") result += 47; else goto default; break; case perfectHash("private"): if (txt == "private") result += 48; else goto default; break; case perfectHash("protected"): if (txt == "protected") result += 49; else goto default; break; case perfectHash("public"): if (txt == "public") result += 50; else goto default; break; case perfectHash("raise"): if (txt == "raise") result += 51; else goto default; break; case perfectHash("raises"): if (txt == "raises") result += 52; else goto default; break; case perfectHash("register"): if (txt == "register") result += 53; else goto default; break; case perfectHash("reraise"): if (txt == "reraise") result += 54; else goto default; break; case perfectHash("return"): if (txt == "return") result += 55; else goto default; break; case perfectHash("short"): if (txt == "short") result += 56; else goto default; break; case perfectHash("signed"): if (txt == "signed") result += 57; else goto default; break; case perfectHash("sizeof"): if (txt == "sizeof") result += 58; else goto default; break; case perfectHash("static"): if (txt == "static") result += 59; else goto default; break; case perfectHash("struct"): if (txt == "struct") result += 60; else goto default; break; case perfectHash("switch"): if (txt == "switch") result += 61; else goto default; break; case perfectHash("this"): if (txt == "this") result += 62; else goto default; break; case perfectHash("try"): if (txt == "try") result += 63; else goto default; break; case perfectHash("typedef"): if (txt == "typedef") result += 64; else goto default; break; case perfectHash("typeof"): if (txt == "typeof") result += 65; else goto default; break; case perfectHash("union"): if (txt == "union") result += 66; else goto default; break; case perfectHash("unsigned"): if (txt == "unsigned") result += 67; else goto default; break; case perfectHash("virtual"): if (txt == "virtual") result += 68; else goto default; break; case perfectHash("void"): if (txt == "void") result += 69; else goto default; break; case perfectHash("volatile"): if (txt == "volatile") result += 70; else goto default; break; case perfectHash("while"): if (txt == "while") result += 71; else goto default; break; default: result += 72; } } return result; } const char[][] keywords = [ // right keywords "__alignof", "__alignof__", "__asm", "__asm__", "__attribute", "__attribute__", "__const", "__const__", "__inline", "__inline__", "__signed", "__signed__", "__typeof", "__typeof__", "__volatile", "__volatile__", "all", "asm", "auto", "break", "case", "catch", "char", "class", "const", "continue", "default", "delete", "do", "double", "dynamic", "else", "enum", "except", "exception", "extern", "float", "for", "friend", "goto", "if", "inline", "int", "long", "new", "operator", "overload", "private", "protected", "public", "raise", "raises", "register", "reraise", "return", "short", "signed", "sizeof", "static", "struct", "switch", "this", "try", "typedef", "typeof", "union", "unsigned", "virtual", "void", "volatile", "while" ]; int test3(char[][] strings) { static const int[] map = [72, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71]; int result; foreach (txt; strings) { int h = perfectHash(txt); if (h != -1 && txt == keywords[h]) result += map[h + 1]; else result += map[0]; // default } return result; } int test4(char[][] strings) { int result; foreach (txt; strings) { int h = perfectHash(txt); if (h != -1 && txt == keywords[h]) result += h + 1; else result += 72; // default } return result; } int[char[]] keymap; static this() { foreach (i, key; keywords) keymap[key] = i + 1; keymap.rehash; } int test5(char[][] strings) { int result; foreach (txt; strings) { int* p = txt in keymap; if (p != null) result += *p; else result += 72; // default } return result; } void main() { const int NDATA = 10_000; const int NLOOPS = 4_000; const int USE_ALGO = 1; srand(10); const char[][] words = [ // right keywords "__alignof", "__alignof__", "__asm", "__asm__", "__attribute", "__attribute__", "__const", "__const__", "__inline", "__inline__", "__signed", "__signed__", "__typeof", "__typeof__", "__volatile", "__volatile__", "all", "asm", "auto", "break", "case", "catch", "char", "class", "const", "continue", "default", "delete", "do", "double", "dynamic", "else", "enum", "except", "exception", "extern", "float", "for", "friend", "goto", "if", "inline", "int", "long", "new", "operator", "overload", "private", "protected", "public", "raise", "raises", "register", "reraise", "return", "short", "signed", "sizeof", "static", "struct", "switch", "this", "try", "typedef", "typeof", "union", "unsigned", "virtual", "void", "volatile", "while", // wrong keywords "the", "of", "and", "a", "to", "in", "is", "you", "that", "it", "he", "was", "for", "on", "are", "as", "with", "his", "they", "I ", "at", "be", "this", "have", "from", "or", "one", "had", "by", ]; // std.string.split() not used here to use the C std lib only // generate test data char[][] testData = new char[][](NDATA); foreach (ref word; testData) word = words[rand() % words.length]; int tot; for (int i; i < NLOOPS; i++) { static if (USE_ALGO == 1) tot += test1(testData); static if (USE_ALGO == 2) tot += test2(testData); static if (USE_ALGO == 3) tot += test3(testData); static if (USE_ALGO == 4) tot += test4(testData); static if (USE_ALGO == 5) tot += test5(testData); } printf("%d\n", tot); }