/* File: sentence.c * Author: Peter Ross * Created: 29 Nov 1999 * * See "Tracking the Automatic Ant", David Gale, Springer 1998 * for an introduction to this problem by Lee Sallows. * * The aim of this program is to produce a truthful sentence * of the general form * This automatically-generated sentence contains [a] As, * [b] Bs, [c] Cs, ....., [y] Ys and [z] Zs. * where [a], [b] etc are integers spelt out in English, eg * "twenty-three", "one", etc. Zero is spelt "no". * * contrib[][] is a table showing the number of occurrences of * each letter of the alphabet in each number from "no" to * "ninety-nine". In every case except "one" there is an extra `s' * included. This is because if [a]="three", the contribution * it makes is 2 Es, 1 H, 1R, 1 T -- and 1 S, as in the fragment * "three As," * ^^^^^ ^ * * The solution process is very simple: * 1) choose random values for [a], [b], [c] etc * 2) if correct, stop, else... * 3) count up the number of each letter in the current sentence, * using the curent values of [a], [b], [c] etc, to get updated * values [na], [nb], [nc] etc * 4) choose a random number of variables whose value is wrong and * replace them by their updated values * 5) go to 2) * This should eventually converge on a correct sentence, if one exists! * It may take a few million iterations, however; can it be made faster? * The problem illustrates the notion of epistasis quite nicely. * * Question for analysis: why is step 4 not "Choose EVERY variable whose.."? * There is a very good reason -- figure it out and try to put it into words. * Note also that "Choose ONE variable ..." is very much slower and less * reliable; why? * * Here is an example of a successful result: * "This correct sentence was computer-generated and it contains * six As, one B, six Cs, four Ds, thirty-two Es, nine Fs, two Gs, * five Hs, fifteen Is, one J, one K, two Ls, two Ms, twenty-three Ns, * seventeen Os, two Ps, one Q, eleven Rs, thirty Ss, twenty-four Ts, * five Us, six Vs, nine Ws, four Xs, five Ys and one Z." * The output is not quite like that, but does contain the information * needed. * * DON'T try re-implementing this in Java, Prolog etc -- you REALLY need * speed! */ /* The following 2-d array was computer-generated: */ int contrib[100][26] = { /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0}, /* no _s */ {0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0}, /* 1 _ */ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0}, /* 2 _s */ {0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0}, /* 3 _s */ {0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0}, /* 4 _s */ {0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0}, /* 5 _s */ {0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0}, /* 6 _s */ {0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,1,0,0,0,0}, /* 7 _s */ {0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0}, /* 8 _s */ {0,0,0,0,1,0,0,0,1,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0}, /* 9 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0}, /* 10 _s */ {0,0,0,0,3,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0}, /* 11 _s */ {0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,1,0,0,0}, /* 12 _s */ {0,0,0,0,2,0,0,1,0,0,0,0,0,1,0,0,0,1,1,2,0,0,0,0,0,0}, /* 13 _s */ {0,0,0,0,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0}, /* 14 _s */ {0,0,0,0,2,2,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0}, /* 15 _s */ {0,0,0,0,2,0,0,0,1,0,0,0,0,1,0,0,0,0,2,1,0,0,0,1,0,0}, /* 16 _s */ {0,0,0,0,4,0,0,0,0,0,0,0,0,2,0,0,0,0,2,1,0,1,0,0,0,0}, /* 17 _s */ {0,0,0,0,3,0,1,1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0}, /* 18 _s */ {0,0,0,0,3,0,0,0,1,0,0,0,0,3,0,0,0,0,1,1,0,0,0,0,0,0}, /* 19 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,2,0,0,1,0,1,0}, /* 20 _s */ {0,0,0,0,2,0,0,0,0,0,0,0,0,2,1,0,0,0,0,2,0,0,1,0,1,0}, /* 21 _s */ {0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,3,0,0,2,0,1,0}, /* 22 _s */ {0,0,0,0,3,0,0,1,0,0,0,0,0,1,0,0,0,1,1,3,0,0,1,0,1,0}, /* 23 _s */ {0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,1,1,2,1,0,1,0,1,0}, /* 24 _s */ {0,0,0,0,2,1,0,0,1,0,0,0,0,1,0,0,0,0,1,2,0,1,1,0,1,0}, /* 25 _s */ {0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,2,2,0,0,1,1,1,0}, /* 26 _s */ {0,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,2,2,0,1,1,0,1,0}, /* 27 _s */ {0,0,0,0,2,0,1,1,1,0,0,0,0,1,0,0,0,0,1,3,0,0,1,0,1,0}, /* 28 _s */ {0,0,0,0,2,0,0,0,1,0,0,0,0,3,0,0,0,0,1,2,0,0,1,0,1,0}, /* 29 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,2,0,0,0,0,1,0}, /* 30 _s */ {0,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,2,0,0,0,0,1,0}, /* 31 _s */ {0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,3,0,0,1,0,1,0}, /* 32 _s */ {0,0,0,0,2,0,0,2,1,0,0,0,0,0,0,0,0,2,1,3,0,0,0,0,1,0}, /* 33 _s */ {0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,2,1,2,1,0,0,0,1,0}, /* 34 _s */ {0,0,0,0,1,1,0,1,2,0,0,0,0,0,0,0,0,1,1,2,0,1,0,0,1,0}, /* 35 _s */ {0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,1,2,2,0,0,0,1,1,0}, /* 36 _s */ {0,0,0,0,2,0,0,1,1,0,0,0,0,1,0,0,0,1,2,2,0,1,0,0,1,0}, /* 37 _s */ {0,0,0,0,1,0,1,2,2,0,0,0,0,0,0,0,0,1,1,3,0,0,0,0,1,0}, /* 38 _s */ {0,0,0,0,1,0,0,1,2,0,0,0,0,2,0,0,0,1,1,2,0,0,0,0,1,0}, /* 39 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0}, /* 40 _s */ {0,0,0,0,1,1,0,0,0,0,0,0,0,1,2,0,0,1,0,1,0,0,0,0,1,0}, /* 41 _s */ {0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,0,0,1,1,2,0,0,1,0,1,0}, /* 42 _s */ {0,0,0,0,2,1,0,1,0,0,0,0,0,0,1,0,0,2,1,2,0,0,0,0,1,0}, /* 43 _s */ {0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,2,1,1,1,0,0,0,1,0}, /* 44 _s */ {0,0,0,0,1,2,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0}, /* 45 _s */ {0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,2,1,0,0,0,1,1,0}, /* 46 _s */ {0,0,0,0,2,1,0,0,0,0,0,0,0,1,1,0,0,1,2,1,0,1,0,0,1,0}, /* 47 _s */ {0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0}, /* 48 _s */ {0,0,0,0,1,1,0,0,1,0,0,0,0,2,1,0,0,1,1,1,0,0,0,0,1,0}, /* 49 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0}, /* 50 _s */ {0,0,0,0,1,2,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0}, /* 51 _s */ {0,0,0,0,0,2,0,0,1,0,0,0,0,0,1,0,0,0,1,2,0,0,1,0,1,0}, /* 52 _s */ {0,0,0,0,2,2,0,1,1,0,0,0,0,0,0,0,0,1,1,2,0,0,0,0,1,0}, /* 53 _s */ {0,0,0,0,0,3,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,1,0}, /* 54 _s */ {0,0,0,0,1,3,0,0,2,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0}, /* 55 _s */ {0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,2,1,0,0,0,1,1,0}, /* 56 _s */ {0,0,0,0,2,2,0,0,1,0,0,0,0,1,0,0,0,0,2,1,0,1,0,0,1,0}, /* 57 _s */ {0,0,0,0,1,2,1,1,2,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,1,0}, /* 58 _s */ {0,0,0,0,1,2,0,0,2,0,0,0,0,2,0,0,0,0,1,1,0,0,0,0,1,0}, /* 59 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0}, /* 60 _s */ {0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,0}, /* 61 _s */ {0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,2,0,0,1,1,1,0}, /* 62 _s */ {0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,0,1,1,2,0,0,0,1,1,0}, /* 63 _s */ {0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1,1,0}, /* 64 _s */ {0,0,0,0,1,1,0,0,2,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,0}, /* 65 _s */ {0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,1,0,0,0,2,1,0}, /* 66 _s */ {0,0,0,0,2,0,0,0,1,0,0,0,0,1,0,0,0,0,2,1,0,1,0,1,1,0}, /* 67 _s */ {0,0,0,0,1,0,1,1,2,0,0,0,0,0,0,0,0,0,1,2,0,0,0,1,1,0}, /* 68 _s */ {0,0,0,0,1,0,0,0,2,0,0,0,0,2,0,0,0,0,1,1,0,0,0,1,1,0}, /* 69 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,2,1,0,1,0,0,1,0}, /* 70 _s */ {0,0,0,0,3,0,0,0,0,0,0,0,0,2,1,0,0,0,1,1,0,1,0,0,1,0}, /* 71 _s */ {0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,2,2,0,1,1,0,1,0}, /* 72 _s */ {0,0,0,0,4,0,0,1,0,0,0,0,0,1,0,0,0,1,2,2,0,1,0,0,1,0}, /* 73 _s */ {0,0,0,0,2,1,0,0,0,0,0,0,0,1,1,0,0,1,2,1,1,1,0,0,1,0}, /* 74 _s */ {0,0,0,0,3,1,0,0,1,0,0,0,0,1,0,0,0,0,2,1,0,2,0,0,1,0}, /* 75 _s */ {0,0,0,0,2,0,0,0,1,0,0,0,0,1,0,0,0,0,3,1,0,1,0,1,1,0}, /* 76 _s */ {0,0,0,0,4,0,0,0,0,0,0,0,0,2,0,0,0,0,3,1,0,2,0,0,1,0}, /* 77 _s */ {0,0,0,0,3,0,1,1,1,0,0,0,0,1,0,0,0,0,2,2,0,1,0,0,1,0}, /* 78 _s */ {0,0,0,0,3,0,0,0,1,0,0,0,0,3,0,0,0,0,2,1,0,1,0,0,1,0}, /* 79 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0}, /* 80 _s */ {0,0,0,0,2,0,1,1,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0}, /* 81 _s */ {0,0,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0,0,1,2,0,0,1,0,1,0}, /* 82 _s */ {0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,0,1,1,2,0,0,0,0,1,0}, /* 83 _s */ {0,0,0,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,1,0}, /* 84 _s */ {0,0,0,0,2,1,1,1,2,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0}, /* 85 _s */ {0,0,0,0,1,0,1,1,2,0,0,0,0,0,0,0,0,0,2,1,0,0,0,1,1,0}, /* 86 _s */ {0,0,0,0,3,0,1,1,1,0,0,0,0,1,0,0,0,0,2,1,0,1,0,0,1,0}, /* 87 _s */ {0,0,0,0,2,0,2,2,2,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,1,0}, /* 88 _s */ {0,0,0,0,2,0,1,1,2,0,0,0,0,2,0,0,0,0,1,1,0,0,0,0,1,0}, /* 89 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ {0,0,0,0,1,0,0,0,1,0,0,0,0,2,0,0,0,0,1,1,0,0,0,0,1,0}, /* 90 _s */ {0,0,0,0,2,0,0,0,1,0,0,0,0,3,1,0,0,0,0,1,0,0,0,0,1,0}, /* 91 _s */ {0,0,0,0,1,0,0,0,1,0,0,0,0,2,1,0,0,0,1,2,0,0,1,0,1,0}, /* 92 _s */ {0,0,0,0,3,0,0,1,1,0,0,0,0,2,0,0,0,1,1,2,0,0,0,0,1,0}, /* 93 _s */ {0,0,0,0,1,1,0,0,1,0,0,0,0,2,1,0,0,1,1,1,1,0,0,0,1,0}, /* 94 _s */ {0,0,0,0,2,1,0,0,2,0,0,0,0,2,0,0,0,0,1,1,0,1,0,0,1,0}, /* 95 _s */ {0,0,0,0,1,0,0,0,2,0,0,0,0,2,0,0,0,0,2,1,0,0,0,1,1,0}, /* 96 _s */ {0,0,0,0,3,0,0,0,1,0,0,0,0,3,0,0,0,0,2,1,0,1,0,0,1,0}, /* 97 _s */ {0,0,0,0,2,0,1,1,2,0,0,0,0,2,0,0,0,0,1,2,0,0,0,0,1,0}, /* 98 _s */ {0,0,0,0,2,0,0,0,2,0,0,0,0,4,0,0,0,0,1,1,0,0,0,0,1,0} /* 99 _s */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ }; char *basicNames[20] = {"no", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" }; char *tens[8] = {"twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" }; #include #include /* for random/srandom */ #include /* for time(), for random seeding */ #include /* for getpid(), for random seeding */ void usage() { printf("sentence [-p preamble] [-r report] [-i maxiter] [-s seed]\n"); printf(" where the preamble, if omitted, is:\n"); printf(" 'This computer-generated sentence contains'\n"); printf(" (nb: a poor choice) and the sentence continues with\n"); printf(" [a] As, [b] Bs, ... [y] Ys and [z] Zs\n"); printf(" Of course, if (say) [b]=1 then an `s' is omitted.\n"); printf(" Dashes, spaces and commas are not included in counts.\n"); printf(" Maxiter is the maximum number of iterations, and\n"); printf(" defaults to 100000 (which is a bit low; try 1000000)\n"); printf(" Report is the reporting interval, default 1000\n"); printf(" You can seed the random number generator with -s;\n"); printf(" the default uses the time and the process ID.\n"); } #define DEFAULT_PREAMBLE "This computer-generated sentence contains" void utter(int n, int m) { if(m < 1 || m > 99) { printf("OUCH: crazy number to utter: %d at %d\n",m,n); exit(1); } if(m<20) { printf("%s",basicNames[m]); } else { printf("%s",tens[(m-20)/10]); if((m%10) != 0) { printf("-%s",basicNames[m%10]); } } printf(" %c",n+'A'); if(m !=1) printf("s"); } int main(int argc, char **argv) { int additional[26] = {2,1,1,2,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1}; /* "ABCD.. and Z" */ /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ int current[26], new[26]; int count, report = 1000, maxcount = 100000, i, j, k, c, differs; long int seed = 0; char *preamble = DEFAULT_PREAMBLE; while(argc > 1) { argc--; argv++; if(argv[0][0]=='-') { switch(argv[0][1]) { case 'p': if(argc==1) { printf("-p needs an argument string\n"); usage(); exit(1); } else { argc--; argv++; preamble = argv[0]; } break; case 'i': if(argc==1) { printf("-i needs an argument number\n"); usage(); exit(1); } else { argc--; argv++; maxcount = atoi(argv[0]); } break; case 'r': if(argc==1) { printf("-r needs an argument number\n"); usage(); exit(1); } else { argc--; argv++; report = atoi(argv[0]); } break; case 's': if(argc==1) { printf("-s needs an argument number\n"); usage(); exit(1); } else { argc--; argv++; seed = atol(argv[0]); } break; default: usage(); exit(1); } } } /* Prepare the `additional' array, which is the counts of * the letters in the preamble, added to the fixed ingredients * of one each of the letters A-Z and the final connective "AND" */ k = strlen(preamble); for(i=0; i= 'A' && c <= 'Z') additional[c-'A']++; else if(c >= 'a' && c <= 'z') additional[c-'a']++; } /* Seed the RNG. Even a brain-damaged RNG should be OK! */ if(seed==0) seed = time(NULL) + getpid(); srandom(seed); /* Initialise current values randomly */ for(i=0;i<26;i++) { current[i] = random() % 100; /* so in range 0..99 */ } differs = 1; count = 0; while(differs) { /* Abort if we reach maximum iterations */ if(count == maxcount) { printf("Stopped after %d iterations\n", maxcount); exit(1); } else count++; /* Set up initial bit of new totals */ for(i=0;i<26;i++) { new[i] = additional[i]; } /* Work out the new totals */ for(i=0; i<26; i++) { j = current[i]; for(k=0;k<26;k++) { new[k] = new[k] + contrib[j][k]; } } /* Give some output every `report' iterations to reassure user */ if((count % report) == 0) { for(i=0;i<26;i++) { printf("%3d",current[i]); } printf("\n"); } /* Count up number of wrong totals */ differs = 0; for(i=0; i<26; i++) { if(new[i] != current[i]) { differs++; } } /* If any, adjust a random one to have its new value */ if(differs) { k = random() % differs; /* a random one to change */ for(i=0; i<26; i++) { if(new[i] != current[i]) { if(k == 0) { current[i] = new[i]; /* break; */ } else k--; } } } } /* If we reach here, we won; print iter, preamble and winning counts */ printf("Seed %ld iteration %d :\n\n%s\n ",seed,count,preamble); for(i=0;i<24;i++) { utter(i,current[i]); printf(",\n "); } utter(24,current[24]); printf("\n and "); utter(25,current[25]); printf(".\n"); }