/* a least-squares fitter to a straight line. Input two cols of numbers, x and y; it outputs a and b, such that y = a + bx */ #include #include #define MAX 300000 #define TINY 0.000001 #define LINELEN 200 #define COMMENT_CHAR '#' double x[MAX], y[MAX]; double get_t( /* int n */ ); main(argc, argv) int argc; char *argv[]; { int i, num; double sumx, sumy, sumxy, sumxx, sumyy, meanx, meany, b, a; double bconf, syx, r; char line[LINELEN+1]; num = 0; while (fgets(line, LINELEN, stdin) != NULL) { if (line[0] == COMMENT_CHAR) { continue; } if (sscanf(line, "%lf %lf", &(x[num]), &(y[num])) != 2) { fprintf(stderr, "error in line ..%s..\n", line); exit(1); } num++; if (num == MAX) { fprintf(stderr, "stopping after %d lines \n", MAX); break; } } if (num == 0) { fprintf(stderr, "no input - no output\n"); exit(1); } sumx = 0; sumy = 0; sumxy = 0; sumxx = 0; sumyy = 0; for (i = 0; i < num; i++) { sumx += x[i]; sumy += y[i]; sumxy += x[i]*y[i]; sumxx += x[i]*x[i]; sumyy += y[i]*y[i]; } meanx = sumx/num; meany = sumy/num; if ((num*sumxx - sumx*sumx) < TINY) { fprintf(stderr, "indeterminate - zero denominator\n"); exit(1); } b = ((num*sumxy) - (sumx*sumy))/((num*sumxx - sumx*sumx)); a = meany - b*meanx; printf("%10.6e %10.6e\n", a, b); /* go on to calculate some other things */ /* standard error of the estimate */ if (num > 2) { syx = sqrt((sumyy - (a*sumy + b*sumxy))/(num - 2.0)); } else { syx = 0.0; } /* 95% confidence interval in the slope 'b' */ if ((num > 2) && ((sumxx - ((sumx*sumx)/(double) num)) > TINY)) { bconf = get_t(num - 2)*syx/sqrt((sumxx - ((sumx*sumx)/(double) num))); } else { bconf = 0.0; } /* correlation coefficient r */ if (((sumxx - ((sumx*sumx)/(double) num)) < TINY) || ((sumyy - ((sumy*sumy)/(double) num)) < TINY)) { r = -99.0; } else { r = (sumxy - ((sumx*sumy)/(double) num)); r /= sqrt((sumxx - ((sumx*sumx)/(double) num))*((sumyy - ((sumy*sumy)/(double) num)))); } printf("syx = %lf bconf = %9.4e r = %lf\n", syx, bconf, r); exit (0); } /* return the student t statistic with upper-tail area 0.025 (that will give us a test for 95% confidence) for the given number of degrees of freedom */ double tt[30] = { 12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262, 2.228, 2.201, 2.179, 2.160, 2.145, 2.131, 2.120, 2.110, 2.101, 2.093, 2.086, 2.080, 2.074, 2.069, 2.064, 2.060, 2.056, 2.052, 2.048, 2.045, 2.042 }; double tt40 = 2.021; double tt60 = 2.000; double tt120 = 1.980; double ttinf = 1.960; double get_t(n) int n; { if (n <= 30) return(tt[n - 1]); else if (n < 40) { return(tt[29] + 0.1*(tt40 - tt[29])*(n - 30.0)); } else if (n == 40) { return(tt40); } else if (n < 60) { return(tt40 + (((tt60 - tt40)*(n - 40.0))/20.0)); } else if (n == 60) { return(tt60); } else if (n < 120) { return(tt60 + (((tt120 - tt60)*(n - 60))/60.0)); } else if (n == 120) { return(tt120); } else { return(ttinf); } }