![]() |
|
|
|
#1
|
|||
|
|||
|
KS-Test - what values should go into kstwo() ?
Hi,
first of all: thanks for this great book! I am wondering what values I should give to kstwo(). I made 1000 Experiments and built a histogram. I think the data is gumbel-distributed. I calculated all parameters (mue and beta). Now I can compute all 'theoretical' data - so I got two vectors. I want to know if both vectors have the same distribution. Currently I summed up all bins and gave this data to kstwo(). The results are very suprising: if I use just even numbers, I get p-values around 0.9. If I use unrounded numbers I get p-values around 0.0 - how can this happen? Do I give the right vectors to kstwo? Here is my source (Test1 and Test2 gave me high p-values, Test3 gave me low p-vals): void testkstwo(){ // Test 1 double nrDis = 0.0; double nrProb = 0.0; int elementsToCompare = 100; // nutzlos, da bei Direktinitialisierung die Gr??e zur Kompilierzeit da sein muss VecDoub vOne(100); VecDoub vTwo(100); double v1[100] = { 0, 0, 0, 0, 0, 24, 98, 196, 329, 476, 610, 730, 819, 888, 934, 965, 981, 988, 995, 998, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000 }; double v2[100] = { 0, 0, 0, 0, 0, 4, 31, 111, 250, 418, 577, 707, 804, 872, 917, 947, 966, 979, 986, 991, 995, 997, 998, 999, 999, 999, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000 }; for ( int i = 0; i < elementsToCompare; i++ ) { vOne[i] = v1[i]; vTwo[i] = v2[i]; cout << " -> Lauf - " << i << "; Vergleich an vOne - " << vOne[i] << "; vTwo " << vTwo[i] << "; " << endl; } kstwo( vOne, vTwo, nrDis, nrProb ); cout << "KS-Test: Dist: " << nrDis << "; P: " << nrProb << endl; // Test 2 double nrDisT2 = 0.0; double nrProbT2 = 0.0; int elementsToCompareT2 = 100; // nutzlos, da bei Direktinitialisierung die Gr??e zur Kompilierzeit da sein muss VecDoub vOneT2(100); VecDoub vTwoT2(100); double v1T2[100] = { 0, 0, 0, 0, 0, 24, 98, 196, 329, 476, 610, 730, 819, 888, 934, 965, 981, 988, 995, 998, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000 }; double v2T2[100] = { 0, 0, 0, 0, 0, 4, 31, 111, 250, 418, 577, 707, 804, 872, 917, 947.56, 966, 979.88, 986, 991, 995, 997, 998, 999, 999, 999, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000 }; for ( int i = 0; i < elementsToCompare; i++ ) { vOneT2[i] = v1T2[i]; vTwoT2[i] = v2T2[i]; cout << " -> Lauf - " << i << "; Vergleich an vOneT2 - " << vOneT2[i] << "; vTwoT2 " << vTwoT2[i] << "; " << endl; } kstwo( vOneT2, vTwoT2, nrDisT2, nrProbT2 ); cout << "KS-Test: DistT2: " << nrDisT2 << "; PT2: " << nrProbT2 << endl; // Test 3 double nrDisT3 = 0.0; double nrProbT3 = 0.0; int elementsToCompareT3 = 100; VecDoub vOneT3(100); VecDoub vTwoT3(100); double v1T3[100] = { 0, 0, 0, 0, 0, 24, 98, 196, 329, 476, 610, 730, 819, 888, 934, 965, 981, 988, 995, 998, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000 }; double v2T3[100] = { 6.24211e-22, 5.61377e-13, 2.45976e-07, 0.000882158, 0.153194, 3.94838, 30.589, 111.12, 250.483, 418.009, 577.19, 707.315, 803.979, 871.56, 917.028, 946.887, 966.198, 978.567, 986.442, 991.436, 994.595, 996.591, 997.851, 998.645, 999.146, 999.462, 999.661, 999.786, 999.865, 999.915, 999.947, 999.966, 999.979, 999.987, 999.992, 999.995, 999.997, 999.998, 999.999, 999.999, 999.999, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000 }; double flooredV2T3[100]; for ( int i = 0; i < elementsToCompare; i++ ) { flooredV2T3[i] = floor( v2T3[i] ); cout << "Rundungsalarm - Lauf " << i << " : " << v2T3[i] << " zu " << flooredV2T3[i] << endl; } for ( int i = 0; i < elementsToCompare; i++ ) { vOneT3[i] = v1T3[i]; vTwoT3[i] = flooredV2T3[i]; cout << " -> Lauf - " << i << "; Vergleich an vOneT3 - " << vOneT3[i] << "; vTwoT3 " << vTwoT3[i] << "; " << endl; } kstwo( vOneT3, vTwoT3, nrDisT3, nrProbT3 ); cout << "KS-Test: DistT3: " << nrDisT3 << "; PT3: " << nrProbT3 << endl; } int main(int argc, char** argv) { testkstwo(); return 0; } |
|
#2
|
|||
|
|||
|
What exactly are the vectors you're passing to kstwo()? It looks as if they may be cumulative counts (for the first one) and expected cumulative counts (for the second). That's not what kstwo wants. It wants the actual original data points. It computes the (empirical) cdfs itself.
|
|
#3
|
|||
|
|||
|
Hi,
yes I pass the cumulative function for my experiment data and the cumulative data for my theoretical distribution. You tell me that's wrong - okay! What data should I pass then? The original data points? So you mean I should pass the heights of the histogram bins as first vector? What should I pass for the second vector - the theoretical y value to the bins-mid density function? I am quite surprised because if I take a look at the kstwo-function I can see that one of the first steps is sorting both of the data containers and if I am right, then the binheight-bin-relation will get lost ?! Edit: okay, if you mean with data points, each value between 0 and 1 of 1000 experiment-events, then the sorting happens on those list and cdf could be calculated. What about the second vector - should I roll my theoretical-distribution-dice tons of time? Thanks for some more clarity - great that you made me thinking! Last edited by Jochen1980; 07-28-2011 at 01:24 PM. Reason: Added some information |
|
#4
|
|||
|
|||
|
Okay, next try:
I send these two data point vectors to kstwo() in R I get a P-Value of around 1 in kstwo around 0 - maybe someone can send those vectors to his kstwo()-method or statistical analysis software to tell me what result is wrong. If I plot the curves I guess 1 is correct. Thanks in advance - here is the code: void testkstwo(){ // Test 1 double nrDis = 0.0; double nrProb = 0.0; int elementsToCompare = 100; // nutzlos, da bei Direktinitialisierung die Gr??e zur Kompilierzeit da sein muss VecDoub vOne(100); VecDoub vTwo(100); double v1[100] = { 0, 0, 0, 0, 0, 24, 74, 98, 133, 147, 134, 120, 89, 69, 46, 31, 16, 7, 7, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; double v2[100] = { 0, 0, 0, 0, 1, 10, 49, 113, 160, 168, 147, 113, 81, 55, 37, 24, 15, 10, 6, 4, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for ( int i = 0; i < elementsToCompare; i++ ) { vOne[i] = v1[i]; vTwo[i] = v2[i]; //cout << " -> Lauf - " << i << "; Vergleich an vOne - " << vOne[i] << "; vTwo " << vTwo[i] << "; " << endl; } kstwo( vOne, vTwo, nrDis, nrProb ); cout << "KS-Test Test-1: Dist: " << nrDis << "; P: " << nrProb << endl; // Result here: KS-Test: Dist: 0.92; P: 1.31664e-38 // Result R: 1] "KST Dist: 0.04" [1] "KST P: 1" } |
|
#5
|
|||
|
|||
|
Quote:
Code:
#include "../code/nr3.h"
#include "../code/sort.h"
#include "../code/ksdist.h"
#include "../code/kstests.h"
int main()
{
double nrDis;
double nrProb;
const int elementsToCompare = 100;
double v1[elementsToCompare] = {
0, 0, 0, 0, 0, 24, 74, 98, 133, 147,
134, 120, 89, 69, 46, 31, 16, 7, 7, 3,
2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
double v2[elementsToCompare] = {
0, 0, 0, 0, 1, 10, 49, 113, 160, 168,
147, 113, 81, 55, 37, 24, 15, 10, 6, 4,
2, 2, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
VecDoub vOne(elementsToCompare, v1); // No need to copy them; just use this constructor
VecDoub vTwo(elementsToCompare, v2); // No need to copy them; just use this constructor
kstwo(vOne, vTwo, nrDis, nrProb);
cout << scientific << setprecision(5);
cout << "KS-Test Test-1: Dist: " << nrDis << "; P: " << nrProb << endl;
return 0;
}
After compiling with GNU g++ version 4.1.2 on my Centos Linux workstation, the output agrees with what you observed from R: __________________________________________________ KS-Test Test-1: Dist: 4.00000e-02; P: 9.99997e-01 __________________________________________________ Regards, Dave Footnote: I let the vecDoub constructor copy elements from the arrays. This is a matter of style, not substance; it does not change the results here. Bottom line: Where did you get your NR files? Mine were from the Numerical Recipes version 3 distribution CD. Last edited by davekw7x; 07-29-2011 at 09:33 AM. |
|
#6
|
|||
|
|||
|
If you've got a theoretical distribution that you can compute exactly, you should use ksone rather than kstwo.
|
![]() |
| Thread Tools | |
| Display Modes | Rate This Thread |
|
|