GRASS GIS 8 Programmer's Manual 8.3.2(2024)-exported
Loading...
Searching...
No Matches
class.c
Go to the documentation of this file.
1/* functions to classify sorted arrays of doubles and fill a vector of
2 * classbreaks */
3
4#include <grass/glocale.h>
5#include <grass/arraystats.h>
6
7int AS_option_to_algorithm(const struct Option *option)
8{
9 if (G_strcasecmp(option->answer, "int") == 0)
10 return CLASS_INTERVAL;
11 if (G_strcasecmp(option->answer, "std") == 0)
12 return CLASS_STDEV;
13 if (G_strcasecmp(option->answer, "qua") == 0)
14 return CLASS_QUANT;
15 if (G_strcasecmp(option->answer, "equ") == 0)
16 return CLASS_EQUIPROB;
17 if (G_strcasecmp(option->answer, "dis") == 0)
18 return CLASS_DISCONT;
19
20 G_fatal_error(_("Unknown algorithm '%s'"), option->answer);
21}
22
23double AS_class_apply_algorithm(int algo, double *data, int nrec, int *nbreaks,
24 double *classbreaks)
25{
26 double finfo = 0.0;
27
28 switch (algo) {
29 case CLASS_INTERVAL:
30 finfo = AS_class_interval(data, nrec, *nbreaks, classbreaks);
31 break;
32 case CLASS_STDEV:
33 finfo = AS_class_stdev(data, nrec, *nbreaks, classbreaks);
34 break;
35 case CLASS_QUANT:
36 finfo = AS_class_quant(data, nrec, *nbreaks, classbreaks);
37 break;
38 case CLASS_EQUIPROB:
39 finfo = AS_class_equiprob(data, nrec, nbreaks, classbreaks);
40 break;
41 case CLASS_DISCONT:
42 /* finfo = class_discont(data, nrec, *nbreaks, classbreaks);
43 * disabled because of bugs */
45 _("Discont algorithm currently not available because of bugs"));
46 break;
47 default:
48 break;
49 }
50
51 if (finfo == 0)
52 G_fatal_error(_("Classification algorithm failed"));
53
54 return finfo;
55}
56
57int AS_class_interval(double *data, int count, int nbreaks, double *classbreaks)
58{
59 double min, max;
60 double step;
61 int i = 0;
62
63 min = data[0];
64 max = data[count - 1];
65
66 step = (max - min) / (nbreaks + 1);
67
68 for (i = 0; i < nbreaks; i++)
69 classbreaks[i] = min + (step * (i + 1));
70
71 return (1);
72}
73
74double AS_class_stdev(double *data, int count, int nbreaks, double *classbreaks)
75{
76 struct GASTATS stats;
77 int i;
78 int nbclass;
79 double scale = 1.0;
80
81 AS_basic_stats(data, count, &stats);
82
83 nbclass = nbreaks + 1;
84
85 if (nbclass % 2 ==
86 1) { /* number of classes is uneven so we center middle class on mean */
87
88 /* find appropriate fraction of stdev for step */
89 i = 1;
90 while (i) {
91 if (((stats.mean + stats.stdev * scale / 2) +
92 (stats.stdev * scale * (nbclass / 2 - 1)) >
93 stats.max) ||
94 ((stats.mean - stats.stdev * scale / 2) -
95 (stats.stdev * scale * (nbclass / 2 - 1)) <
96 stats.min))
97 scale = scale / 2;
98 else
99 i = 0;
100 }
101
102 /* classbreaks below the mean */
103 for (i = 0; i < nbreaks / 2; i++)
104 classbreaks[i] = (stats.mean - stats.stdev * scale / 2) -
105 stats.stdev * scale * (nbreaks / 2 - (i + 1));
106 /* classbreaks above the mean */
107 for (; i < nbreaks; i++)
108 classbreaks[i] = (stats.mean + stats.stdev * scale / 2) +
109 stats.stdev * scale * (i - nbreaks / 2);
110 }
111 else { /* number of classes is even so mean is a classbreak */
112
113 /* decide whether to use 1*stdev or 0.5*stdev as step */
114 i = 1;
115 while (i) {
116 if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
117 stats.max) ||
118 ((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
119 stats.min))
120 scale = scale / 2;
121 else
122 i = 0;
123 }
124
125 /* classbreaks below the mean and on the mean */
126 for (i = 0; i <= nbreaks / 2; i++)
127 classbreaks[i] =
128 stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
129 /* classbreaks above the mean */
130 for (; i < nbreaks; i++)
131 classbreaks[i] =
132 stats.mean + stats.stdev * scale * (i - nbreaks / 2);
133 }
134
135 return (scale);
136}
137
138int AS_class_quant(double *data, int count, int nbreaks, double *classbreaks)
139{
140 int i, step;
141
142 step = count / (nbreaks + 1);
143
144 for (i = 0; i < nbreaks; i++)
145 classbreaks[i] = data[step * (i + 1)];
146
147 return (1);
148}
149
150int AS_class_equiprob(double *data, int count, int *nbreaks,
151 double *classbreaks)
152{
153 int i, j;
154 double *lequi; /*Vector of scale factors for probabilities of the normal
155 distribution */
156 struct GASTATS stats;
157 int nbclass;
158
159 nbclass = *nbreaks + 1;
160
161 lequi = G_malloc(*nbreaks * sizeof(double));
162
163 /* The following values come from the normal distribution and will be used
164 * as: classbreak[i] = (lequi[i] * stdev) + mean;
165 */
166
167 if (nbclass < 3) {
168 lequi[0] = 0;
169 }
170 else if (nbclass == 3) {
171 lequi[0] = -0.43076;
172 lequi[1] = 0.43076;
173 }
174 else if (nbclass == 4) {
175 lequi[0] = -0.6745;
176 lequi[1] = 0;
177 lequi[2] = 0.6745;
178 }
179 else if (nbclass == 5) {
180 lequi[0] = -0.8416;
181 lequi[1] = -0.2533;
182 lequi[2] = 0.2533;
183 lequi[3] = 0.8416;
184 }
185 else if (nbclass == 6) {
186 lequi[0] = -0.9676;
187 lequi[1] = -0.43076;
188 lequi[2] = 0;
189 lequi[3] = 0.43076;
190 lequi[4] = 0.9676;
191 }
192 else if (nbclass == 7) {
193 lequi[0] = -1.068;
194 lequi[1] = -0.566;
195 lequi[2] = -0.18;
196 lequi[3] = 0.18;
197 lequi[4] = 0.566;
198 lequi[5] = 1.068;
199 }
200 else if (nbclass == 8) {
201 lequi[0] = -1.1507;
202 lequi[1] = -0.6745;
203 lequi[2] = -0.3187;
204 lequi[3] = 0;
205 lequi[4] = 0.3187;
206 lequi[5] = 0.6745;
207 lequi[6] = 1.1507;
208 }
209 else if (nbclass == 9) {
210 lequi[0] = -1.2208;
211 lequi[1] = -0.7648;
212 lequi[2] = -0.4385;
213 lequi[3] = -0.1397;
214 lequi[4] = 0.1397;
215 lequi[5] = 0.4385;
216 lequi[6] = 0.7648;
217 lequi[7] = 1.2208;
218 }
219 else if (nbclass == 10) {
220 lequi[0] = -1.28155;
221 lequi[1] = -0.84162;
222 lequi[2] = -0.5244;
223 lequi[3] = -0.25335;
224 lequi[4] = 0;
225 lequi[5] = 0.25335;
226 lequi[6] = 0.5244;
227 lequi[7] = 0.84162;
228 lequi[8] = 1.28155;
229 }
230 else {
232 _("Equiprobable classbreaks currently limited to 10 classes"));
233 }
234
235 AS_basic_stats(data, count, &stats);
236
237 /* Check if any of the classbreaks would fall outside of the range min-max
238 */
239 j = 0;
240 for (i = 0; i < *nbreaks; i++) {
241 if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
242 (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
243 j++;
244 }
245 }
246
247 if (j < (*nbreaks)) {
248 G_warning(
249 _("There are classbreaks outside the range min-max. Number of "
250 "classes reduced to %i, but using probabilities for %i classes."),
251 j + 1, *nbreaks + 1);
252 G_realloc(classbreaks, j * sizeof(double));
253 for (i = 0; i < j; i++)
254 classbreaks[i] = 0;
255 }
256
257 j = 0;
258 for (i = 0; i < *nbreaks; i++) {
259 if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
260 (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
261 classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
262 j++;
263 }
264 }
265
266 *nbreaks = j;
267
268 G_free(lequi);
269 return (1);
270}
271
272/* FIXME: there seems to a problem with array overflow, probably due to
273 the fact that the code was ported from fortran which has 1-based arrays */
274double AS_class_discont(double *data, int count, int nbreaks,
275 double *classbreaks)
276{
277 int *num, nbclass;
278 double *no, *zz, /* *nz, */ *xn, *co;
279 double *x; /* Vector standardized observations */
280 int i, j, k;
281 double min = 0, max = 0, rangemax = 0;
282 int n = 0;
283 double rangemin = 0, xlim = 0;
284 double dmax = 0.0 /*, d2 = 0.0, dd = 0.0, p = 0.0 */;
285 int nf = 0, nmax = 0;
286 double *abc;
287 int nd = 0;
288 double den = 0, d = 0;
289 int im = 0, ji = 0;
290 int tmp = 0;
291 int nff = 0, jj = 0, no1 = 0, no2 = 0;
292 double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0;
293
294 /*get the number of values */
295 n = count;
296
297 nbclass = nbreaks + 1;
298
299 num = G_malloc((nbclass + 1) * sizeof(int));
300 no = G_malloc((nbclass + 1) * sizeof(double));
301 zz = G_malloc((nbclass + 1) * sizeof(double));
302 /* nz = G_malloc(3 * sizeof(double)); */
303 xn = G_malloc((n + 1) * sizeof(double));
304 co = G_malloc((nbclass + 1) * sizeof(double));
305
306 /* We copy the array of values to x, in order to be able to standardize it
307 */
308 x = G_malloc((n + 1) * sizeof(double));
309 x[0] = n;
310 xn[0] = 0;
311
312 min = data[0];
313 max = data[count - 1];
314 for (i = 1; i <= n; i++)
315 x[i] = data[i - 1];
316
317 rangemax = max - min;
318 rangemin = rangemax;
319
320 for (i = 2; i <= n; i++) {
321 if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
322 rangemin = x[i] - x[i - 1]; /* rangemin = minimal distance */
323 }
324
325 /* STANDARDIZATION
326 * and creation of the number vector (xn) */
327
328 for (i = 1; i <= n; i++) {
329 x[i] = (x[i] - min) / rangemax;
330 xn[i] = i / (double)n;
331 }
332 xlim = rangemin / rangemax;
333 rangemin = rangemin / 2.0;
334
335 /* Searching for the limits */
336 num[1] = n;
337 abc = G_malloc(3 * sizeof(double));
338
339 /* Loop through possible solutions */
340 for (i = 1; i <= nbclass; i++) {
341 nmax = 0;
342 dmax = 0.0;
343 /* d2 = 0.0; */
344 nf = 0; /*End number */
345
346 /* Loop through classes */
347 for (j = 1; j <= i; j++) {
348 nd = nf; /*Start number */
349 nf = num[j];
350 co[j] = 10e37;
351 AS_eqdrt(x, xn, nd, nf, abc);
352 den = sqrt(pow(abc[1], 2) + 1.0);
353 nd++;
354 /* Loop through observations */
355 for (k = nd; k <= nf; k++) {
356 if (abc[2] == 0.0)
357 d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
358 else
359 d = fabs(x[k] - abc[2]);
360 /* d2 += pow(d, 2); */
361 if (x[k] - x[nd] < xlim)
362 continue;
363 if (x[nf] - x[k] < xlim)
364 continue;
365 if (d <= dmax)
366 continue;
367 dmax = d;
368 nmax = k;
369 }
370 nd--; /* A VERIFIER! */
371 if (x[nf] != x[nd]) {
372 if (nd != 0)
373 co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
374 else
375 co[j] = (xn[nf]) / (x[nf]); /* A VERIFIER! */
376 }
377 }
378 /* if (i == 1)
379 dd = d2;
380 p = d2 / dd; */
381 for (j = 1; j <= i; j++) {
382 no[j] = num[j];
383 zz[j] = x[num[j]] * rangemax + min;
384 if (j == i)
385 continue;
386 if (co[j] > co[j + 1]) {
387 zz[j] = zz[j] + rangemin;
388 continue;
389 }
390 zz[j] = zz[j] - rangemin;
391 no[j] = no[j] - 1;
392 }
393 im = i - 1;
394 if (im != 0.0) {
395 for (j = 1; j <= im; j++) {
396 ji = i + 1 - j;
397 no[ji] -= no[ji - 1];
398 }
399 }
400 if (nmax == 0) {
401 break;
402 }
403 nff = i + 2;
404 tmp = 0;
405 for (j = 1; j <= i; j++) {
406 jj = nff - j;
407 if (num[jj - 1] < nmax) {
408 num[jj] = nmax;
409 tmp = 1;
410 break;
411 }
412 num[jj] = num[jj - 1];
413 }
414 if (tmp == 0) {
415 num[1] = nmax;
416 jj = 1;
417 }
418 if (jj == 1) {
419 xnj_1 = 0;
420 xj_1 = 0;
421 }
422 else {
423 xnj_1 = xn[num[jj - 1]];
424 xj_1 = x[num[jj - 1]];
425 }
426 no1 = (xn[num[jj]] - xnj_1) * n;
427 no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
428 f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
429 f *= n;
430 xt1 = (x[num[jj]] - xj_1) * f;
431 xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
432 if (xt2 == 0) {
433 xt2 = rangemin / 2.0 / rangemax * f;
434 xt1 -= xt2;
435 }
436 else if (xt1 * xt2 == 0) {
437 xt1 = rangemin / 2.0 / rangemax * f;
438 xt2 -= xt1;
439 }
440
441 /* calculate chi-square to indicate statistical significance of new
442 * class, i.e. how probable would it be that the new class could be the
443 * result of purely random choice */
444 if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2))
445 chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
446 }
447
448 /* Fill up classbreaks of i <=nbclass classes */
449 for (j = 0; j <= (i - 1); j++)
450 classbreaks[j] = zz[j + 1];
451
452 return (chi2);
453}
454
455int AS_class_frequencies(double *data, int count, int nbreaks,
456 double *classbreaks, int *frequencies)
457{
458 int i, j;
459
460 /* min = data[0];
461 max = data[count - 1]; */
462 /* count cases in all classes, except for last class */
463 i = 0;
464 for (j = 0; j < nbreaks; j++) {
465 while (data[i] <= classbreaks[j]) {
466 frequencies[j]++;
467 i++;
468 }
469 }
470
471 /*Now count cases in last class */
472 for (; i < count; i++) {
473 frequencies[nbreaks]++;
474 }
475
476 return (1);
477}
void G_free(void *buf)
Free allocated memory.
Definition alloc.c:150
void AS_eqdrt(double vectx[], double vecty[], int i1, int i2, double *vabc)
Definition basic.c:37
void AS_basic_stats(double *data, int count, struct GASTATS *stats)
Definition basic.c:5
double AS_class_discont(double *data, int count, int nbreaks, double *classbreaks)
Definition class.c:274
int AS_class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
Definition class.c:150
int AS_class_quant(double *data, int count, int nbreaks, double *classbreaks)
Definition class.c:138
double AS_class_apply_algorithm(int algo, double *data, int nrec, int *nbreaks, double *classbreaks)
Definition class.c:23
int AS_class_frequencies(double *data, int count, int nbreaks, double *classbreaks, int *frequencies)
Definition class.c:455
int AS_class_interval(double *data, int count, int nbreaks, double *classbreaks)
Definition class.c:57
int AS_option_to_algorithm(const struct Option *option)
Definition class.c:7
double AS_class_stdev(double *data, int count, int nbreaks, double *classbreaks)
Definition class.c:74
void G_fatal_error(const char *msg,...)
Print a fatal error message to stderr.
Definition gis/error.c:159
void G_warning(const char *msg,...)
Print a warning message to stderr.
Definition gis/error.c:203
int count
#define min(a, b)
#define max(a, b)
int G_strcasecmp(const char *x, const char *y)
String compare ignoring case (upper or lower)
Definition strings.c:47
#define x