Resource Description
An R classification-tree package (rpart), suitable for use with older versions of R.
Code Snippet and File Information
/* SCCS @(#)anova.c 1.8 08/13/01 */
/*
** The four routines for anova splitting
*/
#include <math.h>
#include "rpart.h"
#include "rpartS.h"
#include "node.h"
#include "rpartproto.h"
static double *mean, *sums;
static double *wts;
static int *countn;
static int *tsplit;
int anovainit(int n, double *y[], int maxcat, char **error,
              double *parm, int *size, int who, double *wt)
{
    if (who == 1 && maxcat > 0) {
        graycode_init0(maxcat);
        countn = (int *)ALLOC(2*maxcat, sizeof(int));
        tsplit = countn + maxcat;
        mean   = (double *)ALLOC(3*maxcat, sizeof(double));
        wts    = mean + maxcat;
        sums   = wts + maxcat;
    }
    *size = 1;
    return(0);
}
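anovainit sets up the per-category scratch space for categorical splits as two contiguous blocks carved up with pointer arithmetic: countn and tsplit share one block of 2*maxcat ints, while mean, wts and sums share one block of 3*maxcat doubles. Below is a minimal standalone sketch of the same layout pattern; it uses plain malloc in place of rpart's ALLOC macro and prefixed variable names, both of which are assumptions of this demo rather than part of the package.

/* Illustrative sketch only: mimics the scratch-buffer layout above with
** plain malloc instead of rpart's ALLOC macro (an assumption for this demo). */
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    int maxcat = 4;                       /* hypothetical number of categories */
    int    *icountn, *itsplit;
    double *imean, *iwts, *isums;

    int    *iblock = malloc(2 * maxcat * sizeof(int));    /* one block, two int arrays */
    double *dblock = malloc(3 * maxcat * sizeof(double)); /* one block, three double arrays */
    if (!iblock || !dblock) return 1;

    icountn = iblock;                     /* first maxcat ints  */
    itsplit = icountn + maxcat;           /* second maxcat ints */
    imean   = dblock;                     /* first maxcat doubles */
    iwts    = imean + maxcat;
    isums   = iwts + maxcat;

    printf("tsplit sits %ld ints past countn\n", (long)(itsplit - icountn));
    printf("wts sits %ld doubles past mean, sums %ld past mean\n",
           (long)(iwts - imean), (long)(isums - imean));

    free(iblock);
    free(dblock);
    return 0;
}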
/*
** The anova evaluation function. Return the mean and the ss.
*/
void anovass(int n, double *y[], double *value, double *risk,
             double *wt)
{
    int i;
    double temp, twt;
    double mean, ss;

    temp = 0;
    twt  = 0;                    /* sum of the weights */
    for (i = 0; i < n; i++) {
        temp += *y[i] * wt[i];
        twt  += wt[i];
    }
    mean = temp/twt;

    ss = 0;
    for (i = 0; i < n; i++) {
        temp = *y[i] - mean;
        if (rp.dissim == 1)      ss += temp * temp * wt[i];
        else if (rp.dissim == 2) ss += fabs(temp) * wt[i];
    }
    *value = mean;
    *risk  = ss;
}
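anovass returns the weighted mean of the node and, depending on rp.dissim, either the weighted sum of squares (1) or the weighted sum of absolute deviations (2) as the risk. The self-contained sketch below reproduces that arithmetic outside the package; the dissim argument and the anovass_demo name stand in for rpart's global rp.dissim and are not part of the original source.

/* Standalone sketch of the anovass arithmetic; dissim stands in for rpart's
** global rp.dissim (1 = squared error, 2 = absolute deviation). */
#include <math.h>
#include <stdio.h>

static void anovass_demo(int n, const double *y, const double *wt,
                         int dissim, double *value, double *risk) {
    double temp = 0, twt = 0, mean, ss = 0;
    int i;

    for (i = 0; i < n; i++) {            /* weighted sum and total weight */
        temp += y[i] * wt[i];
        twt  += wt[i];
    }
    mean = temp / twt;

    for (i = 0; i < n; i++) {            /* weighted deviations from the mean */
        temp = y[i] - mean;
        if (dissim == 1)      ss += temp * temp * wt[i];
        else if (dissim == 2) ss += fabs(temp) * wt[i];
    }
    *value = mean;
    *risk  = ss;
}

int main(void) {
    double y[]  = {1.0, 2.0, 4.0};
    double wt[] = {1.0, 1.0, 2.0};
    double value, risk;
    anovass_demo(3, y, wt, 1, &value, &risk);
    printf("mean=%g ss=%g\n", value, risk);  /* mean = 11/4 = 2.75, ss = 6.75 */
    return 0;
}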
/*
** The anova splitting function. Find that split point in x such that
** the sum of squares of y within the two groups is decreased as much
** as possible. It is not necessary to actually calculate the SS the
** improvement involves only means in the two groups.
*/
void anova(int n, double *y[], FLOAT *x, int nclass,
           int edge, double *improve, FLOAT *split, int *csplit,
           double myrisk, double *wt)
{
    int i, j;
    double temp = 0;
    double left_sum, right_sum;
    double left_wt, right_wt;
    int left_n, right_n;
    double grandmean, best;
    int direction = LEFT;
    int where = 0;
    /*
    ** The improvement of a node is SS - (SS_L + SS_R), where
    ** SS = sum of squares in a node = \sum w_i (x_i - \bar x)^2, where
    ** of course \bar x is a weighted mean \sum w_i x_i / \sum w_i.
    ** Using the identity
    ** \sum w_i (x_i - \bar x)^2 = \sum w_i (x_i - c)^2 - (\sum w_i)(c - \bar x)^2,
    ** the improvement = w_l*(left mean - grand mean)^2
    **                  + w_r*(right mean - grand mean)^2,
    ** where w_l is the sum of weights in the left node, and w_r similarly.
    */
    right_wt  = 0;
    right_n   = n;
    right_sum = 0;
    for (i = 0; i < n; i++) {
        right_sum += *y[i] * wt[i];
        right_wt  += wt[i];
    }
    grandmean = right_sum/right_wt;

    if (nclass == 0) {           /* continuous predictor */
        left_sum = 0;            /* No data in left branch to start */
        left_wt = 0;  left_n = 0;
        right_sum = 0;           /* after subtracting the grand mean it's zero */
        best = 0;
        for (i = 0; right_n > edge; i++) {
            left_wt  += wt[i];
            right_wt -= wt[i];
            left_n++;
            right_n--;
            temp = (*y[i] - grandmean) * wt[i];
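The listing breaks off inside the scan over candidate split points. For reference, the improvement described in the comment above, w_l*(left mean - grand mean)^2 + w_r*(right mean - grand mean)^2, needs only the running quantities the loop maintains: left_sum and right_sum accumulate w_i*(y_i - grandmean), so dividing them by the corresponding weights gives means that are already centred on the grand mean. A small sketch of that calculation follows; the helper name split_improve and the example numbers are hypothetical, not part of anova.c.

/* Sketch only: improvement of a candidate split from the running sums kept by
** the scan loop. left_sum/right_sum are weighted sums of (y - grand mean),
** so the "means" below are already centred on the grand mean. */
#include <stdio.h>

static double split_improve(double left_sum, double left_wt,
                            double right_sum, double right_wt) {
    double left_mean  = left_sum  / left_wt;   /* left mean  - grand mean */
    double right_mean = right_sum / right_wt;  /* right mean - grand mean */
    return left_wt  * left_mean  * left_mean +
           right_wt * right_mean * right_mean;
}

int main(void) {
    /* hypothetical running sums after placing two points in each branch */
    printf("improve = %g\n", split_improve(-2.0, 2.0, 2.0, 2.0));  /* prints 4 */
    return 0;
}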