# Rajarshi Guha
# 13/05/2005
#
# Functions to calculate forms of entropy for
# categorical variables

# H(X) - entropy
#
# Shannon entropy of a single categorical variable.
#
# Arguments:
#   x    - a factor; anything else raises an error.
#   base - base of the logarithm (default e, giving nats; use 2 for bits).
#
# Returns a single number. Unused factor levels are dropped before counting,
# NA observations are ignored by table(), and an undefined result (NA/NaN)
# is reported as 0.
entropy <- function(x, base = exp(1)) {
  if (!is.factor(x)) {
    stop("x must be a factor")
  }

  # Re-factoring drops unused levels so they do not produce empty cells.
  counts <- table(factor(x))
  p <- counts / sum(counts)

  # Only strictly positive probabilities contribute (0 * log(0) is taken as 0).
  # A logical mask is used instead of p[-which(...)], which would select
  # nothing if which() ever came back empty.
  p <- p[is.finite(p) & p > 0]

  ent <- -1 * sum(p * log(p) / log(base))
  if (is.na(ent)) {
    ent <- 0
  }
  ent
}

# H(X,Y) - joint entropy
#
# Shannon entropy of the joint distribution of two categorical variables,
# computed from their contingency table.
#
# Arguments:
#   x, y - factors of equal length; anything else raises an error.
#   base - base of the logarithm (default e).
#
# Returns a single number. Pairs in which either value is NA are ignored by
# table(). If no complete pair remains the cell probabilities are NaN; they
# are filtered out and the result is 0 rather than an error from `if (NA)`.
entropy.joint <- function(x, y, base = exp(1)) {
  if (!is.factor(x) || !is.factor(y)) {
    stop("x & y must be factors")
  }

  counts <- table(factor(x), factor(y))
  p <- as.numeric(counts / sum(counts))

  # Empty cells of the contingency table carry no information; NaN cells
  # (from a 0/0 when the table is empty) are discarded as well.
  p <- p[is.finite(p) & p > 0]

  ent <- -1 * sum(p * log(p) / log(base))
  if (is.na(ent)) {
    ent <- 0
  }
  ent
}

# H(X|Y) = H(X,Y) - H(Y) - conditional entropy
#
# Arguments:
#   x, y - factors of equal length; anything else raises an error.
#   base - base of the logarithm (default e).
#
# Returns a single number; an undefined result (NA/NaN) is reported as 0.
entropy.cond <- function(x, y, base = exp(1)) {
  if (!is.factor(x) || !is.factor(y)) {
    stop("x & y must be factors")
  }

  ent <- entropy.joint(x, y, base) - entropy(y, base)
  if (is.na(ent)) {
    ent <- 0
  }
  ent
}

# Symmetric uncertainty
#
#   SU(X,Y) = 2 * (H(X) + H(Y) - H(X,Y)) / (H(X) + H(Y))
#
# Formula taken from NR in C
#
# Arguments:
#   x, y - factors of equal length; anything else raises an error.
#   base - base of the logarithm (default e).
#
# Returns a single number. When both variables are constant, H(X) + H(Y) is
# 0 and the ratio would be 0/0; 0 is returned in that case, matching the way
# the other functions in this file report undefined results.
SU <- function(x, y, base = exp(1)) {
  if (!is.factor(x) || !is.factor(y)) {
    stop("x & y must be factors")
  }

  Ht <- entropy.joint(x, y, base)
  Hx <- entropy(x, base)
  Hy <- entropy(y, base)
  #cat(Ht,' ',Hx,' ',Hy,'\n')

  denom <- Hx + Hy
  # Guard the 0/0 case: two constant variables share no uncertainty.
  if (is.na(denom) || denom == 0) {
    return(0)
  }
  2 * (Hy + Hx - Ht) / denom
}