User:Timothee Flutre/Notebook/Postdoc/2011/12/14
From OpenWetWare
(Difference between revisions)
m (→Learn about mixture models and the EM algorithm: improve motivation) |
(→Learn about mixture models and the EM algorithm: add implem simul data and E step) |
||
Line 50: | Line 50: | ||
<math>\hat{\sigma_k} = \sqrt{\frac{\sum_{i=1}^N p(k/i) (x_i - \mu_k)^2}{\sum_{i=1}^N p(k/i)}}</math> | <math>\hat{\sigma_k} = \sqrt{\frac{\sum_{i=1}^N p(k/i) (x_i - \mu_k)^2}{\sum_{i=1}^N p(k/i)}}</math> | ||
+ | |||
+ | * ... <TO DO> ... | ||
+ | |||
+ | * '''Simulate data''': | ||
+ | |||
+ | #' Generate univariate observations from a mixture of Normals | ||
+ | #' | ||
+ | #' @param K number of components | ||
+ | #' @param N number of observations | ||
+ | GetUnivariateSimulatedData <- function(K=2, N=100){ | ||
+ | mus <- seq(0, 6*(K-1), 6) | ||
+ | sigmas <- runif(n=K, min=0.5, max=1.5) | ||
+ | tmp <- floor(rnorm(n=K-1, mean=floor(N/K), sd=5)) | ||
+ | ns <- c(tmp, N - sum(tmp)) | ||
+ | clusters <- as.factor(matrix(unlist(lapply(1:K, function(k){rep(k, ns[k])})), | ||
+ | ncol=1)) | ||
+ | obs <- matrix(unlist(lapply(1:K, function(k){ | ||
+ | rnorm(n=ns[k], mean=mus[k], sd=sigmas[k]) | ||
+ | }))) | ||
+ | new.order <- sample(1:N, N) | ||
+ | obs <- obs[new.order] | ||
+ | rownames(obs) <- NULL | ||
+ | clusters <- clusters[new.order] | ||
+ | return(list(obs=obs, clusters=clusters, mus=mus, sigmas=sigmas, | ||
+ | mix.probas=ns/N)) | ||
+ | } | ||
+ | |||
+ | * '''Implement the E step''': | ||
+ | |||
+ | #' Return probas of latent variables given data and parameters from previous iteration | ||
+ | #' | ||
+ | #' @param data Nx1 vector of observations | ||
+ | #' @param params list which components are mus, sigmas and mix.probas | ||
+ | Estep <- function(data, params){ | ||
+ | GetMembershipProbas(data, params$mus, params$sigmas, params$mix.probas) | ||
+ | } | ||
+ | |||
+ | #' Return the membership probabilities P(zi=k/xi,theta) | ||
+ | #' | ||
+ | #' @param data Nx1 vector of observations | ||
+ | #' @param mus Kx1 vector of means | ||
+ | #' @param sigmas Kx1 vector of std deviations | ||
+ | #' @param mix.probas Kx1 vector of mixing probas P(zi=k/theta) | ||
+ | #' @return NxK matrix of membership probas | ||
+ | GetMembershipProbas <- function(data, mus, sigmas, mix.probas){ | ||
+ | N <- length(data) | ||
+ | K <- length(mus) | ||
+ | tmp <- matrix(unlist(lapply(1:N, function(i){ | ||
+ | x <- data[i] | ||
+ | norm.const <- sum(unlist(Map(function(mu, sigma, mix.proba){ | ||
+ | mix.proba * GetUnivariateNormalDensity(x, mu, sigma)}, mus, sigmas, mix.probas))) | ||
+ | unlist(Map(function(mu, sigma, mix.proba){ | ||
+ | mix.proba * GetUnivariateNormalDensity(x, mu, sigma) / norm.const | ||
+ | }, mus[-K], sigmas[-K], mix.probas[-K])) | ||
+ | })), ncol=K-1, byrow=TRUE) | ||
+ | membership.probas <- cbind(tmp, apply(tmp, 1, function(x){1 - sum(x)})) | ||
+ | names(membership.probas) <- NULL | ||
+ | return(membership.probas) | ||
+ | } | ||
+ | |||
+ | #' Univariate Normal density | ||
+ | GetUnivariateNormalDensity <- function(x, mu, sigma){ | ||
+ | return( 1/(sigma * sqrt(2*pi)) * exp(-1/(2*sigma^2)*(x-mu)^2) ) | ||
+ | } | ||
<!-- ##### DO NOT edit below this line unless you know what you are doing. ##### --> | <!-- ##### DO NOT edit below this line unless you know what you are doing. ##### --> |
Revision as of 04:55, 23 December 2011
Project name | Main project page Previous entry Next entry |
Learn about mixture models and the EM algorithm
As we derive with respect to μ_{k}, all the others means μ_{l} with are constant, and thus disappear:
And finally:
Once we put all together, we end up with:
By convention, we note the maximum-likelihood estimate of μ_{k}:
Therefore, we finally obtain:
By doing the same kind of algebra, we also obtain the ML estimates for the standard deviation of each cluster:
#' Generate univariate observations from a mixture of Normals #' #' @param K number of components #' @param N number of observations GetUnivariateSimulatedData <- function(K=2, N=100){ mus <- seq(0, 6*(K-1), 6) sigmas <- runif(n=K, min=0.5, max=1.5) tmp <- floor(rnorm(n=K-1, mean=floor(N/K), sd=5)) ns <- c(tmp, N - sum(tmp)) clusters <- as.factor(matrix(unlist(lapply(1:K, function(k){rep(k, ns[k])})), ncol=1)) obs <- matrix(unlist(lapply(1:K, function(k){ rnorm(n=ns[k], mean=mus[k], sd=sigmas[k]) }))) new.order <- sample(1:N, N) obs <- obs[new.order] rownames(obs) <- NULL clusters <- clusters[new.order] return(list(obs=obs, clusters=clusters, mus=mus, sigmas=sigmas, mix.probas=ns/N)) }
#' Return probas of latent variables given data and parameters from previous iteration #' #' @param data Nx1 vector of observations #' @param params list which components are mus, sigmas and mix.probas Estep <- function(data, params){ GetMembershipProbas(data, params$mus, params$sigmas, params$mix.probas) } #' Return the membership probabilities P(zi=k/xi,theta) #' #' @param data Nx1 vector of observations #' @param mus Kx1 vector of means #' @param sigmas Kx1 vector of std deviations #' @param mix.probas Kx1 vector of mixing probas P(zi=k/theta) #' @return NxK matrix of membership probas GetMembershipProbas <- function(data, mus, sigmas, mix.probas){ N <- length(data) K <- length(mus) tmp <- matrix(unlist(lapply(1:N, function(i){ x <- data[i] norm.const <- sum(unlist(Map(function(mu, sigma, mix.proba){ mix.proba * GetUnivariateNormalDensity(x, mu, sigma)}, mus, sigmas, mix.probas))) unlist(Map(function(mu, sigma, mix.proba){ mix.proba * GetUnivariateNormalDensity(x, mu, sigma) / norm.const }, mus[-K], sigmas[-K], mix.probas[-K])) })), ncol=K-1, byrow=TRUE) membership.probas <- cbind(tmp, apply(tmp, 1, function(x){1 - sum(x)})) names(membership.probas) <- NULL return(membership.probas) } #' Univariate Normal density GetUnivariateNormalDensity <- function(x, mu, sigma){ return( 1/(sigma * sqrt(2*pi)) * exp(-1/(2*sigma^2)*(x-mu)^2) ) } |