%
% k-means clustering (really simple)
%
% 2/98 pjf
%
% usage: [distance,label,tse] = kmeans1(data,nclus)
%
% parameters:
%   data  = pattern matrix, one pattern per row (n patterns x d features)
%   nclus = # clusters (positive integer)
%
% returns:
%   distance = n x nclus table; distance(i,j) is the Euclidean distance of
%              pattern i from cluster center j
%   label    = n x 1 membership table; label(i) is the index of the cluster
%              center closest to pattern i
%   tse      = total squared error (sum over all patterns of the squared
%              distance to the center of their own cluster)
%
% On every iteration the iteration number, the population of each cluster
% and the total squared error are printed to standard output.
%
function [distance,label,tse] = kmeans1(data,nclus)

[n,d] = size(data);
if (n == 0 || d == 0)
  error('kmeans1: data must be a non-empty pattern matrix');
end
if (numel(nclus) ~= 1 || nclus < 1 || nclus ~= floor(nclus))
  error('kmeans1: nclus must be a positive integer');
end

% Reduce along dimension 1 explicitly: for a single pattern (1 x d) plain
% max(data)/min(data) would collapse the row to a scalar.
mx = max(data,[],1);
mn = min(data,[],1);

% choose nclus cluster centers at random in the `box' occupied by the patterns
center = repmat(mn,nclus,1) + rand(nclus,d).*repmat(mx-mn,nclus,1);

maxiter = 50;          % the loop stops once niter exceeds this value
niter = 0;
label = zeros(n,1);    % 0 = not yet assigned, so the first pass always changes

while (1)
  change = 0;

  % assign patterns to closest cluster center
  for i=1:n
    mind = norm(data(i,:)-center(1,:));
    lab = 1;
    for j=2:nclus
      dtmp = norm(data(i,:)-center(j,:));
      if (dtmp < mind)   % strict: ties go to the lowest-numbered cluster
        mind = dtmp;
        lab = j;
      end
    end
    if (label(i) ~= lab)
      change = 1;
      label(i) = lab;
    end
  end

  % recompute each cluster center as the mean of its member patterns
  center = zeros(nclus,d);
  pop = zeros(nclus,1);
  for i=1:n
    center(label(i),:) = center(label(i),:) + data(i,:);
    pop(label(i)) = pop(label(i)) + 1;
  end
  for i=1:nclus
    if (pop(i) > 0)
      center(i,:) = center(i,:) / pop(i);
    else
      % empty cluster: re-seed it at a random point inside the data box
      % (the parentheses matter: .* binds tighter than binary minus)
      center(i,:) = mn + rand(1,d) .* (mx-mn);
    end
  end

  fprintf(1,'%2d: ',niter);
  for i=1:nclus
    fprintf(1,'[%d p %d] ',i,pop(i));
  end

  % compute squared error, accumulated per cluster
  se = zeros(nclus,1);
  for i=1:n
    se(label(i)) = se(label(i)) + norm(data(i,:)-center(label(i),:))^2;
  end
  tse = 0.0;           % total squared error
  for i=1:nclus
    tse = tse + se(i);
  end
  fprintf(1,'tse %f\n',tse);

  niter = niter + 1;
  if ((niter > maxiter) || (change == 0))
    break;
  end
end

% calculate the distance of each pattern from cluster centers
distance = zeros(n,nclus);
for i=1:n
  for j=1:nclus
    distance(i,j) = norm(data(i,:)-center(j,:),2);
  end
end

fprintf(1,'Terminated after %d iterations.\n',niter);