#---------------------------------------
# Clustering of user profiles (CRM data) with k-means
#---------------------------------------
library(mclust)
library(cluster)
library(car)

#-------------------------------------------
# Data preparation
#-------------------------------------------
# Change into the original project directory only when it exists, so the
# script can also be run from any directory that holds "CRM_working1.csv".
data_dir <- "C:/Users/grossmw5/Willi/Buch/Chapter5/Text/Beispiele/Clustering/CRM_clusteranalyse"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
daten <- read.csv("CRM_working1.csv", header = TRUE, sep = ";")
# summary(daten)
# NOTE(review): attach() is kept only in case code outside this section still
# relies on bare column names; this section looks columns up explicitly.
attach(daten)

#--------------------------------------------------
# Variable selection: 4 most important services and
# sales indicators
#--------------------------------------------------
datclus1 <- with(
  daten,
  cbind(
    Service2, Service3, Service4, Service9,
    Sales, Average_Sales, No_activities
  )
)
# datclus2 <- cbind(Service1, Service2, Service3, Service4,
#                   Service5, Service7, Service8,
#                   Service9, Service10, Average_Sales, No_activities)

# Row filters; subset() on a matrix treats NA in the filter as "drop the row".
keep_long <- daten$Duration_CR > 12
keep_core <- keep_long & daten$Sales < 50 & daten$Average_Sales < 5
clust1 <- subset(datclus1, keep_core)  # sample without outliers
clust2 <- subset(datclus1, keep_long)  # sample with outliers

#--------------------------------------------------
# Standardisation of data: every column is divided by its range (max - min)
#--------------------------------------------------
rge1 <- apply(clust1, 2, max) - apply(clust1, 2, min)
rge1
# A constant column has range 0 and would yield non-finite values below.
stopifnot(all(rge1 > 0))
use.dat1 <- sweep(clust1, 2, rge1, FUN = "/")
summary(use.dat1)

rge2 <- apply(clust2, 2, max) - apply(clust2, 2, min)
rge2
stopifnot(all(rge2 > 0))
use.dat2 <- sweep(clust2, 2, rge2, FUN = "/")
summary(use.dat2)

#-------------------------------------------
# Explanation of SSQ with k-means
#-------------------------------------------
# kmeans() picks random starting centres; fix the seed so that the cluster
# labels, tables and plots below are the same on every run.
set.seed(1)

# Within-cluster sum of squares for 1..k_max clusters.
#   x     numeric matrix, one row per observation
#   k_max largest number of clusters to try
# Returns a numeric vector of length k_max. Element 1 is the total sum of
# squares (one cluster); element k is kmeans(x, centers = k)$tot.withinss.
elbow_wss <- function(x, k_max = 10) {
  wss <- numeric(k_max)
  wss[1] <- (nrow(x) - 1) * sum(apply(x, 2, var))
  for (k in seq_len(k_max)[-1]) {
    wss[k] <- kmeans(x, centers = k)$tot.withinss
  }
  wss
}

n <- nrow(use.dat1)
wss1 <- elbow_wss(use.dat1)
pdf("5_kmeans1.pdf", width = 7, height = 3.5)
par(oma = c(0, 2, 0, 2))
plot(seq_along(wss1), wss1, type = "b",
     xlab = "No of classes", ylab = "SS in class")
dev.off()
wss1[1]

n <- nrow(use.dat2)
wss2 <- elbow_wss(use.dat2)
# The trailing line break in the x label is kept from the original label text.
plot(seq_along(wss2), wss2, type = "b",
     xlab = "Classes use.data2 \n", ylab = "SS in class")
wss2[1]

#===============================================
# Solution dataset 1, standardized
#===============================================
# K-means, five clusters
#-----------------------------------------------
use5_stand1 <- kmeans(use.dat1, centers = 5)
table(use5_stand1$cluster)

#-------------------------------------------------
# Functions for cluster centers
#-------------------------------------------------
# Column means of `data` within each cluster.
#   data numeric matrix with one row per clustered observation
#   cl   vector of cluster labels, one per row of `data`
# Returns a matrix with one row per column of `data` and one column per
# cluster label (labels in sorted order are used as column names).
cluster_centers <- function(data, cl) {
  labels <- sort(unique(cl))
  centers <- vapply(
    labels,
    # drop = FALSE keeps a one-member cluster as a 1-row matrix; without it
    # the row collapses to a vector and colMeans() fails.
    function(k) colMeans(data[cl == k, , drop = FALSE]),
    numeric(ncol(data))
  )
  colnames(centers) <- labels
  centers
}

# Cluster centers on the unstandardised scale of sample 1 (`clust1`).
ccent <- function(cl, data = clust1) {
  cluster_centers(data, cl)
}
ccent(use5_stand1$cluster)

pdf("5_kmeans2.pdf", width = 7, height = 2.5)
par(oma = c(0, 2, 0, 2))
clus <- par(mfrow = c(1, 3))
boxplot(Sales ~ use5_stand1$cluster, data = use.dat1,
        xlab = "Sales in clusters")
boxplot(Service2 ~ use5_stand1$cluster, data = use.dat1,
        xlab = "Service2 in clusters")
boxplot(Service3 ~ use5_stand1$cluster, data = use.dat1,
        xlab = "Service3 in clusters")
# Restore the layout while the PDF device is still the active device.
par(clus)
dev.off()

boxplot(No_activities ~ use5_stand1$cluster, data = use.dat1,
        xlab = "Activities in clusters")
boxplot(Service4 ~ use5_stand1$cluster, data = use.dat1)
boxplot(Service9 ~ use5_stand1$cluster, data = use.dat1)

#===============================================
# Solution dataset 2, standardized
#===============================================
# K-means, five clusters
#-----------------------------------------------
use5_stand2 <- kmeans(use.dat2, centers = 5)
table(use5_stand2$cluster)

#-------------------------------------------------
# Function for cluster centers
#-------------------------------------------------
# Cluster centers on the unstandardised scale of sample 2 (`clust2`).
ccent2 <- function(cl, data = clust2) {
  cluster_centers(data, cl)
}
ccent2(use5_stand2$cluster)

#-------------------------------------------
# PAM clustering, 5 clusters
#-------------------------------------------
pmed <- pam(use.dat2, 5)
# Inspect the PAM solution stored in `pmed` and compare it with the
# five-cluster k-means solution `use5_stand2`.
summary(pmed)
plot(pmed)
table(pmed$cluster)
# First rows of the data stored in the PAM fit; head() also copes with
# fewer than 10 rows.
head(pmed$data, 10)
table(pmed$cluster, use5_stand2$cluster)
ccent2(pmed$cluster)
pmed$medoids
# Multiply by the column ranges to undo the division by `rge2`.
sweep(pmed$medoids, 2, rge2, FUN = "*")
sweep(use5_stand2$centers, 2, rge2, FUN = "*")
# Component names of the k-means fit.
names(use5_stand2)
use5_stand2$centers

#--------------------------------------------------
# K-means with 9 clusters
#--------------------------------------------------
# kmeans() picks random starting centres; fix the seed so the labels in the
# tables below are the same on every run.
set.seed(1)
use9_stand2 <- kmeans(use.dat2, centers = 9)
table(use9_stand2$cluster)
ccent2(use9_stand2$cluster)
# NOTE(review): `mclass9` is not created anywhere in the visible script;
# presumably a 9-class labelling produced elsewhere - confirm its origin.
if (exists("mclass9")) {
  print(table(mclass9, use9_stand2$cluster))
} else {
  message("'mclass9' not found; skipping comparison with the 9-cluster k-means")
}