# Principal Component Analysis
# Use "Samsung" data
#
# Please indicate the frequency of application you use for a tablet, 0-10 scale.
# V1 is respondent number
# V2.  Books (novels, eBook, the Bible, cartoon etc.)
# V3.  Business or Personal Information management (scheduler, business card
#      manager, sales manager, time manager, planner etc.)
# V4.  Education (language, vocabulary, tests etc.)
# V5.  Entertainment (games, broadcasts, humor etc.)
# V6.  Finance (banking, money manager, stocks etc.)
# V7.  Medical & Healthcare (hospital searches, infant care, general medical
#      information etc.)
# V8.  Fitness (calorie manager, diet, stress checker etc.)
# V9.  Lifestyle (fashion, religion, cooking, pets, interior design etc.)
# V10. Music (learning an instrument, song maker, radio, etc.)
# V11. Navigation (traffic information, maps, transportation info, etc.)
# V12. News (newspaper etc.)
# V13. Photography (camera, photo manager, etc.)
# V14. Reference (dictionary, lexicon, fact book, etc.)
# V15. Social networking (messenger, Twitter, Skype, etc.)
# V16. Sports (live sports, learning a sport, etc.)
# V17. Travel (tour information, travel guides, etc.)
# V18. Utilities (flashlight, calendar, clock, calculator, etc.)
# V19. Weather (local / country weather information, etc.)
# Load the tab-separated survey file. read.table() is called without
# header = TRUE, so columns get the default names V1, V2, ...; the "NULL"
# entry in colClasses drops the first column and the 18 NA entries let R
# infer the types of the remaining columns. pcadata therefore has exactly
# 18 columns.
pcadata <- read.table(
  file = "E:/pcadata.txt",
  sep = "\t",
  colClasses = c("NULL", rep(NA, 18))
)

# PCA with prcomp ----
# scale. = TRUE standardises each variable to unit variance before the
# decomposition. The argument name is spelled out in full ("scale.") rather
# than relying on partial argument matching.
results <- prcomp(pcadata, scale. = TRUE)
results

# Eigenvalues of the principal components are the squared standard
# deviations. Adopt the components with an eigenvalue greater than 1.
eig <- results$sdev^2

# Percentage of total variance explained by each component.
variance <- eig * 100 / sum(eig)

# Cumulative percentage of variance explained.
cumvar <- cumsum(variance)

# Combine eigenvalues, variances and cumulative variances in one data frame.
eig.var.cum <- data.frame(eig = eig, variance = variance, cumvariance = cumvar)
head(eig.var.cum)

# Labels for the first six components, shared by both bar plots.
pc_labels <- paste0("PC", 1:6)

# Bar plot of the eigenvalues of the first six components.
barplot(eig.var.cum$eig[1:6], names.arg = pc_labels)

# Bar plot of the variance percentages of the first six components.
barplot(eig.var.cum$variance[1:6], names.arg = pc_labels)

# PCA with princomp ----
# princomp uses a different computation method than prcomp, but in practice
# the results are pretty much the same. cor = TRUE runs the analysis on the
# correlation matrix.
# pcadata already excludes the respondent-number column and has only 18
# columns, so it is passed whole: indexing it with [, 2:19] would request a
# 19th column that does not exist. The model is fitted once and reused for
# both the printed fit and its summary.
princomp_fit <- princomp(pcadata, cor = TRUE)
princomp_fit
summary(princomp_fit)