# Principal Component Analysis
# Use "Samsung" data 

# Survey question: please indicate how often you use each type of application on a tablet (0-10 scale).
# V1 is the respondent number (skipped when the data is loaded; not part of the analysis)
#V2. Books (novels, eBook, the Bible, cartoon etc.)         
#V3. Business or Personal Information management (scheduler, business card manager, sales manager, time manager, planner etc.)              
#V4. Education (language, vocabulary, tests etc.)             
#V5. Entertainment (games, broadcasts, humor etc.)         
#V6. Finance (banking, money manager, stocks etc.)        
#V7. Medical & Healthcare (hospital searches, infant care, general medical information etc.)        
#V8. Fitness (calorie manager, diet, stress checker etc.)              
#V9. Lifestyle (fashion, religion, cooking, pets, interior design etc.)          
#V10. Music (learning an instrument, song maker, radio, etc.)        
#V11. Navigation (traffic information, maps, transportation info, etc.)         
#V12. News (newspaper etc.)         
#V13. Photography (camera, photo manager, etc.)         
#V14. Reference (dictionary, lexicon, fact book, etc.)        
#V15. Social networking (messenger, Twitter, Skype, etc.)          
#V16. Sports (live sports, learning a sport, etc.)             
#V17. Travel (tour information, travel guides, etc.)         
#V18. Utilities (flashlight, calendar, clock, calculator, etc.)         
#V19. Weather (local / country weather information, etc.)   

# Load the survey data from a tab-separated file. The "NULL" entry in
# colClasses skips the first column (V1, the respondent number); the 18 NA
# entries let read.table() infer the type of the remaining columns.
# NOTE(review): the absolute path is machine-specific -- adjust as needed.
pcadata <- read.table(
  file = "E:/pcadata.txt",
  sep = "\t",
  colClasses = c("NULL", rep(NA, 18))
)

# PCA via prcomp() on standardized variables (unit variance), which is
# equivalent to analysing the correlation matrix. The argument is spelled
# `scale.` in full: `scale = TRUE` only worked via partial argument matching.
results <- prcomp(pcadata, scale. = TRUE)
results

# Eigenvalues of the principal components (squared standard deviations).
# Rule of thumb: retain the components whose eigenvalue exceeds 1.
eig <- results$sdev^2

# Share of the total variance carried by each component, in percent
variance <- 100 * eig / sum(eig)

# Running total of the percentages above
cumvar <- cumsum(variance)

# Collect the three measures in one data frame, one row per component
eig.var.cum <- data.frame(eig, variance, cumvariance = cumvar)
head(eig.var.cum, n = 6L)

# Axis labels shared by both charts: "PC1" ... "PC6"
pc_labels <- paste0("PC", 1:6)

# Bar chart of the eigenvalues of the first six components
barplot(eig.var.cum$eig[1:6], names.arg = pc_labels)

# Bar chart of the percentage of variance of the first six components
barplot(eig.var.cum$variance[1:6], names.arg = pc_labels)

# Repeat the analysis with princomp() for comparison.
# princomp() works from an eigen decomposition of the correlation matrix
# (cor = TRUE), whereas prcomp() uses a singular value decomposition of the
# data; in practice the results are nearly the same.
#
# pcadata already excludes the respondent-number column (it is skipped when
# the file is read), so it holds only the 18 usage variables. Indexing it
# with [, 2:19] asks for a 19th column that does not exist and fails with
# "undefined columns selected"; pass the whole data frame instead.
# The model is fitted once and reused for both the printout and the summary.
pca_princomp <- princomp(pcadata, cor = TRUE)
pca_princomp
summary(pca_princomp)