### Code for running a discriminant function analysis

### uses the MASS package - preinstalled with R
library(MASS)



### Import the data: choose the CSV file interactively, then preview the first rows
datum <- read.csv(file.choose())
head(datum)

### Draw a scatter plot matrix of the three measurements, with points grouped by taxon
### (splom comes from the lattice package)
library(lattice)
splom(~ data.frame(Petals, Sepal, Leaf), data = datum, groups = Taxon)

### Multivariate analysis of variance (MANOVA)
### Taxon is stored as a number, so convert it to a factor to tell R it is a grouping variable
datum$Taxon <- factor(datum$Taxon)
### Test for differences between taxa across the three dependent variables
results <- manova(cbind(Petals, Sepal, Leaf) ~ Taxon, data = datum)
summary(results) ### Overall test: there are some differences among taxa within the three measures
summary.aov(results) ### Univariate tests: examine which measures differ among taxa

### The function for discriminant function analysis (DFA) is 'lda' (MASS package)
help(lda)
### Notice the argument 'prior' - very important when making predictions.

### Run the DFA.
### The predictor names must match the data columns; 'Sepal' is the name used
### by the scatter plot matrix and the MANOVA earlier in this script.
### Note: this overwrites the MANOVA fit previously stored in 'results'.
results <- lda(Taxon ~ Petals + Sepal + Leaf, data = datum)
### There is no summary for lda, just print 'results'
results

### Plot the groups against the discriminant functions
plot(results) ### Notice that the second discriminant function is not really needed



### Use 'predict' to see how well a DFA performs, or to use the DFA
### to classify a new sample
help(predict.lda)
### With no new data, predict() classifies each sample the DFA was fit to.
### The result contains:
###   $class     - the predicted taxon of each sample
###   $posterior - the probability of that classification
###   $x         - the DFA 'scores' (values of LD1 and LD2)
### Compute the predictions once and reuse them below.
train_pred <- predict(results)
train_pred

### Summary table of how accurate the prediction was:
### rows are the actual taxa, columns are the predicted taxa
table(actual = datum$Taxon, predicted = train_pred$class)


### Validation with an independent test sample
### (hold-out validation: the DFA fit on 'datum' is tested on new data)

datum2 <- read.csv(file.choose()) ### import new test sample
### Reuse the training levels so the table keeps one row per training taxon,
### even if a taxon is absent from the test sample
datum2$Taxon <- factor(datum2$Taxon, levels = levels(datum$Taxon))

### Test the DFA with the new sample; compute the predictions once and reuse them
test_pred <- predict(results, datum2)
test_pred
### Rows are the actual taxa, columns are the predicted taxa.
### Test samples whose taxon was not in the training data are counted under NA.
table(actual = datum2$Taxon, predicted = test_pred$class, useNA = "ifany")

### Results will rarely be as good with the test data set, but if they are close to the
### performance of the DFA on the original data set, then the DFA captures the classification in general.

### What if you sampled in an area with no plants from taxon 1, but a sample had a low LD1 score?
### The DFA would still predict that the sample was taxon 1 with high probability.
### To avoid this, change the prior probability of belonging to each class
### (the 'prior' argument of lda; by default the class proportions of the training set are used).
### An easy way to account for prior probability is to sample randomly - then each class
### will be represented in the data according to how often it occurs.