Step 3: Generate association rules
# Mine association rules from the transactions in `df` with the Apriori
# algorithm, using the thresholds below.
rules <- apriori(
  df,
  parameter = list(
    # support: how frequently the items in the LHS and RHS occur together
    support = 0.01,
    # confidence: how often the rule has been found to be true
    confidence = 0.5
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.01 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[21 item(s), 154 transaction(s)] done [0.00s].
## sorting and recoding items ... [21 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.00s].
## writing ... [346 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Summarize the mined rule set: number of rules, rule-length distribution,
# and the quality measures (support, confidence, lift, count).
summary(rules)
## set of 346 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4 5 6
## 26 150 124 40 6
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 3.000 3.000 3.566 4.000 6.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.01299 Min. :0.5000 Min. : 1.100 Min. : 2.00
## 1st Qu.:0.01299 1st Qu.:0.6667 1st Qu.: 2.200 1st Qu.: 2.00
## Median :0.01299 Median :1.0000 Median : 5.704 Median : 2.00
## Mean :0.02104 Mean :0.8667 Mean : 7.753 Mean : 3.24
## 3rd Qu.:0.01948 3rd Qu.:1.0000 3rd Qu.:12.833 3rd Qu.: 3.00
## Max. :0.22078 Max. :1.0000 Max. :19.250 Max. :34.00
##
## mining info:
## data ntransactions support confidence
## df 154 0.01 0.5
# Visualize the rules. The plot method reports that jitter is added to reduce
# overplotting; pass jitter = 0 to turn it off.
plot(rules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Keep only the rules whose lift is higher than 18. Lift is used here as a
# measure of how interesting a rule is, complementing support and confidence.
betterrules <- rules[
  quality(rules)[["lift"]] > 18
]
# Summarize the filtered rule set: how many rules passed the lift filter and
# what their quality measures look like.
summary(betterrules)
## set of 56 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4 5 6
## 2 16 24 12 2
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 3.000 4.000 3.929 4.250 6.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.01299 Min. :1 Min. :19.25 Min. :2.000
## 1st Qu.:0.01299 1st Qu.:1 1st Qu.:19.25 1st Qu.:2.000
## Median :0.01299 Median :1 Median :19.25 Median :2.000
## Mean :0.01693 Mean :1 Mean :19.25 Mean :2.607
## 3rd Qu.:0.01948 3rd Qu.:1 3rd Qu.:19.25 3rd Qu.:3.000
## Max. :0.05195 Max. :1 Max. :19.25 Max. :8.000
##
## mining info:
## data ntransactions support confidence
## df 154 0.01 0.5
# Inspect the high-lift rules one by one with inspect(), which lists each
# rule's LHS, RHS, support, confidence, lift, and count.
inspect(betterrules)
## lhs rhs support confidence
## [1] {series} => {time} 0.05194805 1
## [2] {time} => {series} 0.05194805 1
## [3] {computing,package} => {parallel} 0.01298701 1
## [4] {computing,users} => {parallel} 0.01298701 1
## [5] {code,series} => {time} 0.01298701 1
## [6] {code,time} => {series} 0.01298701 1
## [7] {series,slides} => {time} 0.01298701 1
## [8] {slides,time} => {series} 0.01298701 1
## [9] {examples,series} => {time} 0.01298701 1
## [10] {examples,time} => {series} 0.01298701 1
## [11] {series,users} => {time} 0.01298701 1
## [12] {time,users} => {series} 0.01298701 1
## [13] {analysis,series} => {time} 0.02597403 1
## [14] {analysis,time} => {series} 0.02597403 1
## [15] {mining,series} => {time} 0.01948052 1
## [16] {mining,time} => {series} 0.01948052 1
## [17] {r,series} => {time} 0.03246753 1
## [18] {r,time} => {series} 0.03246753 1
## [19] {computing,package,r} => {parallel} 0.01298701 1
## [20] {computing,r,users} => {parallel} 0.01298701 1
## [21] {code,examples,series} => {time} 0.01298701 1
## [22] {code,examples,time} => {series} 0.01298701 1
## [23] {code,r,series} => {time} 0.01298701 1
## [24] {code,r,time} => {series} 0.01298701 1
## [25] {examples,r,series} => {time} 0.01298701 1
## [26] {examples,r,time} => {series} 0.01298701 1
## [27] {analysis,series,users} => {time} 0.01298701 1
## [28] {analysis,time,users} => {series} 0.01298701 1
## [29] {mining,series,users} => {time} 0.01298701 1
## [30] {mining,time,users} => {series} 0.01298701 1
## [31] {r,series,users} => {time} 0.01298701 1
## [32] {r,time,users} => {series} 0.01298701 1
## [33] {analysis,mining,series} => {time} 0.01948052 1
## [34] {analysis,mining,time} => {series} 0.01948052 1
## [35] {analysis,r,series} => {time} 0.01948052 1
## [36] {analysis,r,time} => {series} 0.01948052 1
## [37] {mining,r,series} => {time} 0.01948052 1
## [38] {mining,r,time} => {series} 0.01948052 1
## [39] {analysis,mining,users} => {series} 0.01298701 1
## [40] {analysis,mining,r} => {series} 0.01948052 1
## [41] {analysis,mining,users} => {time} 0.01298701 1
## [42] {analysis,mining,r} => {time} 0.01948052 1
## [43] {code,examples,r,series} => {time} 0.01298701 1
## [44] {code,examples,r,time} => {series} 0.01298701 1
## [45] {analysis,mining,series,users} => {time} 0.01298701 1
## [46] {analysis,mining,time,users} => {series} 0.01298701 1
## [47] {analysis,r,series,users} => {time} 0.01298701 1
## [48] {analysis,r,time,users} => {series} 0.01298701 1
## [49] {mining,r,series,users} => {time} 0.01298701 1
## [50] {mining,r,time,users} => {series} 0.01298701 1
## [51] {analysis,mining,r,series} => {time} 0.01948052 1
## [52] {analysis,mining,r,time} => {series} 0.01948052 1
## [53] {analysis,mining,r,users} => {series} 0.01298701 1
## [54] {analysis,mining,r,users} => {time} 0.01298701 1
## [55] {analysis,mining,r,series,users} => {time} 0.01298701 1
## [56] {analysis,mining,r,time,users} => {series} 0.01298701 1
## lift count
## [1] 19.25 8
## [2] 19.25 8
## [3] 19.25 2
## [4] 19.25 2
## [5] 19.25 2
## [6] 19.25 2
## [7] 19.25 2
## [8] 19.25 2
## [9] 19.25 2
## [10] 19.25 2
## [11] 19.25 2
## [12] 19.25 2
## [13] 19.25 4
## [14] 19.25 4
## [15] 19.25 3
## [16] 19.25 3
## [17] 19.25 5
## [18] 19.25 5
## [19] 19.25 2
## [20] 19.25 2
## [21] 19.25 2
## [22] 19.25 2
## [23] 19.25 2
## [24] 19.25 2
## [25] 19.25 2
## [26] 19.25 2
## [27] 19.25 2
## [28] 19.25 2
## [29] 19.25 2
## [30] 19.25 2
## [31] 19.25 2
## [32] 19.25 2
## [33] 19.25 3
## [34] 19.25 3
## [35] 19.25 3
## [36] 19.25 3
## [37] 19.25 3
## [38] 19.25 3
## [39] 19.25 2
## [40] 19.25 3
## [41] 19.25 2
## [42] 19.25 3
## [43] 19.25 2
## [44] 19.25 2
## [45] 19.25 2
## [46] 19.25 2
## [47] 19.25 2
## [48] 19.25 2
## [49] 19.25 2
## [50] 19.25 2
## [51] 19.25 3
## [52] 19.25 3
## [53] 19.25 2
## [54] 19.25 2
## [55] 19.25 2
## [56] 19.25 2
# Generate another rule set, restricting the RHS to exactly "analysis".
rules2 <- apriori(
  df,
  parameter = list(
    # support: how frequently the items in the LHS and RHS occur together
    support = 0.01,
    # confidence: how often the rule has been found to be true
    confidence = 0.5
  ),
  # All other items default to the LHS; the RHS is set to "analysis".
  appearance = list(
    default = "lhs",
    rhs = c("analysis")
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.01 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 1
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[21 item(s), 154 transaction(s)] done [0.00s].
## sorting and recoding items ... [21 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.00s].
## writing ... [45 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Summarize the second rule set (rules whose RHS is restricted to "analysis"):
# number of rules, rule-length distribution, and quality measures.
summary(rules2)
## set of 45 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4 5 6
## 4 19 15 6 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 3.000 3.000 3.578 4.000 6.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.01299 Min. :0.5000 Min. :3.348 Min. : 2.000
## 1st Qu.:0.01299 1st Qu.:0.6667 1st Qu.:4.464 1st Qu.: 2.000
## Median :0.01299 Median :1.0000 Median :6.696 Median : 2.000
## Mean :0.01919 Mean :0.8520 Mean :5.705 Mean : 2.956
## 3rd Qu.:0.01948 3rd Qu.:1.0000 3rd Qu.:6.696 3rd Qu.: 3.000
## Max. :0.07792 Max. :1.0000 Max. :6.696 Max. :12.000
##
## mining info:
## data ntransactions support confidence
## df 154 0.01 0.5
# Visualize rules2. The plot method reports that jitter is added to reduce
# overplotting; pass jitter = 0 to turn it off.
plot(rules2)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
