Let's start by loading the needed libraries and getting the data loaded into R.
library(RCurl)
library(reshape2)
library(plyr)
library(ggplot2)
# Base location of the veekun pokedex CSV exports. The canonical raw-content
# host is used directly: raw.github.com is a legacy host that only answers
# with a redirect, and getURL() does not follow redirects by default.
pokedex_csv_url <- "https://raw.githubusercontent.com/veekun/pokedex/master/pokedex/data/csv/"

# Download one pokedex CSV by file name and parse it into a data frame.
#
# file_name: name of the CSV inside the pokedex data directory.
# Returns the parsed data frame.
#
# Certificate verification is left at its default (enabled); passing
# ssl.verifypeer = FALSE would accept any certificate for the download.
read_pokedex_csv <- function(file_name) {
  csv_text <- getURL(paste0(pokedex_csv_url, file_name), followlocation = TRUE)
  read.csv(text = csv_text)
}

pokemon <- read_pokedex_csv("pokemon_species.csv")
pokemon_stats <- read_pokedex_csv("pokemon_stats.csv")
stat_names <- read_pokedex_csv("stat_names.csv")
We don't care about the unevolved forms, so let's throw them away.
# Keep only fully evolved species: a species must have evolved from something
# (non-missing evolves_from_species_id) and must not itself be the predecessor
# of any other species.
#
# Comparing against max(evolves_from_species_id) within each evolution chain
# is not reliable: species ids do not follow evolution order in every chain
# (a pre-evolution can carry a higher id than the stage after it, which keeps
# the middle stage instead of the final one), and in branching chains only the
# branch whose predecessor has the highest id survives.
has_predecessor <- !is.na(pokemon$evolves_from_species_id)
has_successor <- pokemon$id %in% pokemon$evolves_from_species_id
pokemon <- pokemon[has_predecessor & !has_successor, ]
Now, the pokemon attributes are in another table, so let's clean it up and merge everything together into a nice data frame listing our pokemon by name and their various attributes as columns.
# Restrict the stat labels to language id 9 and drop the language column.
is_wanted_language <- stat_names$local_language_id == 9
stat_names <- stat_names[is_wanted_language, c("stat_id", "name")]

# Attach the readable stat name to every base stat row (merge() joins on the
# columns the two tables share), then keep just the three columns needed for
# reshaping. dcast() is called without value.var, so the stat column is
# renamed to "value" before casting.
labelled_stats <- merge(pokemon_stats, stat_names)
labelled_stats <- labelled_stats[, c("pokemon_id", "base_stat", "name")]
names(labelled_stats) <- c("pokemon_id", "value", "name")

# Reshape to one row per pokemon with one column per stat name.
pokemon_stats <- dcast(labelled_stats, pokemon_id ~ name, mean)
names(pokemon_stats)[1] <- "id"

# Join the stats onto the species table and keep the name plus the six stats.
stat_columns <- c("Attack", "Defense", "HP", "Special Attack", "Special Defense", "Speed")
pokemon <- merge(pokemon, pokemon_stats)
pokemon <- pokemon[, c("identifier", stat_columns)]
> head(pokemon)
identifier Attack Defense HP Special Attack Special Defense Speed
1 venusaur 82 83 80 100 100 80
2 charizard 84 78 78 109 85 100
3 blastoise 83 100 79 85 105 78
4 butterfree 45 50 60 80 80 70
5 beedrill 80 40 65 45 80 75
6 pidgeot 80 75 83 70 70 91
Previous experience using hclust has told me I should probably scale these values to give them equal weight. Then we can create the cluster and create a quick plot.
# Standardise every stat column (everything except the identifier in column 1)
# so that each attribute carries equal weight in the distance calculation.
stat_values <- pokemon[, -1]
pokemon.scaled <- scale(stat_values)

# Hierarchical clustering on the pairwise distances between pokemon.
stat_distances <- dist(pokemon.scaled)
hc <- hclust(stat_distances)

# Dendrogram labelled with the pokemon names; hang = -1 lines the labels up
# along the bottom of the plot.
plot(hc, labels = pokemon$identifier, hang = -1)
Cool chart, huh? It's pretty hard to read, though. All I'm really interested in is choosing how many groups I want to have. I'm going to choose 6, as that seems to be a nice place to break it up. Based on what I find, I might want to try a different grouping, but I'll start with 6 and see if some characteristics appear within those groups.
# Cut the dendrogram into six clusters and record each pokemon's cluster.
pokemon$groups <- cutree(hc, k = 6)

# Average every numeric stat within each cluster (columns 2:8 are the six
# stats plus the new groups column), then reshape to long form for plotting.
group_means <- ddply(pokemon[, 2:8], .(groups), numcolwise(mean))
df <- melt(group_means, id.vars = "groups")

# One horizontal bar chart per cluster, one bar per stat.
ggplot(df, aes(variable, value, fill = variable)) +
  geom_bar(stat = "identity") +
  facet_grid(groups ~ .) +
  coord_flip() +
  theme(legend.position = "none")
Now we have something. Immediately we can see that Group 5 consists of the high-HP pokemon, so we'll call them "Meat Shields". Group 6 has high HP and high Attack; we'll call these our "Bulky Attackers". Likewise, Group 4 has high Defense and high Attack, thus the label "Hard Knocks". Group 3 seems to be distinguished by its lack of distinguishing attributes, so I'll call it the "Bench"; I would guess this group has some tricks in its moveset to make it more viable. Groups 1 and 2 are the hardest to distinguish, and I should probably break them into further groups to narrow down their similarities a bit. However, we can at least see that Group 2 has the highest Speed of all groups, so we'll call them the "Speed Demons". That leaves Group 1 with the label "Middle Road", as all of its attributes seem to sit in the middle ground. With these labels in place, let's add them and then see which pokemon are in which groups.
# Give each of the six clusters a descriptive name. levels is stated
# explicitly so that every label is tied to its cluster number.
group_labels <- c("Middle Road", "Speed Demons", "Bench", "Hard Knocks", "Meat Shield", "Bulky Attackers")
pokemon$class <- factor(pokemon$groups, levels = 1:6, labels = group_labels)

# Convert the names to plain character strings so split() returns character
# vectors, then list the pokemon that fall into each class.
pokemon$identifier <- as.character(pokemon$identifier)
split(pokemon$identifier, pokemon$class)
$`Middle Road`
[1] "venusaur" "blastoise" "pidgeot" "nidoqueen" "nidoking"
[6] "vileplume" "golduck" "poliwrath" "tentacruel" "slowbro"
[11] "dewgong" "hypno" "weezing" "seaking" "mr-mime"
[16] "vaporeon" "omastar" "meganium" "feraligatr" "noctowl"
[21] "ledian" "lanturn" "ampharos" "bellossom" "politoed"
[26] "sunflora" "quagsire" "umbreon" "slowking" "magcargo"
[31] "mantine" "kingdra" "swampert" "ludicolo" "pelipper"
[36] "gardevoir" "exploud" "swalot" "grumpig" "altaria"
[41] "whiscash" "claydol" "cradily" "milotic" "glalie"
[46] "walrein" "huntail" "gorebyss" "torterra" "empoleon"
[51] "bastiodon" "wormadam" "vespiquen" "gastrodon" "skuntank"
[56] "bronzong" "abomasnow" "magnezone" "lickilicky" "togekiss"
[61] "glaceon" "probopass" "dusknoir" "musharna" "seismitoad"
[66] "scrafty" "cofagrigus" "garbodor" "gothitelle" "reuniclus"
[71] "vanilluxe" "escavalier" "amoonguss" "jellicent" "ferrothorn"
[76] "beheeyem" "chandelure" "mandibuzz" "volcarona"
$`Speed Demons`
[1] "charizard" "butterfree" "beedrill" "raticate" "fearow"
[6] "arbok" "ninetales" "venomoth" "dugtrio" "persian"
[11] "primeape" "arcanine" "alakazam" "victreebel" "rapidash"
[16] "dodrio" "gengar" "electrode" "exeggutor" "hitmonlee"
[21] "hitmonchan" "starmie" "jynx" "electabuzz" "magmar"
[26] "jolteon" "flareon" "typhlosion" "furret" "crobat"
[31] "xatu" "jumpluff" "espeon" "octillery" "houndoom"
[36] "hitmontop" "sceptile" "blaziken" "mightyena" "linoone"
[41] "dustox" "shiftry" "swellow" "masquerain" "breloom"
[46] "ninjask" "delcatty" "medicham" "manectric" "roselia"
[51] "sharpedo" "camerupt" "flygon" "cacturne" "crawdaunt"
[56] "banette" "chimecho" "salamence" "infernape" "bibarel"
[61] "kricketune" "luxray" "mothim" "floatzel" "cherrim"
[66] "ambipom" "lopunny" "mismagius" "honchkrow" "purugly"
[71] "lucario" "toxicroak" "lumineon" "weavile" "yanmega"
[76] "porygon-z" "gallade" "froslass" "serperior" "emboar"
[81] "samurott" "watchog" "liepard" "simisage" "simisear"
[86] "simipour" "unfezant" "zebstrika" "swoobat" "leavanny"
[91] "scolipede" "whimsicott" "lilligant" "archeops" "zoroark"
[96] "cinccino" "swanna" "sawsbuck" "galvantula" "eelektross"
[101] "accelgor" "mienshao" "hydreigon"
$Bench
[1] "pikachu" "clefairy" "jigglypuff" "marill" "shedinja"
$`Hard Knocks`
[1] "sandslash" "parasect" "machamp" "golem" "muk"
[6] "cloyster" "kingler" "marowak" "gyarados" "kabutops"
[11] "dragonite" "ariados" "sudowoodo" "forretress" "steelix"
[16] "granbull" "scizor" "ursaring" "donphan" "tyranitar"
[21] "aggron" "armaldo" "metagross" "garchomp" "hippowdon"
[26] "drapion" "rhyperior" "tangrowth" "leafeon" "gliscor"
[31] "mamoswine" "stoutland" "gigalith" "conkeldurr" "krookodile"
[36] "crustle" "carracosta" "klinklang" "haxorus" "beartic"
[41] "golurk" "bisharp" "braviary"
$`Meat Shield`
[1] "chansey" "wobbuffet"
$`Bulky Attackers`
[1] "snorlax" "slaking" "hariyama" "wailord" "staraptor"
[6] "rampardos" "drifblim" "excadrill" "darmanitan"
Unfortunately, I'm not an experienced enough trainer to know whether these classifications are accurate. Maybe a more experienced player can give us some feedback on how well this worked. At least a few of the pokemon I'm more familiar with are in the groups I would expect them to be in. To follow up, I think I'll try to break the groups up a bit more and see if I can get some distinction within the larger groups. It might also be interesting to see which types fit into these classifications most frequently.
Link to complete script for anyone interested.
No comments:
Post a Comment