<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="utf-8">
  <title>Yelp Chart</title>
  <!-- Loaded over HTTPS: an http:// script is blocked as mixed content when this page is served over HTTPS. -->
  <script src="https://d3js.org/d3.v3.js"></script>
  <link rel="stylesheet" href="style.css">
</head>

<body>
  <h1>D3 is really cool!</h1>

  <h2>Dataset</h2>
  <h4 id="data">
    I choose to use the datasets from Yelp Data Challenge, which involve reviews data from 40000+ businesses across the country.
    <br>
    In the following charts, the total review numbers of different cities are demonstrated.
  </h4>

  <h2>Yelp Bubble Chart</h2>
  <div id="years">
    Click:
    <button type="button" id="yelp2006">2006</button>
    <button type="button" id="yelp2007">2007</button>
    <button type="button" id="yelp2008">2008</button>
    <button type="button" id="yelp2009">2009</button>
    <button type="button" id="yelp2010">2010</button>
    <button type="button" id="yelp2011">2011</button>
    <button type="button" id="yelp2012">2012</button>
    <button type="button" id="yelp2013">2013</button>
    <button type="button" id="yelp2014">2014</button>
  </div>
  <h4 id="interaction">
    Three interaction effects have been added to this bubble chart:
    <br>
    mouseover, mouseout and click.
    <br>
    Please feel free to check them out :)
  </h4>

  <!-- bubble.js appends its <svg> to #BubbleChart and binds the year buttons above, so it runs after both exist. -->
  <p id="BubbleChart">
    <script src="bubble.js"></script>
  </p>

  <h2>Yelp Chart</h2>
  <!-- The paragraph is closed explicitly: a <p> cannot contain the <h4> and <p> that follow. -->
  <p id="menu">
    <label>
      Select:
      <select>
        <option>count2006</option>
        <option>count2007</option>
        <option>count2008</option>
        <option>count2009</option>
        <option>count2010</option>
        <option>count2011</option>
        <option>count2012</option>
        <option>count2013</option>
        <option>count2014</option>
      </select>
    </label>
  </p>
  <h4 id="select">
    Select an option, the top ten cities with largest review numbers in the corresponding year will be shown in front of you :)
  </h4>
  <!-- chart.js looks up "#menu select" and "#chart" when it runs, so it is loaded after both exist. -->
  <p id="chart">
    <script src="chart.js"></script>
  </p>

</body>

</html>
// Bubble chart settings: canvas size, thousands-separated integer format, colour palette.
var diameter = 560;
var format = d3.format(",d");
var color1 = d3.scale.category20c();

// Pack layout sized to the square canvas, with a small gap between bubbles.
var pack = d3.layout.pack();
pack.size([diameter, diameter]);
pack.padding(1.5);

// Create the SVG element that hosts the bubbles.
var svg1 = d3.select("#BubbleChart").append("svg");
svg1.attr("width", diameter);
svg1.attr("height", diameter);
svg1.attr("class", "bubble");

// Draw the initial view (index 8 is the last entry of the years list used by changebubble).
changebubble(8);


/*function classes(root) {
    var classes = [];

    function recurse(name, node) {
        if (node.children) node.children.forEach(function (child) {
            recurse(node.name, child);
        });
        else classes.push({
            packageName: name,
            className: node.name,
            value: node.size
        });
    }

    recurse(null, root);
    return {
        children: classes
    };
}*/

/**
 * Load count_s.csv and (re)draw the bubble chart for a single year.
 *
 * One bubble is drawn per CSV row, sized by that row's review count for the
 * chosen year. Rows are keyed by city, so bubbles already on screen transition
 * to their new size and position instead of being recreated.
 *
 * @param {number} i Index into the list of years (0 = 2006 ... 8 = 2014).
 */
function changebubble(i) {
  var years = [2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014];
  var year = years[i];

  // An out-of-range index would size every bubble from a column that does not exist.
  if (year === undefined) {
    console.error("changebubble: no year at index " + i);
    return;
  }
  var column = "count" + year;

  // Local formatter: chart.js runs on the same page and re-declares the global
  // `format` as ".1f", which would otherwise be picked up by these labels.
  var formatCount = d3.format(",d");

  d3.csv("count_s.csv", function(error, csvData) {
    if (error || !csvData) {
      console.error("changebubble: could not load count_s.csv", error);
      return;
    }

    pack.value(function(d) {
      return +d[column];
    });

    // The pack layout expects a hierarchy, so wrap the flat rows in a root.
    var data = {
      name: "city",
      children: csvData
    };

    // pack.nodes() also returns the synthetic root (depth 0). Binding it drew one
    // large enclosing bubble showing the grand total, so keep the city nodes only.
    var cities = pack.nodes(data).filter(function(d) {
      return d.depth > 0;
    });

    var node = svg1.selectAll("g.node")
      .data(cities, function(d) {
        return d.city;
      });

    //Plot the nodes
    var nodeEnter = node.enter().append("g")
      .attr("class", "node")
      .attr("transform", function(d) {
        return "translate(" + d.x + "," + d.y + ")";
      });

    //Add the Circles
    nodeEnter.append("circle")
      .attr("r", function(d) {
        return d.r;
      })
      .style("fill", function(d) {
        return color1(d.city);
      });

    //Add the Titles
    nodeEnter.append("title")
      .text(function(d) {
        return d.city + " : " + formatCount(d.value);
      });

    //Add the Texts
    nodeEnter.append("text")
      .attr("dy", ".3em")
      .style("text-anchor", "middle")
      .text(function(d) {
        return d.city;
      });

    nodeEnter.append("text")
      .attr("class", "value")
      .attr("dy", "1.3em")
      .style("text-anchor", "middle")
      .text(function(d) {
        return formatCount(d.value);
      });

    //Update the bubbles that are already on screen
    node.select("circle")
      .transition().duration(1000)
      .attr("r", function(d) {
        return d.r;
      })
      .style("fill", function(d) {
        return color1(d.city);
      });

    node.transition().duration(1000).attr("class", "node")
      .attr("transform", function(d) {
        return "translate(" + d.x + "," + d.y + ")";
      });

    node.select(".value")
      .text(function(d) {
        return formatCount(d.value);
      });

    node.select("title")
      .text(function(d) {
        return d.city + " : " + formatCount(d.value);
      });

    node.on('mouseover', highlightThisOne);
    node.on('mouseout', restoreAllColors);

    // Circles are filled through `style`, so the `fill` attribute set on a group
    // only reaches its text: every other label takes its bubble's colour while
    // the hovered label turns orange.
    function highlightThisOne() {
      d3.selectAll("g.node").transition().duration(500)
        .attr("fill", function(d) {
          return color1(d.city);
        });
      d3.select(this).transition().duration(500)
        .attr('fill', "orange");
    }

    // Put every label back to bold black.
    function restoreAllColors() {
      d3.selectAll("g.node").transition().duration(500)
        .attr("font-weight", "bold")
        .attr("fill", "black");
    }

    node.exit().remove();
  });
}

// Click handlers for the year buttons: updateBubbleN redraws the bubble chart
// with the N-th entry of the years list used by changebubble (index N - 1).
function updateBubble1() { changebubble(0); }
function updateBubble2() { changebubble(1); }
function updateBubble3() { changebubble(2); }
function updateBubble4() { changebubble(3); }
function updateBubble5() { changebubble(4); }
function updateBubble6() { changebubble(5); }
function updateBubble7() { changebubble(6); }
function updateBubble8() { changebubble(7); }
function updateBubble9() { changebubble(8); }

// Attach each handler to its button.
[
  ["#yelp2006", updateBubble1],
  ["#yelp2007", updateBubble2],
  ["#yelp2008", updateBubble3],
  ["#yelp2009", updateBubble4],
  ["#yelp2010", updateBubble5],
  ["#yelp2011", updateBubble6],
  ["#yelp2012", updateBubble7],
  ["#yelp2013", updateBubble8],
  ["#yelp2014", updateBubble9]
].forEach(function(binding) {
  d3.select(binding[0]).on("click", binding[1]);
});
/* Page title */
h1 {
  color: rgb(253, 141, 60);
}

/* Every <text> element, i.e. the SVG labels drawn by the chart scripts */
text {
  font: bold 10px sans-serif;
}

/* Explanatory notes, one colour per section */
#data {
  color: rgb(49, 163, 84);
}

#interaction {
  color: rgb(49, 130, 189);
}

#select {
  color: #AC63CC;
}

/* Control rows: the year buttons and the drop-down */
#years,
#menu {
  font-size: 17px;
  font-weight: bold;
}
### Describe the graph(s) you want to make. When pseudocode helps for succinctness and clarity, use it. You may also include or link to a bitmap prototype generated in R, or even svg generated by gridsvg.

I chose to use the datasets from the Yelp Data Challenge, which involve review data from 40000+ businesses across the country.
I intend to summarize the review numbers of each city for each year between 2006 and 2014, which will help us see how Yelp has developed over the years in those cities.

There are two datasets here. 
The first one "count.csv" includes all the count numbers of the cities in the original dataset.
The second one "count_s.csv" only includes the count numbers of those cities with over 500 reviews in 2014.

The first bubble chart uses count_s.csv dataset and will show the review numbers of each year when clicking on the corresponding button.
The second chart uses count.csv dataset and will show the top ten cities with most reviews in each year when selecting the corresponding option.

Please check out the R Markdown Document for more information on tidying data.

### Describe your data shape (a typical element that will be bound to some element’s `__data__` property).
As for the following questions, I will use bubble chart as an example to answer the questions.

When plotting the bubble chart, the pack layout is expecting a hierarchical data structure.  
So I have to prepare the flat CSV data accordingly.
svg is bound to parent data, which is city; and circles are bound to children data: cities and the review counts of them.

##### Parent
'__data__': Object
children: Array[15]
depth: 0
name: "city"

##### Children
For example: 
'__data__': Object
city: "Las Vegas"
count2006: "981"
count2007: "4495"
count2008: "9950"
count2009: "17133"
count2010: "34112"
count2011: "53255"
count2012: "60297"
count2013: "79493"
count2014: "60352"
depth: 1
parent: Object


### Create data, as json or csv, that meets the required data contract.

Please check out the R Markdown Document.

### Load your data in js using the convenience d3.csv(file, error, data) or d3.json(file, error, data) methods and use the data callback to indicate success loading the data, either in the console or on the page.

The code is as follows:

    d3.csv("count_s.csv", function(error, csvData) {
      console.log(csvData);
    });
        

I also include a Console Pic (submitted to Github on 11/20) to show the result, which indicates success of loading the data.

### Write an ordered list of comments in js describing the steps you will take to map your data to svg elements. Commit this.

Here is the ordered list of comments:

//Set the CSS 
//Load the d3.js Library
//Set the Attributes of the Graph
//Create SVG Element
//Get the Data
//Plot the nodes
//Add the Circles
//Add the Titles


The comments above outline the code I used to make the initial graph.

### Write code to perform each of the steps you described in comments. Your graph should render on the page. Commit this.

Please see the file bubble.js.
The plunk has been updated with interaction effects.

### Iterate: add comments, then code, to enrich your graph. Add aesthetics, tweak sizes or color scales, add components like grid lines, axes, or annotations.
Because I plot a bubble chart, it is not appropriate to add grid lines or axes. However I add texts on each circle to indicate city name and review numbers.
Please check out the file bubble.js for the corresponding code.

### Interact: add comments, then code, using the event listeners for events such as mouseover, mouseout, click, mousedown, mouseup, and mousemove. Write empty callback functions to be executed when these events are raised. Commit this.
I add three interaction effects to this bubble chart:
1. mouseover: when moused over, the texts of the circle will turn orange and other texts will fade out.
2. mouseout: when moused out, all the texts will restore to the original black color.
3. click: when the button is clicked, the bubble chart will transit to demonstrate the corresponding data.

### Fill in one event callback.
Please check out the file bubble.js.

### Fill in any other event callbacks you described in comments.
Please check out the file bubble.js.

There are still a few problems in the bubble chart that need to be addressed.
1. The main issue here is that there is always a large circle which indicates the total review numbers of all the cities around the small circles.
I'd like to get rid of it and will work on it in the following weeks.
2. The data can't reflect the real situation of the year 2014 because it is still in the middle of 2014 when collecting the data. I need to update it whenever new data is released.

### Add a component to choose subsets of your data
Please take a look at the file chart.js. When plotting the second Yelp Chart, I only use the data of the top ten cities with largest review numbers using a sort function.
"city","count2006","count2007","count2008","count2009","count2010","count2011","count2012","count2013","count2014"
"Avondale",1,7,64,96,220,329,466,668,554
"Chandler",47,217,924,1383,2419,3819,4324,5601,4478
"Edinburgh",0,2,112,300,2623,1422,1926,2570,1221
"Gilbert",13,85,262,732,1253,1937,2320,3665,3231
"Glendale",8,84,407,793,1371,2068,2095,3268,2526
"Goodyear",2,12,98,105,259,435,718,1003,711
"Henderson",20,245,410,801,2078,4062,4820,6362,4702
"Las Vegas",981,4495,9950,17133,34112,53255,60297,79493,60352
"Madison",263,578,908,1657,2363,3083,3836,5417,3723
"Mesa",18,146,499,1174,2024,3249,3739,5176,4206
"Peoria",5,29,130,308,566,975,1086,1680,1313
"Phoenix",304,1556,5442,9423,14047,19398,20590,29523,21967
"Scottsdale",282,1003,2749,4532,7617,11041,11848,16527,11544
"Surprise",0,16,82,181,295,496,892,1019,757
"Tempe",114,697,1765,2662,4407,6297,5973,8113,5799
// Layout constants for the top-ten chart.
var margin = {top: 20, right: 40, bottom: 10, left: 40};
var width = 200;
var height = 500 - margin.top - margin.bottom;

// One-decimal number format for the value labels.
var format = d3.format(".1f");

// State shared by change() and render(): the current top-ten rows, the list of
// count columns, and the column picked in the drop-down.
var topTen;
var counts;
var Selection;

// Linear scale whose domain is set on every render.
var x = d3.scale.linear();
x.range([0, 30]);

// One vertical band per city.
var y = d3.scale.ordinal();
y.rangeRoundBands([0, height], .1);

// Ten-colour palette for the circles.
var color = d3.scale.ordinal().range(["#AC63CC","#7FCD50","#C95A3B","#59673E","#A4ACC9","#C85683","#CEB150","#8CCCA9","#504A76","#5F3333"]);

// SVG canvas, pulled left by the margin width.
var svg2 = d3.select("#chart").append("svg");
svg2.attr("width", width + margin.left + margin.right);
svg2.attr("height", height + margin.top + margin.bottom);
svg2.style("margin-left", -margin.left + "px");

// Redraw the chart whenever a different column is selected.
var menu = d3.select("#menu select");
menu.on("change", change);

    // Parsed rows of count.csv. Declared explicitly so the assignment in the
    // callback below does not create an implicit global.
    var Allcount;

    // Load the data, convert the count columns to numbers, fill the drop-down
    // and draw the first chart.
    d3.csv("count.csv", function(error, data) {
      // Without this guard a failed or empty load surfaced later as a TypeError.
      if (error || !data || !data.length) {
        console.error("Could not load count.csv", error);
        return;
      }
      Allcount = data;

      // Make array of the counts variables
      counts = d3.keys(Allcount[0]).filter(function(key) {
        return key != "city";
      });

      // Make variables numeric
      Allcount.forEach(function(elem) {
        counts.forEach(function(column) {
          elem[column] = +elem[column];
        });
      });

      // Load the counts type into drop-down
      menu.selectAll("option")
          .data(counts)
        .enter().append("option")
          .text(function(d) { return d; });

      // Set the initial value of drop-down when page loads
      menu.property("value", "count2014");

      // Call change function
      change();
    });

    /**
     * Rank the cities by the column currently chosen in the drop-down and
     * redraw the chart with the ten largest.
     *
     * Stores the chosen column in `Selection` and the ranked rows in `topTen`.
     * `Allcount` is sorted in place.
     */
    function change() {
      // The "change" listener is attached before count.csv has finished loading,
      // so ignore any event that arrives while there is no data yet.
      if (typeof Allcount === "undefined" || !Allcount) {
        return;
      }

      Selection = menu.property("value");
      topTen = Allcount.sort(function(a, b) { return b[Selection] - a[Selection]; }).slice(0, 10);
      render(topTen);
    }

    /**
     * Draw the given rows as a vertical list of circles with city and value labels.
     *
     * Rows are keyed by city: new cities slide up from below the chart, cities
     * already on screen move to their new band, and cities that are no longer
     * present slide down and are removed.
     *
     * @param {Array<Object>} data Rows to show, in display order. Each row has a
     *     `city` and a numeric value under the column named by `Selection`.
     */
    function render(data) {

      function cityKey(d) { return d.city; }

      // The radius is the natural log of the count. Clamp it so a count of 0
      // (log = -Infinity), a count below 1 (negative log) or a missing value
      // (NaN) never produces an invalid "r" attribute.
      function radius(d) {
        return Math.max(0, Math.log(d[Selection])) || 0;
      }

      // The first row is used as the maximum; an empty list must not throw.
      x.domain([0, data.length ? data[0][Selection] : 0]);
      y.domain(data.map(cityKey));

      // Enter Selection: new groups start one chart-height below their band, half transparent.
      var entering = svg2.selectAll(".circleGroup")
          .data(data, cityKey)
          .enter()
            .append("g")
              .attr("class", "circleGroup")
              .attr("transform", function(d) { return "translate(0," + (y(d.city) + height) + ")"; })
              .style("fill-opacity", .5);

      entering.append("circle")
          .style("fill", function(d) { return color(d.city); })
          .attr("cx", width / 2)
          .attr("cy", y.rangeBand())
          .attr("r", radius);

      entering.append("text")
          .attr("class", "label")
          .attr("x", (width / 2) + 60)
          .attr("y", y.rangeBand() - 10)
          .attr("dy", ".35em")
          .attr("text-anchor", "start")
          .text(cityKey);

      entering.append("text")
          .attr("class", "value")
          .attr("x", (width / 2) + 60)
          .attr("y", y.rangeBand() + 10)
          .attr("dy", ".35em")
          .attr("text-anchor", "start");

      // Update Selection: move every group to its band and refresh radius and value.
      svg2.selectAll(".circleGroup")
          .data(data, cityKey)
            .transition()
            .duration(2000)
            // d.y0 remembers the band position so the exit transition can start from it.
            .attr("transform", function(d) { return "translate(0," + (d.y0 = y(d.city)) + ")"; })
            .style("fill-opacity", 1)
            .each(function (d, i) {
                d3.select(this)
                  .select("circle")
                  .attr("r", radius);

                d3.select(this)
                  .select(".value")
                  .text(function(d) { return format(d[Selection]); });
            });

      // Exit Selection: slide removed cities down, fade them out and delete them.
      svg2.selectAll(".circleGroup")
          .data(data, cityKey)
          .exit()
            .transition()
            .duration(2000)
            .attr("transform", function(d) { return "translate(0," + (d.y0 + height) + ")"; })
            .style("fill-opacity", 0)
            .remove();
    }
"city","count2006","count2007","count2008","count2009","count2010","count2011","count2012","count2013","count2014"
"Ahwatukee",0,3,7,8,25,26,45,62,37
"Anthem",0,4,12,31,42,82,112,168,105
"Apache Junction",1,0,8,18,50,56,121,169,164
"Atlanta",0,0,3,0,3,3,3,1,3
"Avondale",1,7,64,96,220,329,466,668,554
"Black Canyon City",0,0,4,7,18,39,38,63,32
"Boulder City",0,3,0,2,8,11,13,24,27
"Buckeye",0,6,12,11,29,48,93,149,127
"Cambridge",0,0,0,0,0,0,1,4,6
"Carefree",1,2,16,27,47,64,77,99,55
"Casa Grande",0,1,8,50,87,138,150,208,161
"Cave Creek",4,21,61,154,234,431,469,641,406
"Central City Village",0,0,0,0,0,0,0,0,8
"Chandler",47,217,924,1383,2419,3819,4324,5601,4478
"City of Edinburgh",0,0,0,0,0,2,0,2,1
"Clark County",0,0,0,0,0,0,0,2,1
"Coolidge",0,0,1,1,8,2,13,10,11
"Cottage Grove",0,0,1,3,2,7,5,18,13
"Dalkeith",0,0,0,0,0,0,1,1,0
"Dane",0,0,0,0,1,0,0,1,1
"De Forest",0,1,3,2,10,13,17,19,12
"Edinburgh",0,2,112,300,2623,1422,1926,2570,1221
"El Mirage",0,0,5,8,12,26,35,45,33
"Enterprise",0,0,0,2,0,0,3,14,16
"Fitchburg",3,5,20,41,64,107,143,202,145
"Florence",0,0,0,0,4,11,20,39,32
"Fort McDowell",0,0,1,2,6,7,6,8,9
"Fountain Hills",0,9,13,40,60,138,168,276,281
"Gila Bend",0,1,3,5,8,19,25,38,25
"Gilbert",13,85,262,732,1253,1937,2320,3665,3231
"Glendale",8,84,407,793,1371,2068,2095,3268,2526
"Gold Canyon",0,2,1,3,9,27,46,44,34
"Goodyear",2,12,98,105,259,435,718,1003,711
"Green Valley",2,4,1,6,0,0,0,0,0
"Guadalupe",2,2,6,10,16,13,40,20,14
"Henderson",20,245,410,801,2078,4062,4820,6362,4702
"Higley",0,0,0,0,1,1,3,7,5
"Inverkeithing",0,0,0,0,0,0,1,0,0
"Juniper Green",0,0,0,0,0,3,1,4,2
"Kitchener",0,0,1,22,44,94,100,192,147
"Las Vegas",981,4495,9950,17133,34112,53255,60297,79493,60352
"Lasswade",0,0,0,0,0,0,0,1,1
"Laveen",0,6,8,14,34,39,48,96,57
"Litchfield Park",1,0,21,33,50,105,116,225,171
"Loanhead",0,0,0,0,0,0,0,1,0
"Madison",263,578,908,1657,2363,3083,3836,5417,3723
"Maricopa",0,0,1,19,68,76,123,204,109
"Mc Farland",0,1,6,4,14,21,18,27,13
"Mesa",18,146,499,1174,2024,3249,3739,5176,4206
"Middleton",7,38,76,119,129,222,290,375,224
"Monona",4,8,15,18,25,48,59,88,102
"Morristown",0,0,0,1,1,3,3,12,6
"Musselburgh",0,0,0,0,2,0,2,7,0
"Nellis AFB",0,0,0,1,0,0,5,7,5
"New River",0,0,1,6,5,4,20,15,4
"New Town",0,0,0,0,7,1,0,1,0
"North Scottsdale",2,1,2,5,8,15,20,17,13
"Old Town",0,0,0,0,0,0,0,0,6
"Paradise Valley",0,0,0,3,11,8,1,14,13
"Paradise Valley Valley",12,31,65,80,177,175,193,240,173
"Peoria",5,29,130,308,566,975,1086,1680,1313
"Pheonix",0,0,1,0,4,6,2,5,0
"Phoenix",304,1556,5442,9423,14047,19398,20590,29523,21967
"Queen Creek",0,6,45,104,217,274,354,538,473
"Queensferry",0,0,0,1,2,2,1,3,0
"Ratho",0,0,0,0,0,0,2,3,1
"Rio Verde",0,0,0,0,1,2,2,2,0
"Saint Jacobs",0,0,0,0,1,1,1,1,0
"San Tan Valley",0,0,1,4,18,34,51,87,76
"Scottsdale",282,1003,2749,4532,7617,11041,11848,16527,11544
"Sedona",0,0,1,3,13,20,19,21,4
"South Gyle",0,0,0,0,0,1,0,0,1
"South Queensferry",0,0,0,5,6,2,3,0,3
"Spring Valley",0,0,3,6,23,45,47,54,26
"St Clements",0,0,0,0,0,0,1,0,1
"St Jacobs",0,0,0,0,0,1,2,2,1
"Stockbridge",0,0,0,0,0,0,0,9,2
"Stoughton",0,1,1,0,1,1,3,9,3
"Summerlin",0,1,6,7,24,54,43,52,37
"Sun City",0,3,12,20,45,78,88,162,98
"Sun Lakes",0,2,2,2,1,12,9,6,9
"Sun Prairie",0,2,4,18,54,61,109,205,163
"Surprise",0,16,82,181,295,496,892,1019,757
"Tempe",114,697,1765,2662,4407,6297,5973,8113,5799
"Tolleson",0,1,3,10,18,45,64,75,54
"Tonopah",0,0,0,0,1,3,1,3,3
"Tortilla Flat",0,1,2,4,6,4,6,14,13
"Trempealeau",0,0,0,1,0,1,1,2,2
"Verona",0,6,5,5,22,38,55,90,39
"Waterloo",0,0,8,42,74,148,221,359,269
"Waunakee",0,0,1,6,11,12,8,33,32
"Wickenburg",0,2,3,12,24,59,79,119,82
"Windsor",0,0,0,0,2,4,8,8,2
"Woolwich",0,0,0,0,2,1,0,0,0
"Youngtown",0,0,0,0,0,5,6,21,9
---
title: "Homework 4"
author: "Yan Wang (yw2668)"
date: "November 17, 2014"
output: html_document
published: false
tags: hw4
---

#### 1. Describe the graph(s) you want to make. When pseudocode helps for succinctness and clarity, use it. You may also include or link to a bitmap prototype generated in R, or even svg generated by gridsvg.

I chose to use the datasets from the Yelp Data Challenge, which involve review data from 40000+ businesses across the country.

I intend to summarize the review numbers for each city for each year between 2006 and 2014, which will help us see how Yelp has developed over the years in those cities.

The original dataset is pretty big, so I am not going to include it here. Instead, I show the code used to tidy it and then the resulting tidy dataset.

```{r, eval=FALSE}
#Load data
setwd("/Users/Emily/Documents/Columbia/Data Visualization/Yelp/yelp_dataset_challenge_academic_dataset-2")
library(jsonlite)
# dcast() is not part of base R; reshape2 is assumed to be the package it came from -- TODO confirm.
library(reshape2)
json_file <- "yelp_academic_dataset_business.json"

#Business data
# The file holds one JSON object per line, so join the lines into a single JSON array.
business <- fromJSON(sprintf("[%s]", paste(readLines(json_file), collapse = ",")))
names(business)
#Choose the variables I want
bussub <- subset(business, select = c(business_id, categories, city, review_count, name, state, stars))
#Choose restaurant observations and dump the rest
rst <- bussub$categories %in% grep("Restaurants", bussub$categories, value = TRUE)
biz_data <- subset(bussub, rst)

#Review data
review_file <- "yelp_academic_dataset_review.json"
review <- fromJSON(sprintf("[%s]", paste(readLines(review_file), collapse = ",")))
names(review)
#Choose the variables I want
review_data <- subset(review, select = c(date, business_id))
#Merge data
data_all <- merge(biz_data, review_data, by = "business_id")

#Get the year of each review
tmp <- do.call(rbind, strsplit(data_all$date, "-"))
data_all[c("year", "month", "day")] <- tmp
save(data_all, file = "data_all.Rdata")

#Select the variables I want and prepare it for aggregating
data_count1 <- subset(data_all, select = c(city, review_count, year))
# Each merged row is one review, so give every row a count of 1.
data_count1$review_count <- 1

#Normalise the city names
# Only the city column is rewritten. Running gsub() over the whole data frame
# turned review_count into text, which sum() cannot aggregate.
# Names are matched exactly after trimming surrounding whitespace. An unanchored
# "Paradise" pattern also rewrote "Paradise Valley" into "Paradise Valley Valley".
# "Pheonix" is treated as a misspelling of "Phoenix" and merged into it rather
# than kept as a separate city.
city_fixes <- c(
  "Glendale Az"               = "Glendale",
  "DeForest"                  = "De Forest",
  "Fort Mcdowell"             = "Fort McDowell",
  "McFarland"                 = "Mc Farland",
  "N Las Vegas"               = "Las Vegas",
  "N. Las Vegas"              = "Las Vegas",
  "North Las Vegas"           = "Las Vegas",
  "Nellis Afb"                = "Nellis AFB",
  "NELLIS AFB"                = "Nellis AFB",
  "Paradise"                  = "Paradise Valley",
  "Pheonix"                   = "Phoenix",
  "Phoenix Sky Harbor Center" = "Phoenix",
  "Summerlin South"           = "Summerlin",
  "Sun City West"             = "Sun City"
)
city <- gsub("^\\s+|\\s+$", "", as.character(data_count1$city))
needs_fix <- city %in% names(city_fixes)
city[needs_fix] <- unname(city_fixes[city[needs_fix]])
data_count1$city <- city
save(data_count1, file = "data_count1.Rdata")

#Count the numbers of reviews for each city in different years
count <- dcast(data_count1, city ~ year, sum, value.var = "review_count")
# Pick the 2006-2014 columns by name instead of dropping "column 2" by position,
# so a change in the years present can never shift the labels.
years <- as.character(2006:2014)
count <- count[, c("city", years)]
names(count)[-1] <- paste0("count", years)
write.csv(count, file = "count.csv", row.names = FALSE)

#Choose the cities with 500+ reviews in 2014 as smaller number might not be representative.
count_s <- count[count$count2014 >= 500, ]
write.csv(count_s, file = "count_s.csv", row.names = FALSE)

```


#### The resulting dataset is as follows:
```{r}
# Read the filtered dataset back from disk and print it with data.table's printer.
setwd("/Users/Emily/Documents/Columbia/Data Visualization/Yelp/yelp_dataset_challenge_academic_dataset-2")
count_s <- read.csv("count_s.csv", header = TRUE)
# print.data.table is not exported, hence the ":::" access.
pdt <- data.table:::print.data.table
pdt(count_s)
```

#### For the remaining requirements of this homework, please check out the Plunker link:
http://plnkr.co/edit/87WLm3OmK1jRtcq8p96u?p=preview