|
@@ -3,9 +3,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 9,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -170,9 +168,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 10,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -210,9 +206,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 11,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -270,17 +264,15 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "## Error metric\n",
|
|
|
+ "## Error Metric\n",
|
|
|
"\n",
|
|
|
- "The mean squared error metric makes the most sense to evaluate our error. MSE works on continuous numeric data, which fits our data quite well."
|
|
|
+ "The mean squared error metric makes the most sense to evaluate our error. MSE works on continuous numeric data, which fits our data quite well."
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 13,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"train = bike_rentals.sample(frac=.8)"
|
|
@@ -289,9 +281,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 14,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"test = bike_rentals.loc[~bike_rentals.index.isin(train.index)]"
|
|
@@ -300,9 +290,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 18,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -332,9 +320,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 19,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -360,15 +346,13 @@
|
|
|
"source": [
|
|
|
"## Error\n",
|
|
|
"\n",
|
|
|
- "The error is very high, which may be due to the fact that the data has a few extremely high rental counts, but otherwise mostly low counts. Larger errors are penalized more with MSE, which leads to a higher total error."
|
|
|
+ "The error is very high, which may be due to the fact that the data has a few extremely high rental counts but otherwise mostly low counts. Larger errors are penalized more with MSE, which leads to a higher total error."
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 25,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -395,9 +379,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 26,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -419,9 +401,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 28,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -448,7 +428,7 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "## Decision tree error\n",
|
|
|
+ "## Decision Tree Error\n",
|
|
|
"\n",
|
|
|
"By taking the nonlinear predictors into account, the decision tree regressor appears to have much higher accuracy than linear regression."
|
|
|
]
|
|
@@ -456,9 +436,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 30,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -485,9 +463,7 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 31,
|
|
|
- "metadata": {
|
|
|
- "collapsed": false
|
|
|
- },
|
|
|
+ "metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
@@ -510,7 +486,7 @@
|
|
|
"cell_type": "markdown",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "## Random forest error\n",
|
|
|
+ "## Random Forest Error\n",
|
|
|
"\n",
|
|
|
"By removing some of the sources of overfitting, the random forest accuracy is improved over the decision tree accuracy."
|
|
|
]
|
|
@@ -532,7 +508,7 @@
|
|
|
"name": "python",
|
|
|
"nbconvert_exporter": "python",
|
|
|
"pygments_lexer": "ipython3",
|
|
|
- "version": "3.6.4"
|
|
|
+ "version": "3.8.5"
|
|
|
}
|
|
|
},
|
|
|
"nbformat": 4,
|