|
@@ -44,7 +44,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 14,
|
|
|
+ "execution_count": 1,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -54,7 +54,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 15,
|
|
|
+ "execution_count": 2,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -64,26 +64,28 @@
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
"RangeIndex: 50000 entries, 0 to 49999\n",
|
|
|
"Data columns (total 20 columns):\n",
|
|
|
- "dateCrawled 50000 non-null object\n",
|
|
|
- "name 50000 non-null object\n",
|
|
|
- "seller 50000 non-null object\n",
|
|
|
- "offerType 50000 non-null object\n",
|
|
|
- "price 50000 non-null object\n",
|
|
|
- "abtest 50000 non-null object\n",
|
|
|
- "vehicleType 44905 non-null object\n",
|
|
|
- "yearOfRegistration 50000 non-null int64\n",
|
|
|
- "gearbox 47320 non-null object\n",
|
|
|
- "powerPS 50000 non-null int64\n",
|
|
|
- "model 47242 non-null object\n",
|
|
|
- "odometer 50000 non-null object\n",
|
|
|
- "monthOfRegistration 50000 non-null int64\n",
|
|
|
- "fuelType 45518 non-null object\n",
|
|
|
- "brand 50000 non-null object\n",
|
|
|
- "notRepairedDamage 40171 non-null object\n",
|
|
|
- "dateCreated 50000 non-null object\n",
|
|
|
- "nrOfPictures 50000 non-null int64\n",
|
|
|
- "postalCode 50000 non-null int64\n",
|
|
|
- "lastSeen 50000 non-null object\n",
|
|
|
+ " # Column Non-Null Count Dtype \n",
|
|
|
+ "--- ------ -------------- ----- \n",
|
|
|
+ " 0 dateCrawled 50000 non-null object\n",
|
|
|
+ " 1 name 50000 non-null object\n",
|
|
|
+ " 2 seller 50000 non-null object\n",
|
|
|
+ " 3 offerType 50000 non-null object\n",
|
|
|
+ " 4 price 50000 non-null object\n",
|
|
|
+ " 5 abtest 50000 non-null object\n",
|
|
|
+ " 6 vehicleType 44905 non-null object\n",
|
|
|
+ " 7 yearOfRegistration 50000 non-null int64 \n",
|
|
|
+ " 8 gearbox 47320 non-null object\n",
|
|
|
+ " 9 powerPS 50000 non-null int64 \n",
|
|
|
+ " 10 model 47242 non-null object\n",
|
|
|
+ " 11 odometer 50000 non-null object\n",
|
|
|
+ " 12 monthOfRegistration 50000 non-null int64 \n",
|
|
|
+ " 13 fuelType 45518 non-null object\n",
|
|
|
+ " 14 brand 50000 non-null object\n",
|
|
|
+ " 15 notRepairedDamage 40171 non-null object\n",
|
|
|
+ " 16 dateCreated 50000 non-null object\n",
|
|
|
+ " 17 nrOfPictures 50000 non-null int64 \n",
|
|
|
+ " 18 postalCode 50000 non-null int64 \n",
|
|
|
+ " 19 lastSeen 50000 non-null object\n",
|
|
|
"dtypes: int64(5), object(15)\n",
|
|
|
"memory usage: 7.6+ MB\n"
|
|
|
]
|
|
@@ -288,7 +290,7 @@
|
|
|
"4 39218 2016-04-01 14:38:50 "
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 15,
|
|
|
+ "execution_count": 2,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -352,7 +354,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 16,
|
|
|
+ "execution_count": 4,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -555,7 +557,7 @@
|
|
|
"4 2016-04-01 14:38:50 "
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 16,
|
|
|
+ "execution_count": 4,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -680,7 +682,7 @@
|
|
|
" </tr>\n",
|
|
|
" <tr>\n",
|
|
|
" <th>top</th>\n",
|
|
|
- " <td>2016-03-09 11:54:38</td>\n",
|
|
|
+ " <td>2016-03-10 15:36:24</td>\n",
|
|
|
" <td>Ford_Fiesta</td>\n",
|
|
|
" <td>privat</td>\n",
|
|
|
" <td>Angebot</td>\n",
|
|
@@ -893,7 +895,7 @@
|
|
|
" date_crawled name seller offer_type price ab_test \\\n",
|
|
|
"count 50000 50000 50000 50000 50000 50000 \n",
|
|
|
"unique 48213 38754 2 2 2357 2 \n",
|
|
|
- "top 2016-03-09 11:54:38 Ford_Fiesta privat Angebot $0 test \n",
|
|
|
+ "top 2016-03-10 15:36:24 Ford_Fiesta privat Angebot $0 test \n",
|
|
|
"freq 3 78 49999 49999 1421 25756 \n",
|
|
|
"mean NaN NaN NaN NaN NaN NaN \n",
|
|
|
"std NaN NaN NaN NaN NaN NaN \n",
|
|
@@ -994,7 +996,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 17,
|
|
|
+ "execution_count": 7,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -1010,7 +1012,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 18,
|
|
|
+ "execution_count": 8,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1024,7 +1026,7 @@
|
|
|
"Name: price, dtype: int64"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 18,
|
|
|
+ "execution_count": 8,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1040,7 +1042,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 19,
|
|
|
+ "execution_count": 9,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1054,7 +1056,7 @@
|
|
|
"Name: odometer_km, dtype: int64"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 19,
|
|
|
+ "execution_count": 9,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1275,7 +1277,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 20,
|
|
|
+ "execution_count": 14,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1292,7 +1294,7 @@
|
|
|
"Name: price, dtype: float64"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 20,
|
|
|
+ "execution_count": 14,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1328,7 +1330,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 28,
|
|
|
+ "execution_count": 15,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1401,7 +1403,7 @@
|
|
|
"4 2016-04-01 14:38:50 2016-04-01 00:00:00 2016-04-01 14:38:50"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 28,
|
|
|
+ "execution_count": 15,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1412,7 +1414,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 21,
|
|
|
+ "execution_count": 16,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1455,7 +1457,7 @@
|
|
|
"Name: date_crawled, dtype: float64"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 21,
|
|
|
+ "execution_count": 16,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1470,7 +1472,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 29,
|
|
|
+ "execution_count": 17,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1496,8 +1498,8 @@
|
|
|
"2016-03-22 0.032987\n",
|
|
|
"2016-03-09 0.033090\n",
|
|
|
"2016-03-08 0.033296\n",
|
|
|
- "2016-03-30 0.033687\n",
|
|
|
"2016-04-01 0.033687\n",
|
|
|
+ "2016-03-30 0.033687\n",
|
|
|
"2016-03-29 0.034099\n",
|
|
|
"2016-03-15 0.034284\n",
|
|
|
"2016-03-19 0.034778\n",
|
|
@@ -1513,7 +1515,7 @@
|
|
|
"Name: date_crawled, dtype: float64"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 29,
|
|
|
+ "execution_count": 17,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1535,7 +1537,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 150,
|
|
|
+ "execution_count": 18,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1578,7 +1580,7 @@
|
|
|
"Name: last_seen, dtype: float64"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 150,
|
|
|
+ "execution_count": 18,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1602,7 +1604,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 33,
|
|
|
+ "execution_count": 19,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1620,57 +1622,7 @@
|
|
|
"2015-09-09 0.000021\n",
|
|
|
"2015-11-10 0.000021\n",
|
|
|
"2015-12-05 0.000021\n",
|
|
|
- "2015-12-30 0.000021\n",
|
|
|
- "2016-01-03 0.000021\n",
|
|
|
- "2016-01-07 0.000021\n",
|
|
|
- "2016-01-10 0.000041\n",
|
|
|
- "2016-01-13 0.000021\n",
|
|
|
- "2016-01-14 0.000021\n",
|
|
|
- "2016-01-16 0.000021\n",
|
|
|
- "2016-01-22 0.000021\n",
|
|
|
- "2016-01-27 0.000062\n",
|
|
|
- "2016-01-29 0.000021\n",
|
|
|
- "2016-02-01 0.000021\n",
|
|
|
- "2016-02-02 0.000041\n",
|
|
|
- "2016-02-05 0.000041\n",
|
|
|
- "2016-02-07 0.000021\n",
|
|
|
- "2016-02-08 0.000021\n",
|
|
|
- "2016-02-09 0.000021\n",
|
|
|
- "2016-02-11 0.000021\n",
|
|
|
- "2016-02-12 0.000041\n",
|
|
|
- "2016-02-14 0.000041\n",
|
|
|
- "2016-02-16 0.000021\n",
|
|
|
- "2016-02-17 0.000021\n",
|
|
|
- "2016-02-18 0.000041\n",
|
|
|
- "2016-02-19 0.000062\n",
|
|
|
- "2016-02-20 0.000041\n",
|
|
|
- "2016-02-21 0.000062\n",
|
|
|
" ... \n",
|
|
|
- "2016-03-09 0.033151\n",
|
|
|
- "2016-03-10 0.031895\n",
|
|
|
- "2016-03-11 0.032904\n",
|
|
|
- "2016-03-12 0.036755\n",
|
|
|
- "2016-03-13 0.017008\n",
|
|
|
- "2016-03-14 0.035190\n",
|
|
|
- "2016-03-15 0.034016\n",
|
|
|
- "2016-03-16 0.030125\n",
|
|
|
- "2016-03-17 0.031278\n",
|
|
|
- "2016-03-18 0.013590\n",
|
|
|
- "2016-03-19 0.033687\n",
|
|
|
- "2016-03-20 0.037949\n",
|
|
|
- "2016-03-21 0.037579\n",
|
|
|
- "2016-03-22 0.032801\n",
|
|
|
- "2016-03-23 0.032060\n",
|
|
|
- "2016-03-24 0.029280\n",
|
|
|
- "2016-03-25 0.031751\n",
|
|
|
- "2016-03-26 0.032266\n",
|
|
|
- "2016-03-27 0.030989\n",
|
|
|
- "2016-03-28 0.034984\n",
|
|
|
- "2016-03-29 0.034037\n",
|
|
|
- "2016-03-30 0.033501\n",
|
|
|
- "2016-03-31 0.031875\n",
|
|
|
- "2016-04-01 0.033687\n",
|
|
|
- "2016-04-02 0.035149\n",
|
|
|
"2016-04-03 0.038855\n",
|
|
|
"2016-04-04 0.036858\n",
|
|
|
"2016-04-05 0.011819\n",
|
|
@@ -1679,7 +1631,7 @@
|
|
|
"Name: ad_created, Length: 76, dtype: float64"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 33,
|
|
|
+ "execution_count": 19,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1702,7 +1654,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 34,
|
|
|
+ "execution_count": 20,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1719,7 +1671,7 @@
|
|
|
"Name: registration_year, dtype: float64"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 34,
|
|
|
+ "execution_count": 20,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1753,16 +1705,16 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 35,
|
|
|
+ "execution_count": 21,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
|
"text/plain": [
|
|
|
- "0.038793369710697002"
|
|
|
+ "0.038793369710697"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 35,
|
|
|
+ "execution_count": 21,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1827,7 +1779,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 48,
|
|
|
+ "execution_count": 23,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1876,7 +1828,7 @@
|
|
|
"Name: brand, dtype: float64"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 48,
|
|
|
+ "execution_count": 23,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1896,7 +1848,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 23,
|
|
|
+ "execution_count": 24,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -1915,21 +1867,21 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 24,
|
|
|
+ "execution_count": 25,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"data": {
|
|
|
"text/plain": [
|
|
|
- "{'audi': 9336,\n",
|
|
|
+ "{'volkswagen': 5402,\n",
|
|
|
" 'bmw': 8332,\n",
|
|
|
- " 'ford': 3749,\n",
|
|
|
- " 'mercedes_benz': 8628,\n",
|
|
|
" 'opel': 2975,\n",
|
|
|
- " 'volkswagen': 5402}"
|
|
|
+ " 'mercedes_benz': 8628,\n",
|
|
|
+ " 'audi': 9336,\n",
|
|
|
+ " 'ford': 3749}"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 24,
|
|
|
+ "execution_count": 25,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -1994,28 +1946,28 @@
|
|
|
" </thead>\n",
|
|
|
" <tbody>\n",
|
|
|
" <tr>\n",
|
|
|
- " <th>audi</th>\n",
|
|
|
- " <td>9336</td>\n",
|
|
|
+ " <th>volkswagen</th>\n",
|
|
|
+ " <td>5402</td>\n",
|
|
|
" </tr>\n",
|
|
|
" <tr>\n",
|
|
|
" <th>bmw</th>\n",
|
|
|
" <td>8332</td>\n",
|
|
|
" </tr>\n",
|
|
|
" <tr>\n",
|
|
|
- " <th>ford</th>\n",
|
|
|
- " <td>3749</td>\n",
|
|
|
+ " <th>opel</th>\n",
|
|
|
+ " <td>2975</td>\n",
|
|
|
" </tr>\n",
|
|
|
" <tr>\n",
|
|
|
" <th>mercedes_benz</th>\n",
|
|
|
" <td>8628</td>\n",
|
|
|
" </tr>\n",
|
|
|
" <tr>\n",
|
|
|
- " <th>opel</th>\n",
|
|
|
- " <td>2975</td>\n",
|
|
|
+ " <th>audi</th>\n",
|
|
|
+ " <td>9336</td>\n",
|
|
|
" </tr>\n",
|
|
|
" <tr>\n",
|
|
|
- " <th>volkswagen</th>\n",
|
|
|
- " <td>5402</td>\n",
|
|
|
+ " <th>ford</th>\n",
|
|
|
+ " <td>3749</td>\n",
|
|
|
" </tr>\n",
|
|
|
" </tbody>\n",
|
|
|
"</table>\n",
|
|
@@ -2023,12 +1975,12 @@
|
|
|
],
|
|
|
"text/plain": [
|
|
|
" mean_price\n",
|
|
|
- "audi 9336\n",
|
|
|
+ "volkswagen 5402\n",
|
|
|
"bmw 8332\n",
|
|
|
- "ford 3749\n",
|
|
|
- "mercedes_benz 8628\n",
|
|
|
"opel 2975\n",
|
|
|
- "volkswagen 5402"
|
|
|
+ "mercedes_benz 8628\n",
|
|
|
+ "audi 9336\n",
|
|
|
+ "ford 3749"
|
|
|
]
|
|
|
},
|
|
|
"execution_count": 26,
|
|
@@ -2043,7 +1995,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 44,
|
|
|
+ "execution_count": 27,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -2060,7 +2012,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 51,
|
|
|
+ "execution_count": 28,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -2126,7 +2078,7 @@
|
|
|
"ford 124266"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 51,
|
|
|
+ "execution_count": 28,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -2138,7 +2090,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 52,
|
|
|
+ "execution_count": 29,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
@@ -2211,7 +2163,7 @@
|
|
|
"ford 124266 3749"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 52,
|
|
|
+ "execution_count": 29,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
@@ -2245,7 +2197,7 @@
|
|
|
"name": "python",
|
|
|
"nbconvert_exporter": "python",
|
|
|
"pygments_lexer": "ipython3",
|
|
|
- "version": "3.6.7"
|
|
|
+ "version": "3.8.2"
|
|
|
}
|
|
|
},
|
|
|
"nbformat": 4,
|