{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Introduction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "pd.options.display.max_columns = 99"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Compute each column's missing value counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "company_country_code 1\n",
       "company_name 1\n",
       "company_permalink 1\n",
       "company_region 1\n",
       "investor_region 2\n",
       "investor_permalink 2\n",
       "investor_name 2\n",
       "funded_quarter 3\n",
       "funded_at 3\n",
       "funded_month 3\n",
       "funded_year 3\n",
       "funding_round_type 3\n",
       "company_state_code 492\n",
       "company_city 533\n",
       "company_category_code 643\n",
       "raised_amount_usd 3599\n",
       "investor_country_code 12001\n",
       "investor_city 12480\n",
       "investor_state_code 16809\n",
       "investor_category_code 50427\n",
       "dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# read_csv(chunksize=...) returns a one-shot iterator, so build it in the same\n",
    "# cell that consumes it; re-running this cell then always sees every chunk.\n",
    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1')\n",
    "\n",
    "# One Series of per-column null counts for each chunk.\n",
    "mv_list = []\n",
    "for chunk in chunk_iter:\n",
    "    mv_list.append(chunk.isnull().sum())\n",
    "\n",
    "# Stack the per-chunk counts, then add up the entries that share a column name.\n",
    "combined_mv_vc = pd.concat(mv_list)\n",
    "unique_combined_mv_vc = combined_mv_vc.groupby(combined_mv_vc.index).sum()\n",
    "unique_combined_mv_vc.sort_values()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Total memory footprint for each column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "company_permalink 4057788\n",
       "company_name 3591326\n",
       "company_category_code 3421104\n",
       "company_country_code 3172176\n",
       "company_state_code 3106051\n",
       "company_region 3411585\n",
       "company_city 3505926\n",
       "investor_permalink 4980548\n",
       "investor_name 3915666\n",
       "investor_category_code 622424\n",
       "investor_country_code 2647292\n",
       "investor_state_code 2476607\n",
       "investor_region 3396281\n",
       "investor_city 2885083\n",
       "funding_round_type 3410707\n",
       "funded_at 3542185\n",
       "funded_month 3383584\n",
       "funded_quarter 3383584\n",
       "funded_year 422960\n",
       "raised_amount_usd 422960\n",
       "dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1')\n",
    "\n",
    "# Running per-column total of memory_usage(deep=True); None until the first chunk is seen.\n",
    "series_memory_fp = None\n",
    "for chunk in chunk_iter:\n",
    "    chunk_memory = chunk.memory_usage(deep=True)\n",
    "    if series_memory_fp is None:\n",
    "        series_memory_fp = chunk_memory\n",
    "    else:\n",
    "        series_memory_fp = series_memory_fp + chunk_memory\n",
    "\n",
    "# Drop memory footprint calculation for the index.\n",
    "series_memory_fp = series_memory_fp.drop('Index')\n",
    "series_memory_fp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Total memory footprint of the data (in megabytes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "56.987607002258301"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_memory_fp.sum() / (1024 * 1024)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "company_country_code 1\n",
       "company_name 1\n",
       "company_permalink 1\n",
       "company_region 1\n",
       "investor_region 2\n",
       "investor_permalink 2\n",
       "investor_name 2\n",
       "funded_quarter 3\n",
       "funded_at 3\n",
       "funded_month 3\n",
       "funded_year 3\n",
       "funding_round_type 3\n",
       "company_state_code 492\n",
       "company_city 533\n",
       "company_category_code 643\n",
       "raised_amount_usd 3599\n",
       "investor_country_code 12001\n",
       "investor_city 12480\n",
       "investor_state_code 16809\n",
       "investor_category_code 50427\n",
       "dtype: int64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unique_combined_mv_vc.sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Drop columns representing URLs or containing way too many missing values (>90% missing)\n",
    "drop_cols = ['investor_permalink', 'company_permalink', 'investor_category_code']\n",
    "\n",
    "# nrows=0 parses only the header row, so the column names come straight from the\n",
    "# file rather than from a `chunk` variable left behind by an earlier loop.\n",
    "all_cols = pd.read_csv('crunchbase-investments.csv', nrows=0, encoding='ISO-8859-1').columns\n",
    "keep_cols = all_cols.drop(drop_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['company_name', 'company_category_code', 'company_country_code',\n",
       " 'company_state_code', 'company_region', 'company_city', 'investor_name',\n",
       " 'investor_country_code', 'investor_state_code', 'investor_region',\n",
       " 'investor_city', 'funding_round_type', 'funded_at', 'funded_month',\n",
       " 'funded_quarter', 'funded_year', 'raised_amount_usd'],\n",
       " dtype='object')"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_cols"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Selecting Data Types\n",
    "\n",
    "Let's first determine which columns shift types across chunks. Note that we only lay the groundwork for this step."
] },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Key: Column name, Value: List of types\n",
    "col_types = {}\n",
    "chunk_iter = pd.read_csv('crunchbase-investments.csv', chunksize=5000, encoding='ISO-8859-1', usecols=keep_cols)\n",
    "\n",
    "for chunk in chunk_iter:\n",
    "    # chunk.dtypes pairs every column name with the dtype this chunk was read as;\n",
    "    # record its string form so the per-chunk dtypes can be compared afterwards.\n",
    "    for col, dtype in chunk.dtypes.items():\n",
    "        col_types.setdefault(col, []).append(str(dtype))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'company_category_code': {'object'},\n",
       " 'company_city': {'object'},\n",
       " 'company_country_code': {'object'},\n",
       " 'company_name': {'object'},\n",
       " 'company_region': {'object'},\n",
       " 'company_state_code': {'object'},\n",
       " 'funded_at': {'object'},\n",
       " 'funded_month': {'object'},\n",
       " 'funded_quarter': {'object'},\n",
       " 'funded_year': {'float64', 'int64'},\n",
       " 'funding_round_type': {'object'},\n",
       " 'investor_city': {'float64', 'object'},\n",
       " 'investor_country_code': {'float64', 'object'},\n",
       " 'investor_name': {'object'},\n",
       " 'investor_region': {'object'},\n",
       " 'investor_state_code': {'float64', 'object'},\n",
       " 'raised_amount_usd': {'float64'}}"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collapse each column's per-chunk dtype list into the set of distinct dtypes seen.\n",
    "uniq_col_types = {col: set(types) for col, types in col_types.items()}\n",
    "uniq_col_types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | company_name | \n", "company_category_code | \n", "company_country_code | \n", "company_state_code | \n", "company_region | \n", "company_city | \n", "investor_name | \n", "investor_country_code | \n", "investor_state_code | \n", "investor_region | \n", "investor_city | \n", "funding_round_type | \n", "funded_at | \n", "funded_month | \n", "funded_quarter | \n", "funded_year | \n", "raised_amount_usd | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
50000 | \n", "NuORDER | \n", "fashion | \n", "USA | \n", "CA | \n", "Los Angeles | \n", "West Hollywood | \n", "Mortimer Singer | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2012-10-01 | \n", "2012-10 | \n", "2012-Q4 | \n", "2012 | \n", "3060000.0 | \n", "
50001 | \n", "ChaCha | \n", "advertising | \n", "USA | \n", "IN | \n", "Indianapolis | \n", "Carmel | \n", "Morton Meyerson | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-b | \n", "2007-10-01 | \n", "2007-10 | \n", "2007-Q4 | \n", "2007 | \n", "12000000.0 | \n", "
50002 | \n", "Binfire | \n", "software | \n", "USA | \n", "FL | \n", "Bocat Raton | \n", "Bocat Raton | \n", "Moshe Ariel | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2008-04-18 | \n", "2008-04 | \n", "2008-Q2 | \n", "2008 | \n", "500000.0 | \n", "
50003 | \n", "Binfire | \n", "software | \n", "USA | \n", "FL | \n", "Bocat Raton | \n", "Bocat Raton | \n", "Moshe Ariel | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2010-01-01 | \n", "2010-01 | \n", "2010-Q1 | \n", "2010 | \n", "750000.0 | \n", "
50004 | \n", "Unified Color | \n", "software | \n", "USA | \n", "CA | \n", "SF Bay | \n", "South San Frnacisco | \n", "Mr. Andrew Oung | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2010-01-01 | \n", "2010-01 | \n", "2010-Q1 | \n", "2010 | \n", "NaN | \n", "
50005 | \n", "HItviews | \n", "advertising | \n", "USA | \n", "NY | \n", "New York | \n", "New York City | \n", "multiple parties | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2007-11-29 | \n", "2007-11 | \n", "2007-Q4 | \n", "2007 | \n", "485000.0 | \n", "
50006 | \n", "LockerDome | \n", "social | \n", "USA | \n", "MO | \n", "Saint Louis | \n", "St. Louis | \n", "multiple parties | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-04-17 | \n", "2012-04 | \n", "2012-Q2 | \n", "2012 | \n", "300000.0 | \n", "
50007 | \n", "ThirdLove | \n", "ecommerce | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Munjal Shah | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2012-12-01 | \n", "2012-12 | \n", "2012-Q4 | \n", "2012 | \n", "5600000.0 | \n", "
50008 | \n", "Hakia | \n", "search | \n", "USA | \n", "NaN | \n", "TBD | \n", "NaN | \n", "Murat Vargi | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2006-11-01 | \n", "2006-11 | \n", "2006-Q4 | \n", "2006 | \n", "16000000.0 | \n", "
50009 | \n", "bookacoach | \n", "sports | \n", "USA | \n", "IN | \n", "Indianapolis | \n", "Indianapolis | \n", "Myles Grote | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-11-01 | \n", "2012-11 | \n", "2012-Q4 | \n", "2012 | \n", "NaN | \n", "
50010 | \n", "LocalCircles | \n", "social | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Santa Clara | \n", "Nadir Godrej | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-09-01 | \n", "2012-09 | \n", "2012-Q3 | \n", "2012 | \n", "NaN | \n", "
50011 | \n", "Graphdive | \n", "analytics | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Menlo Park | \n", "Naguib Sawiris | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-10-04 | \n", "2012-10 | \n", "2012-Q4 | \n", "2012 | \n", "1000000.0 | \n", "
50012 | \n", "Ribbon | \n", "ecommerce | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Naguib Sawiris | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2013-02-05 | \n", "2013-02 | \n", "2013-Q1 | \n", "2013 | \n", "1630000.0 | \n", "
50013 | \n", "Dokkankom.com | \n", "ecommerce | \n", "USA | \n", "NY | \n", "New York | \n", "new york | \n", "Namek Zu'bi | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2011-10-10 | \n", "2011-10 | \n", "2011-Q4 | \n", "2011 | \n", "30000.0 | \n", "
50014 | \n", "Lookery | \n", "web | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Nana Shin | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2008-02-07 | \n", "2008-02 | \n", "2008-Q1 | \n", "2008 | \n", "900000.0 | \n", "
50015 | \n", "TrustDegrees | \n", "web | \n", "USA | \n", "NY | \n", "Kenmore | \n", "Kenmore | \n", "Nancy Barrett | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2011-06-09 | \n", "2011-06 | \n", "2011-Q2 | \n", "2011 | \n", "8000.0 | \n", "
50016 | \n", "Altavoz | \n", "games_video | \n", "USA | \n", "DC | \n", "Washington DC | \n", "Washington | \n", "Nancy Jacobsen | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-09-11 | \n", "2012-09 | \n", "2012-Q3 | \n", "2012 | \n", "150000.0 | \n", "
50017 | \n", "EdSurge | \n", "education | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Burlingame | \n", "Nancy Peretsman | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-08-29 | \n", "2012-08 | \n", "2012-Q3 | \n", "2012 | \n", "400000.0 | \n", "
50018 | \n", "FullContact | \n", "enterprise | \n", "USA | \n", "CO | \n", "Denver | \n", "Denver | \n", "Nancy Pierce | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-b | \n", "2012-07-09 | \n", "2012-07 | \n", "2012-Q3 | \n", "2012 | \n", "7000000.0 | \n", "
50019 | \n", "Rapt Media | \n", "enterprise | \n", "USA | \n", "CO | \n", "Denver | \n", "Boulder | \n", "Nancy Pierce | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2013-01-23 | \n", "2013-01 | \n", "2013-Q1 | \n", "2013 | \n", "2288803.0 | \n", "
50020 | \n", "Humanoid | \n", "software | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Nat Friedman | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2010-12-01 | \n", "2010-12 | \n", "2010-Q4 | \n", "2010 | \n", "1100000.0 | \n", "
50021 | \n", "Runscope | \n", "web | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Nat Friedman | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2013-05-22 | \n", "2013-05 | \n", "2013-Q2 | \n", "2013 | \n", "1100000.0 | \n", "
50022 | \n", "Adzerk | \n", "advertising | \n", "USA | \n", "NC | \n", "Raleigh-Durham | \n", "Durham | \n", "Nat Turner | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2011-07-12 | \n", "2011-07 | \n", "2011-Q3 | \n", "2011 | \n", "650000.0 | \n", "
50023 | \n", "Adaptly | \n", "advertising | \n", "USA | \n", "NY | \n", "New York | \n", "New York | \n", "Nat Turner | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2011-04-18 | \n", "2011-04 | \n", "2011-Q2 | \n", "2011 | \n", "2000000.0 | \n", "
50024 | \n", "Lore | \n", "education | \n", "USA | \n", "NY | \n", "New York | \n", "New York | \n", "Nat Turner | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2011-06-27 | \n", "2011-06 | \n", "2011-Q2 | \n", "2011 | \n", "1000000.0 | \n", "
50025 | \n", "Tasted Menu | \n", "hospitality | \n", "USA | \n", "MA | \n", "Boston | \n", "Boston | \n", "Nat Turner | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2011-05-01 | \n", "2011-05 | \n", "2011-Q2 | \n", "2011 | \n", "NaN | \n", "
50026 | \n", "Lua Technologies | \n", "mobile | \n", "USA | \n", "NY | \n", "New York | \n", "New York | \n", "Nat Turner | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2012-08-01 | \n", "2012-08 | \n", "2012-Q3 | \n", "2012 | \n", "2500000.0 | \n", "
50027 | \n", "Blue Apron | \n", "hospitality | \n", "USA | \n", "NY | \n", "New York | \n", "Brooklyn | \n", "Nat Turner | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2013-02-19 | \n", "2013-02 | \n", "2013-Q1 | \n", "2013 | \n", "3000000.0 | \n", "
50028 | \n", "ChatID | \n", "mobile | \n", "USA | \n", "NY | \n", "New York | \n", "New York | \n", "Nat Turner | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-01-01 | \n", "2012-01 | \n", "2012-Q1 | \n", "2012 | \n", "NaN | \n", "
50029 | \n", "Breakthrough Behavioral | \n", "health | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Redwood City | \n", "Nat Turner | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-08-13 | \n", "2012-08 | \n", "2012-Q3 | \n", "2012 | \n", "900000.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
52840 | \n", "Meddik | \n", "health | \n", "USA | \n", "NY | \n", "New York | \n", "New York | \n", "Zach Weinberg | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-05-24 | \n", "2012-05 | \n", "2012-Q2 | \n", "2012 | \n", "750000.0 | \n", "
52841 | \n", "Blue Apron | \n", "hospitality | \n", "USA | \n", "NY | \n", "New York | \n", "Brooklyn | \n", "Zach Weinberg | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2013-02-19 | \n", "2013-02 | \n", "2013-Q1 | \n", "2013 | \n", "3000000.0 | \n", "
52842 | \n", "ChatID | \n", "mobile | \n", "USA | \n", "NY | \n", "New York | \n", "New York | \n", "Zach Weinberg | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-01-01 | \n", "2012-01 | \n", "2012-Q1 | \n", "2012 | \n", "NaN | \n", "
52843 | \n", "Breakthrough Behavioral | \n", "health | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Redwood City | \n", "Zach Weinberg | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-08-13 | \n", "2012-08 | \n", "2012-Q3 | \n", "2012 | \n", "900000.0 | \n", "
52844 | \n", "Plaid | \n", "software | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Zach Weinberg | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2013-09-19 | \n", "2013-09 | \n", "2013-Q3 | \n", "2013 | \n", "2800000.0 | \n", "
52845 | \n", "PokitDok | \n", "mobile | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Menlo Park | \n", "Zach Zeitlin | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-07-12 | \n", "2012-07 | \n", "2012-Q3 | \n", "2012 | \n", "1300000.0 | \n", "
52846 | \n", "Fitocracy | \n", "web | \n", "USA | \n", "NY | \n", "New York | \n", "New York | \n", "Zachary Aarons | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2011-09-01 | \n", "2011-09 | \n", "2011-Q3 | \n", "2011 | \n", "250000.0 | \n", "
52847 | \n", "Square | \n", "mobile | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Zachary Bogue | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2009-11-01 | \n", "2009-11 | \n", "2009-Q4 | \n", "2009 | \n", "10000000.0 | \n", "
52848 | \n", "MixRank | \n", "advertising | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Zachary Bogue | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2011-11-18 | \n", "2011-11 | \n", "2011-Q4 | \n", "2011 | \n", "1500000.0 | \n", "
52849 | \n", "Socialcam | \n", "mobile | \n", "USA | \n", "CA | \n", "Santa Clara County | \n", "Santa Clara County | \n", "Zachary Bogue | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-04-30 | \n", "2012-04 | \n", "2012-Q2 | \n", "2012 | \n", "NaN | \n", "
52850 | \n", "Nuzzel | \n", "news | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Zachary Bogue | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "venture | \n", "2012-11-15 | \n", "2012-11 | \n", "2012-Q4 | \n", "2012 | \n", "1700000.0 | \n", "
52851 | \n", "ThirdLove | \n", "ecommerce | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Zachary Bogue | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2012-12-01 | \n", "2012-12 | \n", "2012-Q4 | \n", "2012 | \n", "5600000.0 | \n", "
52852 | \n", "MXD3D | \n", "web | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Zaid Ayoub | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-01-01 | \n", "2012-01 | \n", "2012-Q1 | \n", "2012 | \n", "300000.0 | \n", "
52853 | \n", "MXD3D | \n", "web | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Zaid Ayoub | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2011-01-01 | \n", "2011-01 | \n", "2011-Q1 | \n", "2011 | \n", "300000.0 | \n", "
52854 | \n", "Verious | \n", "mobile | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Carlos | \n", "Zain Khan | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2011-05-30 | \n", "2011-05 | \n", "2011-Q2 | \n", "2011 | \n", "800000.0 | \n", "
52855 | \n", "Identified | \n", "analytics | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Zao Yang | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-b | \n", "2012-06-05 | \n", "2012-06 | \n", "2012-Q2 | \n", "2012 | \n", "21000000.0 | \n", "
52856 | \n", "HaulerDeals | \n", "fashion | \n", "USA | \n", "CA | \n", "Los Angeles | \n", "Los Angeles | \n", "Zaw Thet | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2012-10-31 | \n", "2012-10 | \n", "2012-Q4 | \n", "2012 | \n", "1250000.0 | \n", "
52857 | \n", "When You Wish | \n", "nonprofit | \n", "USA | \n", "CA | \n", "Los Angeles | \n", "Marina Del Rey | \n", "Zelda Marzec | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2011-02-01 | \n", "2011-02 | \n", "2011-Q1 | \n", "2011 | \n", "1500000.0 | \n", "
52858 | \n", "Farmeron | \n", "analytics | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Mountain View | \n", "Zeljko Mataija | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2010-10-01 | \n", "2010-10 | \n", "2010-Q4 | \n", "2010 | \n", "15000.0 | \n", "
52859 | \n", "Theraclone Sciences | \n", "biotech | \n", "USA | \n", "WA | \n", "Seattle | \n", "Seattle | \n", "Zenyaku Kogyo | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-b | \n", "2013-03-25 | \n", "2013-03 | \n", "2013-Q1 | \n", "2013 | \n", "8000000.0 | \n", "
52860 | \n", "SimpleGeo | \n", "advertising | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "Ziv Navoth | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "other | \n", "2009-11-10 | \n", "2009-11 | \n", "2009-Q4 | \n", "2009 | \n", "195000.0 | \n", "
52861 | \n", "Open Me | \n", "ecommerce | \n", "USA | \n", "CA | \n", "Los Angeles | \n", "Los Angeles | \n", "Ziver Birg | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2013-08-01 | \n", "2013-08 | \n", "2013-Q3 | \n", "2013 | \n", "NaN | \n", "
52862 | \n", "Comprehend Systems | \n", "enterprise | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Palo Alto | \n", "Zod Nazem | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2013-07-11 | \n", "2013-07 | \n", "2013-Q3 | \n", "2013 | \n", "8400000.0 | \n", "
52863 | \n", "Payoneer | \n", "other | \n", "USA | \n", "NY | \n", "New York | \n", "New York | \n", "Zohar Gilon | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2005-01-01 | \n", "2005-01 | \n", "2005-Q1 | \n", "2005 | \n", "2000000.0 | \n", "
52864 | \n", "Outbrain | \n", "web | \n", "USA | \n", "NY | \n", "New York | \n", "New York City | \n", "Zohar Gilon | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-b | \n", "2009-02-11 | \n", "2009-02 | \n", "2009-Q1 | \n", "2009 | \n", "12000000.0 | \n", "
52865 | \n", "Garantia Data | \n", "enterprise | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Santa Clara | \n", "Zohar Gilon | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2012-08-08 | \n", "2012-08 | \n", "2012-Q3 | \n", "2012 | \n", "3800000.0 | \n", "
52866 | \n", "DudaMobile | \n", "mobile | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Palo Alto | \n", "Zohar Gilon | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-c+ | \n", "2013-04-08 | \n", "2013-04 | \n", "2013-Q2 | \n", "2013 | \n", "10300000.0 | \n", "
52867 | \n", "SiteBrains | \n", "software | \n", "USA | \n", "CA | \n", "SF Bay | \n", "San Francisco | \n", "zohar israel | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "angel | \n", "2010-08-01 | \n", "2010-08 | \n", "2010-Q3 | \n", "2010 | \n", "350000.0 | \n", "
52868 | \n", "Comprehend Systems | \n", "enterprise | \n", "USA | \n", "CA | \n", "SF Bay | \n", "Palo Alto | \n", "Zorba Lieberman | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2013-07-11 | \n", "2013-07 | \n", "2013-Q3 | \n", "2013 | \n", "8400000.0 | \n", "
52869 | \n", "SmartThings | \n", "mobile | \n", "USA | \n", "DC | \n", "unknown | \n", "Minneapolis | \n", "Zorik Gordon | \n", "NaN | \n", "NaN | \n", "unknown | \n", "NaN | \n", "series-a | \n", "2012-12-04 | \n", "2012-12 | \n", "2012-Q4 | \n", "2012 | \n", "3000000.0 | \n", "
2870 rows × 17 columns
\n", "