polylabel 1.0.2

A Rust implementation of the Polylabel algorithm
Documentation
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import geopandas as gp\n",
    "import pandas as pd\n",
    "from matplotlib import pyplot as plt\n",
    "from matplotlib.collections import PatchCollection\n",
    "from matplotlib import rc\n",
    "from shapely.geometry import LineString, Polygon, Point, box, shape\n",
    "from shapely.ops import cascaded_union\n",
    "from fiona.crs import from_epsg\n",
    "from rtree import index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def nearest_postcode_distance(df, df2, sindex):\n",
    "    \"\"\"\n",
    "    Find the entry of df2 nearest to one postcode row, via df2's spatial index.\n",
    "\n",
    "    Written to be used with ``DataFrame.apply(..., axis=1)``.\n",
    "\n",
    "    df: a single row (despite the name) exposing a ``geometry`` attribute\n",
    "    df2: the frame ``sindex`` was built from, with ``geometry`` and ``POSTCODE`` columns\n",
    "    sindex: spatial index whose ``nearest(bounds, 1)`` yields positions into ``df2``\n",
    "\n",
    "    Returns a ``(distance, postcode)`` tuple: the distance between the two geometries\n",
    "    divided by 1000 (km, assuming the CRS unit is the metre), and the ``POSTCODE``\n",
    "    of the nearest neighbour.\n",
    "    Note: distance between adjacent postcodes is 0.0\n",
    "    \"\"\"\n",
    "    # NOTE(review): the index is queried with the row's bounding box, so the hit is\n",
    "    # presumably the nearest *bounding box*, not necessarily the nearest polygon -- confirm\n",
    "    # next(iter(...)) replaces the Python-2-only .next() method, so this also runs on Python 3\n",
    "    idx_pos = next(iter(sindex.nearest(df.geometry.bounds, 1)))\n",
    "    # fetch the neighbour once and reuse it for both return values\n",
    "    nearest = df2.iloc[idx_pos]\n",
    "    return df.geometry.distance(nearest.geometry) / 1000., nearest['POSTCODE']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# CRS has been converted from EPSG:27700 to EPSG:25832 (ETRS89) so that distances\n",
    "# can be measured as plain Euclidean distances\n",
    "# (the notebook divides them by 1000 to report km, i.e. it assumes metres -- TODO confirm)\n",
    "# NOTE(review): the shapefile path is relative to the notebook's working directory\n",
    "merged = gp.GeoDataFrame.from_file(\"merged_postcodes_centroids.shp\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Distance P</th>\n",
       "      <th>Minimum Di</th>\n",
       "      <th>PC_AREA</th>\n",
       "      <th>POSTCODE</th>\n",
       "      <th>UPP</th>\n",
       "      <th>geometry</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AB10 1AN</td>\n",
       "      <td>0.08436385277905387</td>\n",
       "      <td>AB</td>\n",
       "      <td>AB10 1AB</td>\n",
       "      <td>00000000000000000002</td>\n",
       "      <td>POLYGON ((-169708.694024045 6388795.9234469, -...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AB10 1AN</td>\n",
       "      <td>0</td>\n",
       "      <td>AB</td>\n",
       "      <td>AB10 1AL</td>\n",
       "      <td>00000000000000000003</td>\n",
       "      <td>POLYGON ((-169614.706281362 6388752.870788309,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AB10 1AN</td>\n",
       "      <td>0</td>\n",
       "      <td>AB</td>\n",
       "      <td>AB10 1AN</td>\n",
       "      <td>00000000000000000004</td>\n",
       "      <td>POLYGON ((-169546.2997755085 6388773.476457709...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AB10 1AN</td>\n",
       "      <td>0.0233134882466831</td>\n",
       "      <td>AB</td>\n",
       "      <td>AB10 1AP</td>\n",
       "      <td>00000000000000000005</td>\n",
       "      <td>POLYGON ((-169690.3220984091 6388713.581593464...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AB10 1AN</td>\n",
       "      <td>0.1438272018464478</td>\n",
       "      <td>AB</td>\n",
       "      <td>AB10 1AS</td>\n",
       "      <td>00000000000000000006</td>\n",
       "      <td>POLYGON ((-169779.7872413595 6388697.504694026...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Distance P           Minimum Di PC_AREA  POSTCODE                   UPP  \\\n",
       "0   AB10 1AN  0.08436385277905387      AB  AB10 1AB  00000000000000000002   \n",
       "1   AB10 1AN                    0      AB  AB10 1AL  00000000000000000003   \n",
       "2   AB10 1AN                    0      AB  AB10 1AN  00000000000000000004   \n",
       "3   AB10 1AN   0.0233134882466831      AB  AB10 1AP  00000000000000000005   \n",
       "4   AB10 1AN   0.1438272018464478      AB  AB10 1AS  00000000000000000006   \n",
       "\n",
       "                                            geometry  \n",
       "0  POLYGON ((-169708.694024045 6388795.9234469, -...  \n",
       "1  POLYGON ((-169614.706281362 6388752.870788309,...  \n",
       "2  POLYGON ((-169546.2997755085 6388773.476457709...  \n",
       "3  POLYGON ((-169690.3220984091 6388713.581593464...  \n",
       "4  POLYGON ((-169779.7872413595 6388697.504694026...  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create the \"funded\" GeoDataFrame from the data supplied by Comic Relief (CR), and build a spatial index for it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# NOTE(review): absolute, machine-specific path -- point this at your own copy of the spreadsheet\n",
    "df_funded = pd.read_excel(\"/Users/sth/Downloads/cr_funded.xlsx\")\n",
    "# Keep only the postcode polygons in which a project was funded.\n",
    "# .copy() makes this an independent frame, so the column added to it in the next cell\n",
    "# is not a chained assignment on a slice of `merged` (SettingWithCopyWarning).\n",
    "# isin() accepts the Series directly; no list() conversion is needed.\n",
    "df_funded_geo = merged[merged['POSTCODE'].isin(df_funded['Grants Project Postcode'])].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "merged = merged.sort_index()\n",
    "# Rebind rather than mutate in place: df_funded_geo was filtered out of `merged`, and\n",
    "# in-place edits or column assignment on such a slice can trigger SettingWithCopyWarning.\n",
    "# sort_index() returns a new frame, so the column added below lands on an independent object.\n",
    "df_funded_geo = df_funded_geo.sort_index()\n",
    "df_funded_geo['Funded Project Postcode'] = df_funded_geo[u'POSTCODE']\n",
    "# The former rename of 'Grants Project Postcode' to 'POSTCODE' was dropped: that column\n",
    "# belongs to the spreadsheet (df_funded), not to df_funded_geo, so the rename did nothing.\n",
    "# Build the index last: the positions it returns are later used with df_funded_geo.iloc\n",
    "sindex = df_funded_geo.sindex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([              u'Distance P',               u'Minimum Di',\n",
       "                        u'PC_AREA',                 u'POSTCODE',\n",
       "                            u'UPP',                 u'geometry',\n",
       "               u'Minimum Distance', u'Nearest Project Postcode',\n",
       "        u'Funded Project Postcode'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_funded_geo.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For every postcode, calculate the minimum distance (in `km`) to a postcode in which a project has been funded, and record that nearest postcode.  \n",
    "This will result in distances of 0.0 for postcodes adjacent to postcodes in which a project has been funded. **CAUTION**: this will take a long time (20 - 45 minutes, depending on processor speed), and will use a lot of memory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Row-wise lookup: each call returns a (distance, postcode) tuple for one row of `merged`;\n",
    "# zip(*...) transposes the resulting sequence of tuples into the two new columns\n",
    "merged['Minimum Distance'], merged['Nearest Project Postcode'] = zip(*merged.apply(\n",
    "    nearest_postcode_distance, axis=1, args=(df_funded_geo, sindex)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([                 u'PC_AREA',                 u'POSTCODE',\n",
       "                            u'UPP',                 u'geometry',\n",
       "               u'Minimum Distance', u'Nearest Project Postcode'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'PC_AREA', u'POSTCODE', u'UPP', u'geometry',\n",
       "       u'Funded Project Postcode'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_funded_geo.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Join the funded projects DataFrame to the postcode DataFrame  \n",
    "This is an intermediate step -- we'll be using the new `geometry_funded` column to calculate\n",
    "the centroid distance between each postcode and its nearest funded postcode.\n",
    "\n",
    "Duplicate columns from the funded projects df have the `_funded` suffix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Attach the funded-postcode row matching each postcode's 'Nearest Project Postcode';\n",
    "# columns present in both frames get the `_funded` suffix on the funded side\n",
    "joined = merged.join(df_funded_geo.set_index(u'POSTCODE'),\n",
    "                     rsuffix=u'_funded', on=\"Nearest Project Postcode\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([                 u'PC_AREA',                 u'POSTCODE',\n",
       "                            u'UPP',                 u'geometry',\n",
       "               u'Minimum Distance', u'Nearest Project Postcode',\n",
       "                 u'PC_AREA_funded',               u'UPP_funded',\n",
       "                u'geometry_funded',  u'Funded Project Postcode'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joined.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Calculate the [centroid](https://en.wikipedia.org/wiki/Centroid) distance in `km` between each UK postcode, and the nearest postcode in which Comic Relief have funded a project.  \n",
    "\n",
    "See the accompanying image for an explanation of the difference between the two measurements."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# axis=1 hands the lambda one row at a time; dividing by 1000. turns the distance between\n",
    "# the two centroids into km (assuming the CRS unit is the metre)\n",
    "joined['Minimum Distance Centroid'] = joined.apply(\n",
    "    lambda row: row.geometry.centroid.distance(row[u'geometry_funded'].centroid) / 1000.,\n",
    "    axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PC_AREA                    AB\n",
       "POSTCODE             AB11 5QH\n",
       "UPP                       469\n",
       "geometry                  foo\n",
       "Minimum Distance            1\n",
       "Distance Postcode    AB10 1AN\n",
       "Name: 721, dtype: object"
      ]
     },
     "execution_count": 192,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fix AB11 5QH -- its polygon is wrong\n",
    "# merged.iloc[721]['Minimum Distance'] = 1.0\n",
    "# merged.iloc[721]['Distance Postcode'] = 'AB10 1AN'\n",
    "# merged.iloc[721]\n",
    "\n",
    "# AB11 5QH HAS BEEN REMOVED DUE TO AN INVALID POLYGON."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Show the 20 post codes which are located the furthest from a post code in which Comic Relief have funded a project. The distance in this case is **polygon distance**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>POSTCODE</th>\n",
       "      <th>Minimum Distance</th>\n",
       "      <th>Nearest Project Postcode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>689983</th>\n",
       "      <td>IV27 4XD</td>\n",
       "      <td>102.243906</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689981</th>\n",
       "      <td>IV27 4XA</td>\n",
       "      <td>102.193813</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689982</th>\n",
       "      <td>IV27 4XB</td>\n",
       "      <td>102.186633</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689984</th>\n",
       "      <td>IV27 4XE</td>\n",
       "      <td>102.154803</td>\n",
       "      <td>IV30 5XF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689986</th>\n",
       "      <td>IV27 4XG</td>\n",
       "      <td>102.145813</td>\n",
       "      <td>IV30 5XF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689992</th>\n",
       "      <td>IV27 4XQ</td>\n",
       "      <td>101.753515</td>\n",
       "      <td>IV30 5XF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689989</th>\n",
       "      <td>IV27 4XL</td>\n",
       "      <td>101.681902</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689985</th>\n",
       "      <td>IV27 4XF</td>\n",
       "      <td>101.569011</td>\n",
       "      <td>IV30 5XF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689987</th>\n",
       "      <td>IV27 4XH</td>\n",
       "      <td>100.444232</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689929</th>\n",
       "      <td>IV27 4QE</td>\n",
       "      <td>100.420907</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>690001</th>\n",
       "      <td>IV27 4YR</td>\n",
       "      <td>100.302919</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689932</th>\n",
       "      <td>IV27 4QH</td>\n",
       "      <td>100.149834</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>690000</th>\n",
       "      <td>IV27 4YQ</td>\n",
       "      <td>99.836299</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>690004</th>\n",
       "      <td>IV27 4YU</td>\n",
       "      <td>99.698467</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>693656</th>\n",
       "      <td>VIV00223</td>\n",
       "      <td>99.570614</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>693659</th>\n",
       "      <td>VIV00226</td>\n",
       "      <td>99.554083</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689924</th>\n",
       "      <td>IV27 4PY</td>\n",
       "      <td>99.537784</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689918</th>\n",
       "      <td>IV27 4PR</td>\n",
       "      <td>99.480622</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>690002</th>\n",
       "      <td>IV27 4YS</td>\n",
       "      <td>99.442818</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689919</th>\n",
       "      <td>IV27 4PS</td>\n",
       "      <td>99.378948</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        POSTCODE  Minimum Distance Nearest Project Postcode\n",
       "689983  IV27 4XD        102.243906                 KW15 1JD\n",
       "689981  IV27 4XA        102.193813                 KW15 1JD\n",
       "689982  IV27 4XB        102.186633                 KW15 1JD\n",
       "689984  IV27 4XE        102.154803                 IV30 5XF\n",
       "689986  IV27 4XG        102.145813                 IV30 5XF\n",
       "689992  IV27 4XQ        101.753515                 IV30 5XF\n",
       "689989  IV27 4XL        101.681902                 KW15 1JD\n",
       "689985  IV27 4XF        101.569011                 IV30 5XF\n",
       "689987  IV27 4XH        100.444232                 KW15 1JD\n",
       "689929  IV27 4QE        100.420907                  HS2 0BB\n",
       "690001  IV27 4YR        100.302919                 KW15 1JD\n",
       "689932  IV27 4QH        100.149834                  HS2 0BB\n",
       "690000  IV27 4YQ         99.836299                 KW15 1JD\n",
       "690004  IV27 4YU         99.698467                 KW15 1JD\n",
       "693656  VIV00223         99.570614                  HS2 0BB\n",
       "693659  VIV00226         99.554083                 KW15 1JD\n",
       "689924  IV27 4PY         99.537784                  HS2 0BB\n",
       "689918  IV27 4PR         99.480622                  HS2 0BB\n",
       "690002  IV27 4YS         99.442818                 KW15 1JD\n",
       "689919  IV27 4PS         99.378948                  HS2 0BB"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 20 largest polygon distances, furthest first\n",
    "(merged[['POSTCODE', 'Minimum Distance', 'Nearest Project Postcode']]\n",
    "    .sort_values('Minimum Distance', ascending=False)\n",
    "    .head(20))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Show the 20 post codes which are located the furthest from a post code in which Comic Relief have funded a project. The distance in this case is **centroid distance**, which is generally greater than polygon distance: for the top result, the minimum polygon distance between `HS6 5DL` and `IV51 9DT` is 79.095 km, but the centroid distance is 111.73 km. It is, however, a subjectively better measure of the distance between adjacent post codes, whose polygon distance is 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>POSTCODE</th>\n",
       "      <th>Minimum Distance</th>\n",
       "      <th>Minimum Distance Centroid</th>\n",
       "      <th>Nearest Project Postcode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>642866</th>\n",
       "      <td>HS6 5DL</td>\n",
       "      <td>79.094774</td>\n",
       "      <td>111.729833</td>\n",
       "      <td>IV51 9DT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>643062</th>\n",
       "      <td>HS9 5YW</td>\n",
       "      <td>97.247336</td>\n",
       "      <td>105.578325</td>\n",
       "      <td>IV51 9DT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689929</th>\n",
       "      <td>IV27 4QE</td>\n",
       "      <td>100.420907</td>\n",
       "      <td>103.728483</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689985</th>\n",
       "      <td>IV27 4XF</td>\n",
       "      <td>101.569011</td>\n",
       "      <td>103.334439</td>\n",
       "      <td>IV30 5XF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>690003</th>\n",
       "      <td>IV27 4YT</td>\n",
       "      <td>97.668976</td>\n",
       "      <td>103.241990</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>690001</th>\n",
       "      <td>IV27 4YR</td>\n",
       "      <td>100.302919</td>\n",
       "      <td>102.885857</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689986</th>\n",
       "      <td>IV27 4XG</td>\n",
       "      <td>102.145813</td>\n",
       "      <td>102.855545</td>\n",
       "      <td>IV30 5XF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689984</th>\n",
       "      <td>IV27 4XE</td>\n",
       "      <td>102.154803</td>\n",
       "      <td>102.510731</td>\n",
       "      <td>IV30 5XF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>720099</th>\n",
       "      <td>KW14 7TJ</td>\n",
       "      <td>79.766687</td>\n",
       "      <td>102.389255</td>\n",
       "      <td>KW17 2PU</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689992</th>\n",
       "      <td>IV27 4XQ</td>\n",
       "      <td>101.753515</td>\n",
       "      <td>102.341355</td>\n",
       "      <td>IV30 5XF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689983</th>\n",
       "      <td>IV27 4XD</td>\n",
       "      <td>102.243906</td>\n",
       "      <td>102.335801</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689981</th>\n",
       "      <td>IV27 4XA</td>\n",
       "      <td>102.193813</td>\n",
       "      <td>102.306455</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689982</th>\n",
       "      <td>IV27 4XB</td>\n",
       "      <td>102.186633</td>\n",
       "      <td>102.300491</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689928</th>\n",
       "      <td>IV27 4QD</td>\n",
       "      <td>98.620823</td>\n",
       "      <td>102.085076</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689989</th>\n",
       "      <td>IV27 4XL</td>\n",
       "      <td>101.681902</td>\n",
       "      <td>102.012712</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689987</th>\n",
       "      <td>IV27 4XH</td>\n",
       "      <td>100.444232</td>\n",
       "      <td>101.951465</td>\n",
       "      <td>KW15 1JD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689932</th>\n",
       "      <td>IV27 4QH</td>\n",
       "      <td>100.149834</td>\n",
       "      <td>101.847465</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689926</th>\n",
       "      <td>IV27 4QA</td>\n",
       "      <td>99.207035</td>\n",
       "      <td>101.726996</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689998</th>\n",
       "      <td>IV27 4YL</td>\n",
       "      <td>94.432503</td>\n",
       "      <td>101.615775</td>\n",
       "      <td>IV30 5XF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689927</th>\n",
       "      <td>IV27 4QB</td>\n",
       "      <td>98.351980</td>\n",
       "      <td>101.505845</td>\n",
       "      <td>HS2 0BB</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        POSTCODE  Minimum Distance  Minimum Distance Centroid  \\\n",
       "642866   HS6 5DL         79.094774                 111.729833   \n",
       "643062   HS9 5YW         97.247336                 105.578325   \n",
       "689929  IV27 4QE        100.420907                 103.728483   \n",
       "689985  IV27 4XF        101.569011                 103.334439   \n",
       "690003  IV27 4YT         97.668976                 103.241990   \n",
       "690001  IV27 4YR        100.302919                 102.885857   \n",
       "689986  IV27 4XG        102.145813                 102.855545   \n",
       "689984  IV27 4XE        102.154803                 102.510731   \n",
       "720099  KW14 7TJ         79.766687                 102.389255   \n",
       "689992  IV27 4XQ        101.753515                 102.341355   \n",
       "689983  IV27 4XD        102.243906                 102.335801   \n",
       "689981  IV27 4XA        102.193813                 102.306455   \n",
       "689982  IV27 4XB        102.186633                 102.300491   \n",
       "689928  IV27 4QD         98.620823                 102.085076   \n",
       "689989  IV27 4XL        101.681902                 102.012712   \n",
       "689987  IV27 4XH        100.444232                 101.951465   \n",
       "689932  IV27 4QH        100.149834                 101.847465   \n",
       "689926  IV27 4QA         99.207035                 101.726996   \n",
       "689998  IV27 4YL         94.432503                 101.615775   \n",
       "689927  IV27 4QB         98.351980                 101.505845   \n",
       "\n",
       "       Nearest Project Postcode  \n",
       "642866                 IV51 9DT  \n",
       "643062                 IV51 9DT  \n",
       "689929                  HS2 0BB  \n",
       "689985                 IV30 5XF  \n",
       "690003                 KW15 1JD  \n",
       "690001                 KW15 1JD  \n",
       "689986                 IV30 5XF  \n",
       "689984                 IV30 5XF  \n",
       "720099                 KW17 2PU  \n",
       "689992                 IV30 5XF  \n",
       "689983                 KW15 1JD  \n",
       "689981                 KW15 1JD  \n",
       "689982                 KW15 1JD  \n",
       "689928                  HS2 0BB  \n",
       "689989                 KW15 1JD  \n",
       "689987                 KW15 1JD  \n",
       "689932                  HS2 0BB  \n",
       "689926                  HS2 0BB  \n",
       "689998                 IV30 5XF  \n",
       "689927                  HS2 0BB  "
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 20 largest centroid distances, furthest first, with the polygon distance alongside\n",
    "(joined[['POSTCODE', 'Minimum Distance', 'Minimum Distance Centroid', 'Nearest Project Postcode']]\n",
    "    .sort_values('Minimum Distance Centroid', ascending=False)\n",
    "    .head(20))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's look at some summary stats for both sets of distance calculations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.3708649416420808"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joined['Minimum Distance'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.6501219489666115"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joined['Minimum Distance Centroid'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.3061707476917683"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joined['Minimum Distance'].median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.5462807258789875"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joined['Minimum Distance Centroid'].median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    1.673695e+06\n",
       "mean     4.370865e+00\n",
       "std      5.529624e+00\n",
       "min      0.000000e+00\n",
       "25%      9.216085e-01\n",
       "50%      2.306171e+00\n",
       "75%      5.910296e+00\n",
       "max      1.022439e+02\n",
       "Name: Minimum Di, dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joined['Minimum Distance'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    1.673695e+06\n",
       "mean     4.650122e+00\n",
       "std      5.663566e+00\n",
       "min      0.000000e+00\n",
       "25%      1.105992e+00\n",
       "50%      2.546281e+00\n",
       "75%      6.253126e+00\n",
       "max      1.117298e+02\n",
       "Name: Minimum Distance Centroid, dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joined['Minimum Distance Centroid'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "These are Matplotlib parameters -- you can ignore this."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Font families for figures; 'family' selects the sans-serif entry\n",
    "rc('font', **{'family':'sans-serif',\n",
    "    'sans-serif':['Helvetica'],\n",
    "    'monospace': ['Inconsolata'],\n",
    "    'serif': ['Adobe Garamond Pro']}  # font name was misspelled 'Garamamond'\n",
    ")\n",
    "\n",
    "# Render figure text through LaTeX and load the sfmath package in its preamble\n",
    "rc('text', **{'usetex': True})\n",
    "# raw string: in a plain literal '\\u' starts a unicode escape, a SyntaxError on Python 3\n",
    "rc('text', **{'latex.preamble': r'\\usepackage{sfmath}'})\n",
    "\n",
    "# NOTE(review): presumably the keyword arguments for the savefig calls further down -- confirm\n",
    "save_args = {\n",
    "    \"format\": \"png\",\n",
    "    \"bbox_inches\": 'tight',\n",
    "    \"alpha\": True,\n",
    "    \"transparent\": True,\n",
    "    \"dpi\": 200\n",
    "}\n",
    "# NOTE(review): presumably the font size used by the plotting cells -- confirm\n",
    "fontsize = 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Plot a histogram of both sets of distance measurements. Use a log scale for the `y` axis to keep the first few bins from dwarfing the rest: most postcodes (75%) are within ~6 km of a project."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEiCAYAAAD5+KUgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGvZJREFUeJzt3cFzG0d2x/Hfs7dyjGjKLN8Um9Le1xT0B+yK9HprS4es\nSfkPiEUqd5uUcnH5EhlyrqktUsk9CpXsQeWy14DyD0ik9w9YwpujSxIlXlPlfTlMjziEAGKaRM9g\nwO+nCkVwgJl+AzTmzXTP9Ji7CwCAN+oOAAAwGUgIAABJJAQAQEBCAABIIiEAAAISAgBAEgmhkcxs\nxszczHbCY8/MVk+wjOUB0+fNrFNYbjtyuXsR5RfXYSePx8yWzWw9Ju46mdlqf7xhWid8hgth2nrf\n+nr4vLcLn/lCYRmbhfcujiozTN8zs/nC/6euK4Xl1FZfUBF359Gwh6QZSXt9/3vkMuYlbQ+Yvidp\nvvB/R9JyxHL3Sr5v0DocKTsm7hq/i44kl7TeF+NO//NB6yFpVVI7TFuQ1AnPFwvPF4rLGFRmmL4e\nphe/v1PXlUmoLzyqefzsZGkEE2Y2f2Jm28p+9LOSNty9G/YYN8NbXkq6IWlD0qKZLbv7gzDvsqSu\nu/cKy14ZsewZZRu2fNkqvH9TUiv8e8Pdd4etgLu/DHuXa2b2WNKVEPOxcZtZp7CYzTBtUdJaIdY7\nhXUsrsOKu/f64wxlHSnX3Y+sWyHupbDHPVOYvCzpfni9Z2ZXB8y6qeyznZXULUzPy9kvTJuV9OS4\nMsN3vCRp6GdcWNYrk1xfFPE9YDxICM01b2Y7hf9XQhNCz903wg/vB0lvKdtA7Ybpi8p+oG1Js/mP\nO1+msj2+V/If4DHLvq1sT/ZuaO54FN6/Gua/HN6/I+niiHXal3S58P+xcecbrvB8IbyWr898X9kP\nwjo8LsS6aGb5ehbfuzmg3JgN0Xll308nzLuhwkY/bEg74bPNP99NZUcLl0M8u2aWN6nMq7ChHWJT\nWRLcHvDaa3UllDnp9eW03wMi0YfQXD13v1x4PFC2we1Ihz/MYEuSwgZqRUf3Po8sU30bbTNbCD/W\nYcteUNjY9R0BXJbUCnuJ91Tuh9z/gx8V976kpbAxXet7LY+puLwrxVjdfWtInGU/r2O5+5Kkq3p9\nI307L6Pw3jVln/F2KHtd2cbwYph+b1g54fvp9O2pFw2qK9Lk15exfA8oj4QwXfaUNRvkTQi565Lu\nhw3UnrI90deEDcVisWNT2Z7h/jHL3lXW3q2++XaUNSesuPuKQhPKMGGvcEOHTQRl4r6trG192J5x\nv14h1sXQRDUozlKf1zE6GrLxyj+7wp50u9DJu6/DJp3zkp4Xph/nsrLE2FFo6+/7joaZ9Ppy2u8B\nkWgymiLhMHy70FSRNzM8UbaR6Cnb89pQ9qNdKLYJByuS2mY2q6z990Gx/X3Asu+EZS+FZfdCLFuF\n90tHN/S5/qaMjdDmnm8ojo1b2UajHcruheUVNzL9n89G/zqE8vrj7PWV+69m9sLd3xq27L5yuma2\nFNZtRll7eO5V/0KQf375EU7/5/px3/RB5b06OgplrhxztFCcb9LrS//3sDFqnXA65s5op8AoZrYa\nmpiAqUWTEVDOyD1uoOk4QgAASOIIAQAQkBAAAJJICACAgIQAAJBEQgAABCQEAIAkEgIAICAhAAAk\nkRAAAAEJAQAgiYQAAAhICAAASSQEAEBAQgAASCIhAAACEgIAQBIJAQAQkBAAAJJICACA4Gd1BxDj\nww8/9G+//bbuMDC9rKZyubE5Uipdrxt1hPDs2bO6QwCAqdWohAAASIeEAACQ1JCEYGbXzGzr4OCg\n7lAAYGo1IiG4+0N3Xz137lzdoQDA1GpEQgAApEdCAABIIiEAAAISAgBAUsOuVB7mg9/8VvtPf4ye\nb3buHX33zdcJIgLGg7qNKk1FQth/+q
OeLX4RP2P38/EHA4wRdRtVqr3JyMzmzWw7PObrjgcAzqpJ\nOEJYk7QvSe7eqzkWADizkh0hmNmqma33TWubWcfMdgpHA/OS2pK2zWw5VTwAgOMlSQhm1pG02Tdt\nQdKCuy9JulF4fb/wmE0RDwBgtCRNRu6+ZGarkmYKkxcldcLru2bWCtPbkrYlvVSWKAAANaiyD+G8\npMf9E0O/wdKwmUJiWZWkCxcuJAsOAM66Ks8yeq6svyA3M+yNRe6+5e4td2/Nzc2liQwAUGlC6Coc\nCYT+hG7ZGRn+GgDSqywhuPuupN3Q4dxWdrpp2XkZ/hoAEkvWh+DuWwOmbaQqDwBwOrVfqVwGTUYA\nkF4jEgJNRgCQXiMSAkcIAJBeIxICRwgAkF4jEgIAIL1GJASajAAgvUYkBJqMACC9RiQEAEB6JAQA\ngKSGJAT6EAAgvUYkBPoQACC9RiQEAEB6JAQAgKSGJAT6EAAgvUYkBPoQACC9RiQEAEB6JAQAgCQS\nAgAgICEAACQ1JCFwlhEApNeIhMBZRgCQXiMSAgAgPRICAEASCQEAEJAQAACSSAgAgKARCYHTTgEg\nvdIJwczeNbPfmdmd8PfddGEdxWmnAJDeyIRgZu+b2X9K2pB0XlI3/L1lZvfN7BeJYwQAVOBnJd7T\ncvfrfdMeSbonSWZ2Q9Kfxh0YAKBaIxOCu987zeuTzN94U61WK3q+2bl39N03XyeICADqU+YIQZJk\nZk8k9Tfi9yStuftfxhlUVeyvP+nZ4hfxM3Y/H38wwBixs4OTKJ0QJD2RtO3uj8xsUdKipPuSNiX9\nOkVwAE6GnR2cRMxpp++5+yNJcveupPfd/XtJnAsKAFMg5gjhwMw+VXaW0ZIkM7NfpQkLAFC10kcI\n4UyjtyX9k7LTTlckmaQbpwnAzJbNrGNm22Y2f5plAQBOLuYIQe5+q2/SozHEMK+sc1ru3hvD8gAA\nJxBzpfJVM9s3s+fh8csR7181s/W+ae1wNLBTOBp44O5rkrbNbDl+FQAA4xDTqfylso7l85KuSLo7\n7I1m1lF29lFx2oKkBXdfUtbMlL++EP7uKztaAADUIKbJ6IW7H0hZ046ZvRj2RndfMrNVSTOFyYuS\nOuH1XTPLT5J+aWbb4fmp+iMAACcXkxBemtkdZRv1DyS9jCzrvKTH/RPDKazdYTOFxLIqSRcuXIgs\nEgBQVuxZRvvKzi56NmB8o1Ge62iT0MywN/aVu+XuLXdvzc3NRRYJACirzGinX4Yhr+8o28s/kPR2\n+D9Gfv1C3p8w9KhgQAzcDwEAEivTZPRaM89JhH6D3dDhLElrEfM+lPSw1WrRxwAAiZRJCFck/Ye7\nvzbEtZm9L+m6u9/uf83dtwZM2zhJkGZ2TdK1S5cunWR2AEAJZYa/vmVmn5nZXUkvlPUjnFc28mln\nUDIYN44QACC9UmcZuftXkr4ys3MKVxbnp6ACAKZDzIVpcvcDd/++6mRApzIApBeVEOri7g/dffXc\nuf778wAAxiVmLKO/NbN3w99PzezddGEBAKoWc4RwT9JFSW1lw163k0Q0AE1GAJBeTEKYCXdMmw+d\nzKWuNB4HmowAIL2YhGDh6uTvw/UHFxPFBACoQUxCWFM2HtE/S7qsbEyjStBkBADpRTUZSbqp7C5p\nb0l6L0lEA9BkBADpxSSELWVHBj+EPoTSYxEBACZf9IVpkjz8a+MPBwBQl5iEsGNmv5c0HzqXY2+Q\nAwCYYDE3yLkpaVfSjrKxjGJvkHNidCoDQHqxTUb33P2mu99LFdCQculUBoDERo52amZ/1mG/wWzh\npZ67X0kSFQCgciOPENz9krv/XNL3khbd/byyW2H+kDo4AEB1YpqMzrn791J2O0xlN8gBAEyJ2KEr\nPjWzX5jZZ6rwtFM6lQEgvZiEsCLpbUl3ld1Cs7KhK+hUBoD0St1CU3p1UdqthLEAAGoUc4Ocq2a2\nb2
bPw+OXKQMDAFQrpsnoS0nvhbOMrihrOgIATInSTUaSXoRmI7l7z8xeJIpp4vkbb6rVakXPNzv3\njr775usEEQHA6cUkhJdhDKOOpA90hscysr/+pGeLX8TP2P18/MEAwJjEjGV0XdK+pOuSnlU5lhEA\nIL2YTuV3lTUb3ZT0tpn9XaqgBpTNdQgAkFhMp/K2pCfheVfSg/GHMxjXIQBAerGdyn+SJHfvmtl6\nopgA1IQTJs622E7lT5UdHSzqDHcqA9OKEybOtthOZZP0T5LO06kMANMl5ghB7v5VqkAAAPWKumMa\nAGB6xZx2+rvj/gcANFuZW2h+JOljSVfN7ON8sqT3Jf33OIIwsxlJ2+6+NI7lAQDilelD6EralbQh\nqV2Yvj/GOG6Ls5YAoFZl7ql84O4/KEsILum5pI8kvXXcfGa22n+tgpm1zaxjZjtmNh+mLUu6r/Em\nGABApJhO5S1JF5UdJZiOHi0cYWYdSZt90xYkLYRmoRuF15ckrUlaDMkBAFCDmIQw4+6PJM2H009n\nhr0xbPTX+iYvKhspVe6+K6kVnq+5+5qkrrtXNhwGAOComIRgYfjr783sF8qOFmKcl9Qb9mJICoMK\nXTWzJ2b25OnTp5FFAgDKikkIa8ra+e8ou2PaSmRZzyXNF/4feoRR5O5b7t5y99bc3FxkkQCAsmKG\nrvjB3b8Kncz33P37yLK6yvoL8v6EbtkZGf4aANKLuTDtqpntm9nz8PhlTEGh32A3dDi39Xofw3Hz\nMvw1ACQWM5bRl5Lec/eDcMrofWVNRwO5+9aAaRvxIQIAqhDTh/DC3Q8kyd17kl6kCel1NBkBQHox\nCeGlmd0xs1+Z2Zeq8MpimowAIL3Y+yHsS7ou6XmV90PgCAEA0osa/jpckPZl1fdF4AgBANKLOcvo\nIzPbl7QZzjL6+4RxAQAqFnOW0S13n83/MbPHkv4w/pBeZ2bXJF27dOlSFcUBwJkUdZbRiP+TockI\nANKLOULomdkflQ1QtyTJzewTSXL3f0sRHACgOjEJYS88TIfDThx7TwQAQHOUTghVn1lURB8CAKQX\nddppXehDAID0RiaEvJ/AzH6XPhwAQF3KNBldN7MVSS0zy0coNUnu7r9OFxoAoEojE4K7fxBGN13X\nMfdRTok+BABIr1Qfgrv33P1muEnOq0fq4Arl04cAAInFDF3xq9PcIAcAMNlizjJqK7tBznllN8a5\nmyYkAEAdGnGDHABAejFXKr80szvKhq74QBXeIAcAkF7MlcrXzewzZTfI2XP3W+nCOmpazjLyN95U\nq9WKnm927h19983XCSICgEMxRwi1DV/h7g8lPWy1WjfqKH9c7K8/6dniF/Ezdj8ffzAA0CfmLKN3\n04UBAKhbzBFCR9LPUwUCoLlO0hxKU+jkiUkIj8Jd0vKhr+Xut8cfEoCmOVFzKE2hEyf2CKGTKhAA\nQL1izjL6r5SBAADqFdOp/L6Z/dnMHpvZp1UOh21m18xs6+DgoKoiAeDMiblSeUvSZUk/uPu/SFob\n8f6xYXA7AEgv6o5pYegKD//a+MMBANQlJiHsmNnvJc2HISwYugIApkjphODuNyXtStqR1HP368mi\nAgBULnbointm1nH3vySKB8AZwdhek6d0QjCzjyTdk/TEzC5L+sTd/5AsMgBTjbG9Jk/MEcItd5+V\nJDObUXaRGgmhAuxJAahCTEJ4dUMcd39pZtwgpyLsSQGowsiEULgA7aWZ/VGHN8gZS0IwswVl1zTM\nuvvKOJaJDAOOAYhR5gjhYvj7uDCto8PrEU6r5e5rZrZsZovu3h09C8pgwDEAMUYmhPymOGb2vqSP\nJc3kLx03n5mtSppx97uFaW1JC5JmJa24e8/dt8xsUdJtSVdPtBYAgFOLHbrisaTt8Hgw7I1m1pG0\n2TdtQdKCuy9JupG/bmbL4ajghrLbcwIAahDVqVx2xFN3X8qPEAqT
FxWGz3b3XTN71bhtZtuS9iVt\nRMQDABijqPshhE7lXj7B3f8xYv7zOtoPkS/jgY4/2liVtCpJFy5ciCgOABAjJiGsKduDP+kYRs8l\nzRf+nxn2xiJ331LWXKVWqzWujmwAQJ+YhLB7ypvkdCW1Jd0N/QmlzyYys2uSrl26dOkUxQOYBlyo\nmU5MQpgJTUa7+YSYeyqHfoPd0OEsRdxPwd0fSnrYarVulI4WwFTiQs10YhJCO2bBoamnf9qJOo05\nQgCA9GLuqfwoZSAjyuYIAQASixnt9In6LkZz9ytjjwgAUIuYI4TidQPLyu6vXAmajAAgvah7KufC\ntQMLY47luPIeuvvquXPnqioSAM6cmCajz3TYZPS2JEsSEQCgFjFnGfUKz39w91vjDmYYmowAIL2R\nTUZm9omZfSLpreLDzP4hdXA5mowAIL0yRwhv9f3vkm6G6f8+9ogAALUofT8ESTKz95QNW91195sp\nAyuiyag6DAsAnF0xnco3JK1LWnP3/0kX0uu4MK06DAsAnF1l7qn8rrIb4jxx95+nDggAUI8yRwi9\n8DhvZveLL7j7x0miAgBUrkxCWEoeBQCgdmU6lWsb1C5HpzKA0+KEidFiLkyrDZ3KAE6LEyZGO9FY\nRgCA6UNCAABIIiEAAIJGJAQzu2ZmWwcHB3WHAgBTqxEJgcHtACC9RiQEAEB6JAQAgCQSAgAgICEA\nACSREAAAAQkBACCpIQmB6xAAID0Gt8NYMJIk0HyNSAiYfIwkCTRfI5qMAADpkRAAAJJICACAgIQA\nAJBEQgAABLUnBDNbMLPt8JivOx4AOKtqTwiSWu6+IumOpOW6gwGAsypZQjCzVTNb75vWNrOOme3k\nRwPuvmVmM5LWJD1IFQ8A4HhJEoKZdSRt9k1bkLTg7kuSbuSvh+m33X3N3Xsp4gEAjJYkIYSN/lrf\n5EVJnfD6rqR8nIPbkuZDHwJNRgBQkyqHrjgv6XH/xNB/MJSZrUpalaQLFy6kiQwAUGmn8nNJxbOI\nZsrM5O5b7t5y99bc3FyayAAAlSaErqQl6VW/QbfsjAx/DQDpVZYQQr/Bbuhwbuv1Pobj5n3o7qvn\nzp1LFh8AnHXJ+hDcfWvAtI1U5QEATmcSLkwbiSYjAEivETfI4Y5pAOpy0rsB6s2/kX76v+jZ6ryL\nYCMSgpldk3Tt0qVLdYcC4Iw56d0A3+5+3ri7CDYiIXCEgH4f/Oa32n/6Y/R83MMZGK4RCQHot//0\nx8btfQGTjk5lAICkhiQErkMAgPRoMkKtTnwGB4CxIyGgVqc5gwPAeDWiyYg+BABIrxEJgT4EAEiv\nEQkBAJAeCQEAIKkhCYE+BABIrxEJgT4EAEivEQkBAJAeCQEAIEkyd687htLM7Kmk/x3w0tuSnlUc\nziDEcVTT4njm7h+mDqafmX2rLMZB6vwMKXs6yi5drxuVEIYxsyfuXvv4B8RBHONWZ+yUfbbKlmgy\nAgAEJAQAgKTpSQhbdQcQEMdRxHF6dcZO2Wer7OnoQwAAnN60HCEAAE6JhAAAkDQFCcHM2mbWMbMd\nM5uvuOzNUPaemS0Xpr8I8eyY2WYFcQwsr6rPxszWC+XvmJnn5VXxWZjZqpmt900buO511pcYdcRZ\nV32uq/7WVW8nur66e2MfkhYkdfqfV1T2oqTN8HxG0ovwfF7SdoVxDCyvrs+mGE8Vn4WkjiSXtD5q\n3eusL5HrVHmcddXnSam/VdXbSa+vTT9CWFT2AcvddyVVeUFHT1I7lP1S0n6YPi9p3sy2Q2ZfSBzH\nsPLq+mw2Jd0YEdvYuPuSpLW+ycPWvc76EqOOOOuqz5NSfyupt5NeX5t+T+Xzkh7XUbC79yQpHMZt\nK/yYlP2Q7rj7g1CRtiVdTBjKsPIq/2xCM0MnbFCOiy21YeteW32JVHmcNdbn2uvvBNTbiamvTU8I\nz5Vl89xMlYWHdsCPJd0IGTzP
5K+em9msmc0UKttYDStP9Xw2tyVdHRVbqs+iYNi611pfItQSZx31\neULqb931dmLqa9ObjLqSliQpZPJuVQWb2aKkJXe/nP94wvT1vMMo7G3tp9wAHlNepZ9N3uFVXNeq\nP4uCYeteW32JVHmcddXnuuvvhNTbiamvjT5CCNl718w6YVJ/21xKS5JaZrZXiOeiu98NbY87YfJK\nyiCGlVfDZ7Ms6X6Z2FIbtu4115fSaoqzlvo8AfW39no7SfWVK5UBAJKa32QEABgTEgIAQBIJAQAQ\nkBAAAJJICACA4MwnBDObCYNa5QNZ7ZnZ6jHvXzaz9rDXE8S2PPqdR94/kesSymtbNrDX0JiGzLda\nwRAgU8XM5u1wULS9k37PJ6iDy9Y3cFuYvjdgWuyyK1+nmPUJ0xtdx898Qgh64YKcy5IuKxvXZBLM\nKrtyNMZErkv4Ud0f+cYB3H1L2dWkKK8jaS3UhYuSFmI2vgVRddDdH7j73RTLVg3rFLM+01DHSQiv\nm82f2OHgVjvhSk4VXtvMpxX3tAvzbBYubBm4LDNb7JveX7k3JC3m04+Lp851KbEeUnYVbPEK2Jnw\n3oUwfycsY8+yK0XzZeV7Tfs2wcNVT5Lw+Xfz8YmCFYUrXfPvs/j5HvMdvqqD4bEZvqN5G1ynl8Ne\n8kx4rWNm20NCLV2/61qnyPVpfh1PPZzqpD+UjQ/iknYKj2VJ65LahffkwwEvKxv4qzhccEfZmCNt\nhWFtw+t74fmwZS1K2ilM3+uLrTgk78BlTMK6lFiPmcJ6rIZl70laKCy/U4ip+LxdmG+57vrShEf4\njtaHvLaqo8Nc59/rwO+wrw4uF94zqk4V68/CkPpaun7XtU5l12da6jhHCJlXzSzh8UDZ6Ib50LOv\njWPi7l1lexkzkmY923OZV9hjCa/njltWtzg97JFs2+vtowOXMeD9da1Ld8C03LwOh1OWsh9BXkYu\n37N6GV7Ln+cDevUkXRmwbLyup77ROcNe6qqyZsRW2Mu9p+wzzh33HR55j0bUKWUbzXx5xb3m0vW7\n7711r9PA9SmYijpOQhhuT4cDSw07jOsqGxo3b6fvKdsTyAcLi1mWpFdtlivuvlEmnmPeX/u6FPRU\naL5Stve0IqkdklAZ82rG0NW1CzsBi3a0k7KtbIO1o6zpZcXdV3TCNm+Nrge7Oqw/r+KIqd/F907A\nOg1cn4KpqOONHtwuJT8c4Kqj7IseNMDVprLDzaXw/x1J22a2pMO9gbLLGmRfoePsFMuofV3c/WX/\njyJM21C2R1em4/uyDsfox2j5xmhW2R7og7BRVeH7k0Z/9q/qYHFiiXpQrD/FPeKByy5Zr+pYp7ze\nHrs+01LHGdzuFMKe84q7rxX+l7t3w15Eu7CBnWip18WyMzC6Qw63y8y/Hfb+gIk0DXWchHBCYe/i\ntrKNaH63qRllewNStmex5kfPiphIVa2LmbVHNG0Nm29V0pOT/tCAqjS9jpMQAACS6FQGAAQkBACA\nJBICACAgIQAAJJEQAAABCQEAIEn6f3go3Ap7mNr3AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x147f7c2d0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.clf()\n",
    "fig = plt.figure(1, figsize=(7., 10.), dpi=100)\n",
    "ax = fig.add_subplot(121)\n",
    "h = ax.hist(joined['Minimum Di'], 10, log=True, ec='#333333')\n",
    "\n",
    "ax.grid(b=False)\n",
    "ax.spines['top'].set_visible(False)\n",
    "ax.spines['right'].set_visible(False)\n",
    "ax.set_xlabel(u'Polygon-to-Polygon (km)', fontsize=fontsize)\n",
    "ax.set_ylabel('Number of post codes (log)', fontsize=fontsize)\n",
    "\n",
    "\n",
    "ax2 = fig.add_subplot(122, sharey=ax)\n",
    "h2 = ax2.hist(joined['Minimum Distance Centroid'], 10, log=True, ec='#333333')\n",
    "\n",
    "ax2.grid(b=False)\n",
    "ax2.spines['top'].set_visible(False)\n",
    "ax2.spines['right'].set_visible(False)\n",
    "ax2.spines['left'].set_visible(False)\n",
    "ax2.set_xlabel(u'Centroid-to-Centroid (km)', fontsize=fontsize)\n",
    "\n",
    "ax2.tick_params(\n",
    "    axis='y',          # changes apply to the x-axis\n",
    "    which='both',      # both major and minor ticks are affected\n",
    "    left='off')\n",
    "ax2.yaxis.set_tick_params(size=0)\n",
    "ax2.yaxis.set_visible(False)\n",
    "\n",
    "\n",
    "plt.suptitle(\"Post Code Distances, 1673814 Post Codes\", fontsize=fontsize)\n",
    "plt.savefig(\"postcodes.png\", **save_args)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "joined.to_file(\"joined_postcodes_centroids.shp\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can use [Chebyshev's theorem](http://www.philender.com/courses/intro/notes3/chebyshev.html) to calculate distributions:\n",
    "\n",
    "Mean: `4.650122`  \n",
    "Standard deviation: `5.663566`  \n",
    "Chebyshev: $1 - (1/k^2)$  \n",
    "where `k` is the number of standard deviations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def chebyshev(mu, sigma, stddev):\n",
    "    \"\"\"\n",
    "    Use Chebyshev's theorem to calculate distributions\n",
    "    mu: mean\n",
    "    sigma: the standard deviation\n",
    "    stddev: the number of standard deviations you wish to calculate\n",
    "    \n",
    "    In this case, we can't have a distance less than 0.0, so it's\n",
    "    not mu +/- (sigma * stddev), just mu + (sigma * stddev)\n",
    "    \n",
    "    Returns a tuple:\n",
    "    the percentage of the distribution, and the distance, in this case.\n",
    "    \n",
    "    \"\"\"\n",
    "    return (1. - (1. / stddev ** 2)), mu + (sigma * stddev)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((0.75, 15.977254), (0.8888888888888888, 21.64082), (0.9375, 27.304386))"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get 75%, 88%, and 93.75% values\n",
    "chebyshev(4.650122, 5.663566, 2),\\\n",
    "chebyshev(4.650122, 5.663566, 3),\\\n",
    "chebyshev(4.650122, 5.663566, 4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Therefore:\n",
    "- 75% of post codes (2 standard deviations) lie within **15.97 km**\n",
    "- 88% of post codes (3 standard deviations) lie within **21.64 km**\n",
    "- 93.75% of post codes (4 standard deviations) lie within **27.30 km**\n",
    "\n",
    "of a funded project, measured by **centroid distance**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert km to miles: multiply by 0.621371"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}