{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\n",
"import geopandas as gp\n",
"import pandas as pd\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib.collections import PatchCollection\n",
"from matplotlib import rc\n",
"from shapely.geometry import LineString, Polygon, Point, box, shape\n",
"from shapely.ops import cascaded_union\n",
"from fiona.crs import from_epsg\n",
"from rtree import index"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def nearest_postcode_distance(df, df2, sindex):\n",
"    \"\"\"\n",
"    Find the funded postcode nearest to a single postcode row.\n",
"\n",
"    df: one row of the postcode frame (this function is applied with axis=1);\n",
"        the bounding box of its geometry is used to query the spatial index\n",
"    df2: the frame the spatial index was built from; needs a 'POSTCODE'\n",
"        column and a geometry for every row\n",
"    sindex: spatial index over df2 whose nearest() yields row positions in df2\n",
"\n",
"    Returns a tuple: (distance in km, postcode of the nearest df2 row).\n",
"    The distance is measured between the two geometries and divided by 1000,\n",
"    which assumes the CRS units are metres.\n",
"    Raises StopIteration if the spatial index returns no match.\n",
"    Note: distance between adjacent postcodes is 0.0\n",
"    \"\"\"\n",
"    # built-in next() instead of the Python 2-only .next() method, so this also runs on Python 3\n",
"    idx_pos = next(iter(sindex.nearest(df.geometry.bounds, 1)))\n",
"    # look the matched row up once and reuse it for both return values\n",
"    nearest_row = df2.iloc[idx_pos]\n",
"    return df.geometry.distance(nearest_row.geometry) / 1000., nearest_row['POSTCODE']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# CRS has been converted from 27700 to 25832 (ETRS89) for Euclidean distance measurement\n",
"merged = gp.GeoDataFrame.from_file(\"merged_postcodes_centroids.shp\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Distance P</th>\n",
" <th>Minimum Di</th>\n",
" <th>PC_AREA</th>\n",
" <th>POSTCODE</th>\n",
" <th>UPP</th>\n",
" <th>geometry</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AB10 1AN</td>\n",
" <td>0.08436385277905387</td>\n",
" <td>AB</td>\n",
" <td>AB10 1AB</td>\n",
" <td>00000000000000000002</td>\n",
" <td>POLYGON ((-169708.694024045 6388795.9234469, -...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AB10 1AN</td>\n",
" <td>0</td>\n",
" <td>AB</td>\n",
" <td>AB10 1AL</td>\n",
" <td>00000000000000000003</td>\n",
" <td>POLYGON ((-169614.706281362 6388752.870788309,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>AB10 1AN</td>\n",
" <td>0</td>\n",
" <td>AB</td>\n",
" <td>AB10 1AN</td>\n",
" <td>00000000000000000004</td>\n",
" <td>POLYGON ((-169546.2997755085 6388773.476457709...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>AB10 1AN</td>\n",
" <td>0.0233134882466831</td>\n",
" <td>AB</td>\n",
" <td>AB10 1AP</td>\n",
" <td>00000000000000000005</td>\n",
" <td>POLYGON ((-169690.3220984091 6388713.581593464...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>AB10 1AN</td>\n",
" <td>0.1438272018464478</td>\n",
" <td>AB</td>\n",
" <td>AB10 1AS</td>\n",
" <td>00000000000000000006</td>\n",
" <td>POLYGON ((-169779.7872413595 6388697.504694026...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Distance P Minimum Di PC_AREA POSTCODE UPP \\\n",
"0 AB10 1AN 0.08436385277905387 AB AB10 1AB 00000000000000000002 \n",
"1 AB10 1AN 0 AB AB10 1AL 00000000000000000003 \n",
"2 AB10 1AN 0 AB AB10 1AN 00000000000000000004 \n",
"3 AB10 1AN 0.0233134882466831 AB AB10 1AP 00000000000000000005 \n",
"4 AB10 1AN 0.1438272018464478 AB AB10 1AS 00000000000000000006 \n",
"\n",
" geometry \n",
"0 POLYGON ((-169708.694024045 6388795.9234469, -... \n",
"1 POLYGON ((-169614.706281362 6388752.870788309,... \n",
"2 POLYGON ((-169546.2997755085 6388773.476457709... \n",
"3 POLYGON ((-169690.3220984091 6388713.581593464... \n",
"4 POLYGON ((-169779.7872413595 6388697.504694026... "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create \"funded\" dataframe from CR-supplied data, and create a spatial index for it"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df_funded = pd.read_excel(\"/Users/sth/Downloads/cr_funded.xlsx\")\n",
"df_funded_geo = merged[merged['POSTCODE'].isin(list(df_funded['Grants Project Postcode']))]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"merged = merged.sort_index()\n",
"# df_funded_geo was produced by filtering `merged`, so take an explicit copy before\n",
"# adding a column to it; mutating the filtered slice can trigger SettingWithCopyWarning\n",
"df_funded_geo = df_funded_geo.sort_index().copy()\n",
"df_funded_geo['Funded Project Postcode'] = df_funded_geo[u'POSTCODE']\n",
"# build the spatial index after sorting: nearest_postcode_distance() resolves the\n",
"# index results with .iloc, so index positions must match the final row order\n",
"sindex = df_funded_geo.sindex"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([ u'Distance P', u'Minimum Di',\n",
" u'PC_AREA', u'POSTCODE',\n",
" u'UPP', u'geometry',\n",
" u'Minimum Distance', u'Nearest Project Postcode',\n",
" u'Funded Project Postcode'],\n",
" dtype='object')"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_funded_geo.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Calculate minimum distance in `km` and post code \n",
"This will result in distances of 0.0 for postcodes adjacent to postcodes in which a project has been funded. **CAUTION**: this will take a long time (20 - 45 minutes, depending on processor speed), and will use a lot of memory."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"merged['Minimum Distance'], merged['Nearest Project Postcode'] = zip(*merged.apply(\n",
" nearest_postcode_distance, axis=1, args=(df_funded_geo, sindex)))"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([ u'PC_AREA', u'POSTCODE',\n",
" u'UPP', u'geometry',\n",
" u'Minimum Distance', u'Nearest Project Postcode'],\n",
" dtype='object')"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged.columns"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'PC_AREA', u'POSTCODE', u'UPP', u'geometry',\n",
" u'Funded Project Postcode'],\n",
" dtype='object')"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_funded_geo.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Join the funded projects DataFrame to the postcode DataFrame \n",
"This is an intermediate step -- we'll be using the new geometry column to calculate\n",
"centroid distance between the nearest postcodes\n",
"\n",
"Duplicate columns from the funded projects df have the `_funded` suffix."
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"joined = merged.join(\n",
" df_funded_geo.set_index(u'POSTCODE'),\n",
" on = \"Nearest Project Postcode\",\n",
" rsuffix=u'_funded'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([ u'PC_AREA', u'POSTCODE',\n",
" u'UPP', u'geometry',\n",
" u'Minimum Distance', u'Nearest Project Postcode',\n",
" u'PC_AREA_funded', u'UPP_funded',\n",
" u'geometry_funded', u'Funded Project Postcode'],\n",
" dtype='object')"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joined.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Calculate the [centroid](https://en.wikipedia.org/wiki/Centroid) distance in `km` between each UK postcode, and the nearest postcode in which Comic Relief have funded a project. \n",
"\n",
"See the accompanying image for an explanation of the difference between the two measurements."
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# centroid-to-centroid distance for each row, divided by 1000 to match the km values above\n",
"joined['Minimum Distance Centroid'] = joined.apply(\n",
"    lambda row: row.geometry.centroid.distance(row[u'geometry_funded'].centroid) / 1000.,\n",
"    axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"PC_AREA AB\n",
"POSTCODE AB11 5QH\n",
"UPP 469\n",
"geometry foo\n",
"Minimum Distance 1\n",
"Distance Postcode AB10 1AN\n",
"Name: 721, dtype: object"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# fix AB11 5QH -- its polygon is wrong\n",
"# merged.iloc[721]['Minimum Distance'] = 1.0\n",
"# merged.iloc[721]['Distance Postcode'] = 'AB10 1AN'\n",
"# merged.iloc[721]\n",
"\n",
"# AB11 5QH HAS BEEN REMOVED DUE TO AN INVALID POLYGON."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show the 20 post codes which are located the furthest from a post code in which Comic Relief have funded a project. The distance in this case is **polygon distance**."
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>POSTCODE</th>\n",
" <th>Minimum Distance</th>\n",
" <th>Nearest Project Postcode</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>689983</th>\n",
" <td>IV27 4XD</td>\n",
" <td>102.243906</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689981</th>\n",
" <td>IV27 4XA</td>\n",
" <td>102.193813</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689982</th>\n",
" <td>IV27 4XB</td>\n",
" <td>102.186633</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689984</th>\n",
" <td>IV27 4XE</td>\n",
" <td>102.154803</td>\n",
" <td>IV30 5XF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689986</th>\n",
" <td>IV27 4XG</td>\n",
" <td>102.145813</td>\n",
" <td>IV30 5XF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689992</th>\n",
" <td>IV27 4XQ</td>\n",
" <td>101.753515</td>\n",
" <td>IV30 5XF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689989</th>\n",
" <td>IV27 4XL</td>\n",
" <td>101.681902</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689985</th>\n",
" <td>IV27 4XF</td>\n",
" <td>101.569011</td>\n",
" <td>IV30 5XF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689987</th>\n",
" <td>IV27 4XH</td>\n",
" <td>100.444232</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689929</th>\n",
" <td>IV27 4QE</td>\n",
" <td>100.420907</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>690001</th>\n",
" <td>IV27 4YR</td>\n",
" <td>100.302919</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689932</th>\n",
" <td>IV27 4QH</td>\n",
" <td>100.149834</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>690000</th>\n",
" <td>IV27 4YQ</td>\n",
" <td>99.836299</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>690004</th>\n",
" <td>IV27 4YU</td>\n",
" <td>99.698467</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>693656</th>\n",
" <td>VIV00223</td>\n",
" <td>99.570614</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>693659</th>\n",
" <td>VIV00226</td>\n",
" <td>99.554083</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689924</th>\n",
" <td>IV27 4PY</td>\n",
" <td>99.537784</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689918</th>\n",
" <td>IV27 4PR</td>\n",
" <td>99.480622</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>690002</th>\n",
" <td>IV27 4YS</td>\n",
" <td>99.442818</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689919</th>\n",
" <td>IV27 4PS</td>\n",
" <td>99.378948</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" POSTCODE Minimum Distance Nearest Project Postcode\n",
"689983 IV27 4XD 102.243906 KW15 1JD\n",
"689981 IV27 4XA 102.193813 KW15 1JD\n",
"689982 IV27 4XB 102.186633 KW15 1JD\n",
"689984 IV27 4XE 102.154803 IV30 5XF\n",
"689986 IV27 4XG 102.145813 IV30 5XF\n",
"689992 IV27 4XQ 101.753515 IV30 5XF\n",
"689989 IV27 4XL 101.681902 KW15 1JD\n",
"689985 IV27 4XF 101.569011 IV30 5XF\n",
"689987 IV27 4XH 100.444232 KW15 1JD\n",
"689929 IV27 4QE 100.420907 HS2 0BB\n",
"690001 IV27 4YR 100.302919 KW15 1JD\n",
"689932 IV27 4QH 100.149834 HS2 0BB\n",
"690000 IV27 4YQ 99.836299 KW15 1JD\n",
"690004 IV27 4YU 99.698467 KW15 1JD\n",
"693656 VIV00223 99.570614 HS2 0BB\n",
"693659 VIV00226 99.554083 KW15 1JD\n",
"689924 IV27 4PY 99.537784 HS2 0BB\n",
"689918 IV27 4PR 99.480622 HS2 0BB\n",
"690002 IV27 4YS 99.442818 KW15 1JD\n",
"689919 IV27 4PS 99.378948 HS2 0BB"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged[['POSTCODE', 'Minimum Distance', 'Nearest Project Postcode']].sort_values(\n",
" by='Minimum Distance', ascending=False).head(20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show the 20 post codes which are located the furthest from a post code in which Comic Relief have funded a project. The distance in this case is **centroid distance**, which is greater than polygon distance (see for instance the top result: the minimum polygon distance between `HS6 5DL` and `IV51 9DT` is 79.094 km, but the centroid distance is 111.73 km) but is a subjectively better measure of distances between adjacent post codes."
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>POSTCODE</th>\n",
" <th>Minimum Distance</th>\n",
" <th>Minimum Distance Centroid</th>\n",
" <th>Nearest Project Postcode</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>642866</th>\n",
" <td>HS6 5DL</td>\n",
" <td>79.094774</td>\n",
" <td>111.729833</td>\n",
" <td>IV51 9DT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>643062</th>\n",
" <td>HS9 5YW</td>\n",
" <td>97.247336</td>\n",
" <td>105.578325</td>\n",
" <td>IV51 9DT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689929</th>\n",
" <td>IV27 4QE</td>\n",
" <td>100.420907</td>\n",
" <td>103.728483</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689985</th>\n",
" <td>IV27 4XF</td>\n",
" <td>101.569011</td>\n",
" <td>103.334439</td>\n",
" <td>IV30 5XF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>690003</th>\n",
" <td>IV27 4YT</td>\n",
" <td>97.668976</td>\n",
" <td>103.241990</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>690001</th>\n",
" <td>IV27 4YR</td>\n",
" <td>100.302919</td>\n",
" <td>102.885857</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689986</th>\n",
" <td>IV27 4XG</td>\n",
" <td>102.145813</td>\n",
" <td>102.855545</td>\n",
" <td>IV30 5XF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689984</th>\n",
" <td>IV27 4XE</td>\n",
" <td>102.154803</td>\n",
" <td>102.510731</td>\n",
" <td>IV30 5XF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>720099</th>\n",
" <td>KW14 7TJ</td>\n",
" <td>79.766687</td>\n",
" <td>102.389255</td>\n",
" <td>KW17 2PU</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689992</th>\n",
" <td>IV27 4XQ</td>\n",
" <td>101.753515</td>\n",
" <td>102.341355</td>\n",
" <td>IV30 5XF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689983</th>\n",
" <td>IV27 4XD</td>\n",
" <td>102.243906</td>\n",
" <td>102.335801</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689981</th>\n",
" <td>IV27 4XA</td>\n",
" <td>102.193813</td>\n",
" <td>102.306455</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689982</th>\n",
" <td>IV27 4XB</td>\n",
" <td>102.186633</td>\n",
" <td>102.300491</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689928</th>\n",
" <td>IV27 4QD</td>\n",
" <td>98.620823</td>\n",
" <td>102.085076</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689989</th>\n",
" <td>IV27 4XL</td>\n",
" <td>101.681902</td>\n",
" <td>102.012712</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689987</th>\n",
" <td>IV27 4XH</td>\n",
" <td>100.444232</td>\n",
" <td>101.951465</td>\n",
" <td>KW15 1JD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689932</th>\n",
" <td>IV27 4QH</td>\n",
" <td>100.149834</td>\n",
" <td>101.847465</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689926</th>\n",
" <td>IV27 4QA</td>\n",
" <td>99.207035</td>\n",
" <td>101.726996</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689998</th>\n",
" <td>IV27 4YL</td>\n",
" <td>94.432503</td>\n",
" <td>101.615775</td>\n",
" <td>IV30 5XF</td>\n",
" </tr>\n",
" <tr>\n",
" <th>689927</th>\n",
" <td>IV27 4QB</td>\n",
" <td>98.351980</td>\n",
" <td>101.505845</td>\n",
" <td>HS2 0BB</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" POSTCODE Minimum Distance Minimum Distance Centroid \\\n",
"642866 HS6 5DL 79.094774 111.729833 \n",
"643062 HS9 5YW 97.247336 105.578325 \n",
"689929 IV27 4QE 100.420907 103.728483 \n",
"689985 IV27 4XF 101.569011 103.334439 \n",
"690003 IV27 4YT 97.668976 103.241990 \n",
"690001 IV27 4YR 100.302919 102.885857 \n",
"689986 IV27 4XG 102.145813 102.855545 \n",
"689984 IV27 4XE 102.154803 102.510731 \n",
"720099 KW14 7TJ 79.766687 102.389255 \n",
"689992 IV27 4XQ 101.753515 102.341355 \n",
"689983 IV27 4XD 102.243906 102.335801 \n",
"689981 IV27 4XA 102.193813 102.306455 \n",
"689982 IV27 4XB 102.186633 102.300491 \n",
"689928 IV27 4QD 98.620823 102.085076 \n",
"689989 IV27 4XL 101.681902 102.012712 \n",
"689987 IV27 4XH 100.444232 101.951465 \n",
"689932 IV27 4QH 100.149834 101.847465 \n",
"689926 IV27 4QA 99.207035 101.726996 \n",
"689998 IV27 4YL 94.432503 101.615775 \n",
"689927 IV27 4QB 98.351980 101.505845 \n",
"\n",
" Nearest Project Postcode \n",
"642866 IV51 9DT \n",
"643062 IV51 9DT \n",
"689929 HS2 0BB \n",
"689985 IV30 5XF \n",
"690003 KW15 1JD \n",
"690001 KW15 1JD \n",
"689986 IV30 5XF \n",
"689984 IV30 5XF \n",
"720099 KW17 2PU \n",
"689992 IV30 5XF \n",
"689983 KW15 1JD \n",
"689981 KW15 1JD \n",
"689982 KW15 1JD \n",
"689928 HS2 0BB \n",
"689989 KW15 1JD \n",
"689987 KW15 1JD \n",
"689932 HS2 0BB \n",
"689926 HS2 0BB \n",
"689998 IV30 5XF \n",
"689927 HS2 0BB "
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joined[['POSTCODE', 'Minimum Distance', 'Minimum Distance Centroid', 'Nearest Project Postcode']].sort_values(\n",
" by='Minimum Distance Centroid', ascending=False).head(20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's look at some summary stats for both sets of distance calculations"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"4.3708649416420808"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joined['Minimum Distance'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"4.6501219489666115"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joined['Minimum Distance Centroid'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2.3061707476917683"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joined['Minimum Distance'].median()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2.5462807258789875"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joined['Minimum Distance Centroid'].median()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 1.673695e+06\n",
"mean 4.370865e+00\n",
"std 5.529624e+00\n",
"min 0.000000e+00\n",
"25% 9.216085e-01\n",
"50% 2.306171e+00\n",
"75% 5.910296e+00\n",
"max 1.022439e+02\n",
"Name: Minimum Di, dtype: float64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joined['Minimum Distance'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 1.673695e+06\n",
"mean 4.650122e+00\n",
"std 5.663566e+00\n",
"min 0.000000e+00\n",
"25% 1.105992e+00\n",
"50% 2.546281e+00\n",
"75% 6.253126e+00\n",
"max 1.117298e+02\n",
"Name: Minimum Distance Centroid, dtype: float64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joined['Minimum Distance Centroid'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"These are Matplotlib parameters -- you can ignore this."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# font families matplotlib should use for each generic family name\n",
"rc('font', **{'family':'sans-serif',\n",
"    'sans-serif':['Helvetica'],\n",
"    'monospace': ['Inconsolata'],\n",
"    'serif': ['Adobe Garamond Pro']}\n",
")\n",
"\n",
"rc('text', **{'usetex': True})\n",
"# raw string: same value on Python 2, and avoids an invalid \\u escape on Python 3\n",
"rc('text', **{'latex.preamble': r'\\usepackage{sfmath}'})\n",
"\n",
"# keyword arguments passed to plt.savefig() when the figure is written out\n",
"save_args = {\n",
"    \"format\": \"png\",\n",
"    \"bbox_inches\": 'tight',\n",
"    \"alpha\": True,\n",
"    \"transparent\": True,\n",
"    \"dpi\": 200\n",
"}\n",
"# font size shared by the axis labels and the figure title\n",
"fontsize = 10"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot a histogram of both sets of distance measurements. Use a log scale for the `y` axis; otherwise the first few bins would dwarf the rest, because the vast majority of postcodes (at least 75%) are within ~16km of a project."
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEiCAYAAAD5+KUgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGvZJREFUeJzt3cFzG0d2x/Hfs7dyjGjKLN8Um9Le1xT0B+yK9HprS4es\nSfkPiEUqd5uUcnH5EhlyrqktUsk9CpXsQeWy14DyD0ik9w9YwpujSxIlXlPlfTlMjziEAGKaRM9g\nwO+nCkVwgJl+AzTmzXTP9Ji7CwCAN+oOAAAwGUgIAABJJAQAQEBCAABIIiEAAAISAgBAEgmhkcxs\nxszczHbCY8/MVk+wjOUB0+fNrFNYbjtyuXsR5RfXYSePx8yWzWw9Ju46mdlqf7xhWid8hgth2nrf\n+nr4vLcLn/lCYRmbhfcujiozTN8zs/nC/6euK4Xl1FZfUBF359Gwh6QZSXt9/3vkMuYlbQ+Yvidp\nvvB/R9JyxHL3Sr5v0DocKTsm7hq/i44kl7TeF+NO//NB6yFpVVI7TFuQ1AnPFwvPF4rLGFRmmL4e\nphe/v1PXlUmoLzyqefzsZGkEE2Y2f2Jm28p+9LOSNty9G/YYN8NbXkq6IWlD0qKZLbv7gzDvsqSu\nu/cKy14ZsewZZRu2fNkqvH9TUiv8e8Pdd4etgLu/DHuXa2b2WNKVEPOxcZtZp7CYzTBtUdJaIdY7\nhXUsrsOKu/f64wxlHSnX3Y+sWyHupbDHPVOYvCzpfni9Z2ZXB8y6qeyznZXULUzPy9kvTJuV9OS4\nMsN3vCRp6GdcWNYrk1xfFPE9YDxICM01b2Y7hf9XQhNCz903wg/vB0lvKdtA7Ybpi8p+oG1Js/mP\nO1+msj2+V/If4DHLvq1sT/ZuaO54FN6/Gua/HN6/I+niiHXal3S58P+xcecbrvB8IbyWr898X9kP\nwjo8LsS6aGb5ehbfuzmg3JgN0Xll308nzLuhwkY/bEg74bPNP99NZUcLl0M8u2aWN6nMq7ChHWJT\nWRLcHvDaa3UllDnp9eW03wMi0YfQXD13v1x4PFC2we1Ihz/MYEuSwgZqRUf3Po8sU30bbTNbCD/W\nYcteUNjY9R0BXJbUCnuJ91Tuh9z/gx8V976kpbAxXet7LY+puLwrxVjdfWtInGU/r2O5+5Kkq3p9\nI307L6Pw3jVln/F2KHtd2cbwYph+b1g54fvp9O2pFw2qK9Lk15exfA8oj4QwXfaUNRvkTQi565Lu\nhw3UnrI90deEDcVisWNT2Z7h/jHL3lXW3q2++XaUNSesuPuKQhPKMGGvcEOHTQRl4r6trG192J5x\nv14h1sXQRDUozlKf1zE6GrLxyj+7wp50u9DJu6/DJp3zkp4Xph/nsrLE2FFo6+/7joaZ9Ppy2u8B\nkWgymiLhMHy70FSRNzM8UbaR6Cnb89pQ9qNdKLYJByuS2mY2q6z990Gx/X3Asu+EZS+FZfdCLFuF\n90tHN/S5/qaMjdDmnm8ojo1b2UajHcruheUVNzL9n89G/zqE8vrj7PWV+69m9sLd3xq27L5yuma2\nFNZtRll7eO5V/0KQf375EU7/5/px3/RB5b06OgplrhxztFCcb9LrS//3sDFqnXA65s5op8AoZrYa\nmpiAqUWTEVDOyD1uoOk4QgAASOIIAQAQkBAAAJJICACAgIQAAJBEQgAABCQEAIAkEgIAICAhAAAk\nkRAAAAEJAQAgiYQAAAhICAAASSQEAEBAQgAASCIhAAACEgIAQBIJAQAQkBAAAJJICACA4Gd1BxDj\nww8/9G+//bbuMDC9rKZyubE5Uipdrxt1hPDs2bO6QwCAqdWohAAASIeEAACQ1JCEYGbXzGzr4OCg\n7lAAYGo1IiG4+0N3Xz137lzdoQDA1GpEQgAApEdCAABIIiEAAAISAgBAUsOuVB7mg9/8VvtPf4ye\nb3buHX33zdcJIgLGg7qNKk1FQth/+qOeLX4R
P2P38/EHA4wRdRtVqr3JyMzmzWw7PObrjgcAzqpJ\nOEJYk7QvSe7eqzkWADizkh0hmNmqma33TWubWcfMdgpHA/OS2pK2zWw5VTwAgOMlSQhm1pG02Tdt\nQdKCuy9JulF4fb/wmE0RDwBgtCRNRu6+ZGarkmYKkxcldcLru2bWCtPbkrYlvVSWKAAANaiyD+G8\npMf9E0O/wdKwmUJiWZWkCxcuJAsOAM66Ks8yeq6svyA3M+yNRe6+5e4td2/Nzc2liQwAUGlC6Coc\nCYT+hG7ZGRn+GgDSqywhuPuupN3Q4dxWdrpp2XkZ/hoAEkvWh+DuWwOmbaQqDwBwOrVfqVwGTUYA\nkF4jEgJNRgCQXiMSAkcIAJBeIxICRwgAkF4jEgIAIL1GJASajAAgvUYkBJqMACC9RiQEAEB6JAQA\ngKSGJAT6EAAgvUYkBPoQACC9RiQEAEB6JAQAgKSGJAT6EAAgvUYkBPoQACC9RiQEAEB6JAQAgCQS\nAgAgICEAACQ1JCFwlhEApNeIhMBZRgCQXiMSAgAgPRICAEASCQEAEJAQAACSSAgAgKARCYHTTgEg\nvdIJwczeNbPfmdmd8PfddGEdxWmnAJDeyIRgZu+b2X9K2pB0XlI3/L1lZvfN7BeJYwQAVOBnJd7T\ncvfrfdMeSbonSWZ2Q9Kfxh0YAKBaIxOCu987zeuTzN94U61WK3q+2bl39N03XyeICADqU+YIQZJk\nZk8k9Tfi9yStuftfxhlUVeyvP+nZ4hfxM3Y/H38wwBixs4OTKJ0QJD2RtO3uj8xsUdKipPuSNiX9\nOkVwAE6GnR2cRMxpp++5+yNJcveupPfd/XtJnAsKAFMg5gjhwMw+VXaW0ZIkM7NfpQkLAFC10kcI\n4UyjtyX9k7LTTlckmaQbpwnAzJbNrGNm22Y2f5plAQBOLuYIQe5+q2/SozHEMK+sc1ru3hvD8gAA\nJxBzpfJVM9s3s+fh8csR7181s/W+ae1wNLBTOBp44O5rkrbNbDl+FQAA4xDTqfylso7l85KuSLo7\n7I1m1lF29lFx2oKkBXdfUtbMlL++EP7uKztaAADUIKbJ6IW7H0hZ046ZvRj2RndfMrNVSTOFyYuS\nOuH1XTPLT5J+aWbb4fmp+iMAACcXkxBemtkdZRv1DyS9jCzrvKTH/RPDKazdYTOFxLIqSRcuXIgs\nEgBQVuxZRvvKzi56NmB8o1Ge62iT0MywN/aVu+XuLXdvzc3NRRYJACirzGinX4Yhr+8o28s/kPR2\n+D9Gfv1C3p8w9KhgQAzcDwEAEivTZPRaM89JhH6D3dDhLElrEfM+lPSw1WrRxwAAiZRJCFck/Ye7\nvzbEtZm9L+m6u9/uf83dtwZM2zhJkGZ2TdK1S5cunWR2AEAJZYa/vmVmn5nZXUkvlPUjnFc28mln\nUDIYN44QACC9UmcZuftXkr4ys3MKVxbnp6ACAKZDzIVpcvcDd/++6mRApzIApBeVEOri7g/dffXc\nuf778wAAxiVmLKO/NbN3w99PzezddGEBAKoWc4RwT9JFSW1lw163k0Q0AE1GAJBeTEKYCXdMmw+d\nzKWuNB4HmowAIL2YhGDh6uTvw/UHFxPFBACoQUxCWFM2HtE/S7qsbEyjStBkBADpRTUZSbqp7C5p\nb0l6L0lEA9BkBADpxSSELWVHBj+EPoTSYxEBACZf9IVpkjz8a+MPBwBQl5iEsGNmv5c0HzqXY2+Q\nAwCYYDE3yLkpaVfSjrKxjGJvkHNidCoDQHqxTUb33P2mu99LFdCQculUBoDERo52amZ/1mG/wWzh\npZ67X0kSFQCgciOPENz9krv/XNL3khbd/byyW2H+kDo4AEB1YpqMzrn791J2O0xlN8gBAEyJ2KEr\nPjWzX5jZZ6rwtFM6lQEgvZiEsCLpbUl3ld1Cs7KhK+hUBoD0St1CU3p1UdqthLEAAGoUc4Ocq2a2\nb2bPw+OX
KQMDAFQrpsnoS0nvhbOMrihrOgIATInSTUaSXoRmI7l7z8xeJIpp4vkbb6rVakXPNzv3\njr775usEEQHA6cUkhJdhDKOOpA90hscysr/+pGeLX8TP2P18/MEAwJjEjGV0XdK+pOuSnlU5lhEA\nIL2YTuV3lTUb3ZT0tpn9XaqgBpTNdQgAkFhMp/K2pCfheVfSg/GHMxjXIQBAerGdyn+SJHfvmtl6\nopgA1IQTJs622E7lT5UdHSzqDHcqA9OKEybOtthOZZP0T5LO06kMANMl5ghB7v5VqkAAAPWKumMa\nAGB6xZx2+rvj/gcANFuZW2h+JOljSVfN7ON8sqT3Jf33OIIwsxlJ2+6+NI7lAQDilelD6EralbQh\nqV2Yvj/GOG6Ls5YAoFZl7ql84O4/KEsILum5pI8kvXXcfGa22n+tgpm1zaxjZjtmNh+mLUu6r/Em\nGABApJhO5S1JF5UdJZiOHi0cYWYdSZt90xYkLYRmoRuF15ckrUlaDMkBAFCDmIQw4+6PJM2H009n\nhr0xbPTX+iYvKhspVe6+K6kVnq+5+5qkrrtXNhwGAOComIRgYfjr783sF8qOFmKcl9Qb9mJICoMK\nXTWzJ2b25OnTp5FFAgDKikkIa8ra+e8ou2PaSmRZzyXNF/4feoRR5O5b7t5y99bc3FxkkQCAsmKG\nrvjB3b8Kncz33P37yLK6yvoL8v6EbtkZGf4aANKLuTDtqpntm9nz8PhlTEGh32A3dDi39Xofw3Hz\nMvw1ACQWM5bRl5Lec/eDcMrofWVNRwO5+9aAaRvxIQIAqhDTh/DC3Q8kyd17kl6kCel1NBkBQHox\nCeGlmd0xs1+Z2Zeq8MpimowAIL3Y+yHsS7ou6XmV90PgCAEA0osa/jpckPZl1fdF4AgBANKLOcvo\nIzPbl7QZzjL6+4RxAQAqFnOW0S13n83/MbPHkv4w/pBeZ2bXJF27dOlSFcUBwJkUdZbRiP+TockI\nANKLOULomdkflQ1QtyTJzewTSXL3f0sRHACgOjEJYS88TIfDThx7TwQAQHOUTghVn1lURB8CAKQX\nddppXehDAID0RiaEvJ/AzH6XPhwAQF3KNBldN7MVSS0zy0coNUnu7r9OFxoAoEojE4K7fxBGN13X\nMfdRTok+BABIr1Qfgrv33P1muEnOq0fq4Arl04cAAInFDF3xq9PcIAcAMNlizjJqK7tBznllN8a5\nmyYkAEAdGnGDHABAejFXKr80szvKhq74QBXeIAcAkF7MlcrXzewzZTfI2XP3W+nCOmpazjLyN95U\nq9WKnm927h19983XCSICgEMxRwi1DV/h7g8lPWy1WjfqKH9c7K8/6dniF/Ezdj8ffzAA0CfmLKN3\n04UBAKhbzBFCR9LPUwUCoLlO0hxKU+jkiUkIj8Jd0vKhr+Xut8cfEoCmOVFzKE2hEyf2CKGTKhAA\nQL1izjL6r5SBAADqFdOp/L6Z/dnMHpvZp1UOh21m18xs6+DgoKoiAeDMiblSeUvSZUk/uPu/SFob\n8f6xYXA7AEgv6o5pYegKD//a+MMBANQlJiHsmNnvJc2HISwYugIApkjphODuNyXtStqR1HP368mi\nAgBULnbointm1nH3vySKB8AZwdhek6d0QjCzjyTdk/TEzC5L+sTd/5AsMgBTjbG9Jk/MEcItd5+V\nJDObUXaRGgmhAuxJAahCTEJ4dUMcd39pZtwgpyLsSQGowsiEULgA7aWZ/VGHN8gZS0IwswVl1zTM\nuvvKOJaJDAOOAYhR5gjhYvj7uDCto8PrEU6r5e5rZrZsZovu3h09C8pgwDEAMUYmhPymOGb2vqSP\nJc3kLx03n5mtSppx97uFaW1JC5JmJa24e8/dt8xsUdJtSVdPtBYAgFOLHbrisaTt8Hgw7I1m1pG0\n2TdtQdKCuy9JupG/bmbL4ajghrLbcwIAahDVqVx2xFN3X8qPEAqTFxWGz3
b3XTN71bhtZtuS9iVt\nRMQDABijqPshhE7lXj7B3f8xYv7zOtoPkS/jgY4/2liVtCpJFy5ciCgOABAjJiGsKduDP+kYRs8l\nzRf+nxn2xiJ331LWXKVWqzWujmwAQJ+YhLB7ypvkdCW1Jd0N/QmlzyYys2uSrl26dOkUxQOYBlyo\nmU5MQpgJTUa7+YSYeyqHfoPd0OEsRdxPwd0fSnrYarVulI4WwFTiQs10YhJCO2bBoamnf9qJOo05\nQgCA9GLuqfwoZSAjyuYIAQASixnt9In6LkZz9ytjjwgAUIuYI4TidQPLyu6vXAmajAAgvah7KufC\ntQMLY47luPIeuvvquXPnqioSAM6cmCajz3TYZPS2JEsSEQCgFjFnGfUKz39w91vjDmYYmowAIL2R\nTUZm9omZfSLpreLDzP4hdXA5mowAIL0yRwhv9f3vkm6G6f8+9ogAALUofT8ESTKz95QNW91195sp\nAyuiyag6DAsAnF0xnco3JK1LWnP3/0kX0uu4MK06DAsAnF1l7qn8rrIb4jxx95+nDggAUI8yRwi9\n8DhvZveLL7j7x0miAgBUrkxCWEoeBQCgdmU6lWsb1C5HpzKA0+KEidFiLkyrDZ3KAE6LEyZGO9FY\nRgCA6UNCAABIIiEAAIJGJAQzu2ZmWwcHB3WHAgBTqxEJgcHtACC9RiQEAEB6JAQAgCQSAgAgICEA\nACSREAAAAQkBACCpIQmB6xAAID0Gt8NYMJIk0HyNSAiYfIwkCTRfI5qMAADpkRAAAJJICACAgIQA\nAJBEQgAABLUnBDNbMLPt8JivOx4AOKtqTwiSWu6+IumOpOW6gwGAsypZQjCzVTNb75vWNrOOme3k\nRwPuvmVmM5LWJD1IFQ8A4HhJEoKZdSRt9k1bkLTg7kuSbuSvh+m33X3N3Xsp4gEAjJYkIYSN/lrf\n5EVJnfD6rqR8nIPbkuZDHwJNRgBQkyqHrjgv6XH/xNB/MJSZrUpalaQLFy6kiQwAUGmn8nNJxbOI\nZsrM5O5b7t5y99bc3FyayAAAlSaErqQl6VW/QbfsjAx/DQDpVZYQQr/Bbuhwbuv1Pobj5n3o7qvn\nzp1LFh8AnHXJ+hDcfWvAtI1U5QEATmcSLkwbiSYjAEivETfI4Y5pAOpy0rsB6s2/kX76v+jZ6ryL\nYCMSgpldk3Tt0qVLdYcC4Iw56d0A3+5+3ri7CDYiIXCEgH4f/Oa32n/6Y/R83MMZGK4RCQHot//0\nx8btfQGTjk5lAICkhiQErkMAgPRoMkKtTnwGB4CxIyGgVqc5gwPAeDWiyYg+BABIrxEJgT4EAEiv\nEQkBAJAeCQEAIKkhCYE+BABIrxEJgT4EAEivEQkBAJAeCQEAIEkyd687htLM7Kmk/x3w0tuSnlUc\nziDEcVTT4njm7h+mDqafmX2rLMZB6vwMKXs6yi5drxuVEIYxsyfuXvv4B8RBHONWZ+yUfbbKlmgy\nAgAEJAQAgKTpSQhbdQcQEMdRxHF6dcZO2Wer7OnoQwAAnN60HCEAAE6JhAAAkDQFCcHM2mbWMbMd\nM5uvuOzNUPaemS0Xpr8I8eyY2WYFcQwsr6rPxszWC+XvmJnn5VXxWZjZqpmt900buO511pcYdcRZ\nV32uq/7WVW8nur66e2MfkhYkdfqfV1T2oqTN8HxG0ovwfF7SdoVxDCyvrs+mGE8Vn4WkjiSXtD5q\n3eusL5HrVHmcddXnSam/VdXbSa+vTT9CWFT2AcvddyVVeUFHT1I7lP1S0n6YPi9p3sy2Q2ZfSBzH\nsPLq+mw2Jd0YEdvYuPuSpLW+ycPWvc76EqOOOOuqz5NSfyupt5NeX5t+T+Xzkh7XUbC79yQpHMZt\nK/yYlP2Q7rj7g1CRtiVdTBjKsPIq/2xCM0MnbFCOiy21YeteW32JVHmcNdbn2uvvBNTbiamvTU8I\nz5Vl89xMlYWHdsCPJd0IGTzP5K+em9
msmc0UKttYDStP9Xw2tyVdHRVbqs+iYNi611pfItQSZx31\neULqb931dmLqa9ObjLqSliQpZPJuVQWb2aKkJXe/nP94wvT1vMMo7G3tp9wAHlNepZ9N3uFVXNeq\nP4uCYeteW32JVHmcddXnuuvvhNTbiamvjT5CCNl718w6YVJ/21xKS5JaZrZXiOeiu98NbY87YfJK\nyiCGlVfDZ7Ms6X6Z2FIbtu4115fSaoqzlvo8AfW39no7SfWVK5UBAJKa32QEABgTEgIAQBIJAQAQ\nkBAAAJJICACA4MwnBDObCYNa5QNZ7ZnZ6jHvXzaz9rDXE8S2PPqdR94/kesSymtbNrDX0JiGzLda\nwRAgU8XM5u1wULS9k37PJ6iDy9Y3cFuYvjdgWuyyK1+nmPUJ0xtdx898Qgh64YKcy5IuKxvXZBLM\nKrtyNMZErkv4Ud0f+cYB3H1L2dWkKK8jaS3UhYuSFmI2vgVRddDdH7j73RTLVg3rFLM+01DHSQiv\nm82f2OHgVjvhSk4VXtvMpxX3tAvzbBYubBm4LDNb7JveX7k3JC3m04+Lp851KbEeUnYVbPEK2Jnw\n3oUwfycsY8+yK0XzZeV7Tfs2wcNVT5Lw+Xfz8YmCFYUrXfPvs/j5HvMdvqqD4bEZvqN5G1ynl8Ne\n8kx4rWNm20NCLV2/61qnyPVpfh1PPZzqpD+UjQ/iknYKj2VJ65LahffkwwEvKxv4qzhccEfZmCNt\nhWFtw+t74fmwZS1K2ilM3+uLrTgk78BlTMK6lFiPmcJ6rIZl70laKCy/U4ip+LxdmG+57vrShEf4\njtaHvLaqo8Nc59/rwO+wrw4uF94zqk4V68/CkPpaun7XtU5l12da6jhHCJlXzSzh8UDZ6Ib50LOv\njWPi7l1lexkzkmY923OZV9hjCa/njltWtzg97JFs2+vtowOXMeD9da1Ld8C03LwOh1OWsh9BXkYu\n37N6GV7Ln+cDevUkXRmwbLyup77ROcNe6qqyZsRW2Mu9p+wzzh33HR55j0bUKWUbzXx5xb3m0vW7\n7711r9PA9SmYijpOQhhuT4cDSw07jOsqGxo3b6fvKdsTyAcLi1mWpFdtlivuvlEmnmPeX/u6FPRU\naL5Stve0IqkdklAZ82rG0NW1CzsBi3a0k7KtbIO1o6zpZcXdV3TCNm+Nrge7Oqw/r+KIqd/F907A\nOg1cn4KpqOONHtwuJT8c4Kqj7IseNMDVprLDzaXw/x1J22a2pMO9gbLLGmRfoePsFMuofV3c/WX/\njyJM21C2R1em4/uyDsfox2j5xmhW2R7og7BRVeH7k0Z/9q/qYHFiiXpQrD/FPeKByy5Zr+pYp7ze\nHrs+01LHGdzuFMKe84q7rxX+l7t3w15Eu7CBnWip18WyMzC6Qw63y8y/Hfb+gIk0DXWchHBCYe/i\ntrKNaH63qRllewNStmex5kfPiphIVa2LmbVHNG0Nm29V0pOT/tCAqjS9jpMQAACS6FQGAAQkBACA\nJBICACAgIQAAJJEQAAABCQEAIEn6f3go3Ap7mNr3AAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x147f7c2d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.clf()\n",
"fig = plt.figure(1, figsize=(7., 10.), dpi=100)\n",
"\n",
"# left panel: polygon-to-polygon distance\n",
"ax = fig.add_subplot(121)\n",
"# use the full column name, as in every other cell; 'Minimum Di' is the\n",
"# shapefile-truncated name and is not the freshly computed column\n",
"h = ax.hist(joined['Minimum Distance'], 10, log=True, ec='#333333')\n",
"\n",
"ax.grid(b=False)\n",
"ax.spines['top'].set_visible(False)\n",
"ax.spines['right'].set_visible(False)\n",
"ax.set_xlabel(u'Polygon-to-Polygon (km)', fontsize=fontsize)\n",
"ax.set_ylabel('Number of post codes (log)', fontsize=fontsize)\n",
"\n",
"\n",
"# right panel: centroid-to-centroid distance, sharing the y axis with the left panel\n",
"ax2 = fig.add_subplot(122, sharey=ax)\n",
"h2 = ax2.hist(joined['Minimum Distance Centroid'], 10, log=True, ec='#333333')\n",
"\n",
"ax2.grid(b=False)\n",
"ax2.spines['top'].set_visible(False)\n",
"ax2.spines['right'].set_visible(False)\n",
"ax2.spines['left'].set_visible(False)\n",
"ax2.set_xlabel(u'Centroid-to-Centroid (km)', fontsize=fontsize)\n",
"\n",
"ax2.tick_params(\n",
"    axis='y', # changes apply to the y-axis\n",
"    which='both', # both major and minor ticks are affected\n",
"    left='off')\n",
"ax2.yaxis.set_tick_params(size=0)\n",
"ax2.yaxis.set_visible(False)\n",
"\n",
"\n",
"plt.suptitle(\"Post Code Distances, 1673814 Post Codes\", fontsize=fontsize)\n",
"plt.savefig(\"postcodes.png\", **save_args)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"joined.to_file(\"joined_postcodes_centroids.shp\")"
]
},
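{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can use [Chebyshev's theorem](http://www.philender.com/courses/intro/notes3/chebyshev.html) to put a lower bound on the proportion of post codes that lie within a given distance of a funded project, without assuming anything about the shape of the distribution. Using the centroid-distance statistics from above:\n",
"\n",
"Mean: `4.650122` km  \n",
"Standard deviation: `5.663566` km  \n",
"Chebyshev: at least $1 - (1/k^2)$ of the values lie within `k` standard deviations of the mean,  \n",
"where `k` is the number of standard deviations (`k > 1`)."
]
},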
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def chebyshev(mu, sigma, stddev):\n",
"    \"\"\"\n",
"    Apply Chebyshev's theorem to a distribution with a known mean and spread.\n",
"\n",
"    mu: mean of the distribution\n",
"    sigma: standard deviation of the distribution\n",
"    stddev: the number of standard deviations to extend above the mean\n",
"\n",
"    A distance cannot be less than 0.0, so only the upper limit\n",
"    mu + (sigma * stddev) is reported, not mu +/- (sigma * stddev).\n",
"\n",
"    Returns a tuple (fraction, limit):\n",
"    fraction: 1 - 1/stddev**2, the minimum share of the distribution covered,\n",
"        expressed as a fraction (0.75 for stddev=2) rather than a percentage\n",
"    limit: mu + (sigma * stddev), in the same units as mu and sigma\n",
"    \"\"\"\n",
"    fraction = 1. - (1. / stddev ** 2)\n",
"    limit = mu + (sigma * stddev)\n",
"    return fraction, limit"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"((0.75, 15.977254), (0.8888888888888888, 21.64082), (0.9375, 27.304386))"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# get 75%, 88%, and 93.75% values\n",
"chebyshev(4.650122, 5.663566, 2),\\\n",
"chebyshev(4.650122, 5.663566, 3),\\\n",
"chebyshev(4.650122, 5.663566, 4)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Therefore:\n",
"- 75% of post codes (2 standard deviations) lie within **15.97 km**\n",
"- 88% of post codes (3 standard deviations) lie within **21.64 km**\n",
"- 93.75% of post codes (4 standard deviations) lie within **27.30 km**\n",
"\n",
"of a funded project, measured by **centroid distance**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Convert km to miles: multiply by 0.621371"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}