iso3166-3 0.3.0

#!/usr/bin/python3
# ISC License (ISC)
#
# Copyright (c) 2016, Austin Hellyer <hello@austinhellyer.me>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# What is ISO 3166-3?
#
# | ISO 3166-3 is part of the ISO 3166 standard published by the International
# | Organization for Standardization (ISO), and defines codes for country names
# | which have been deleted from ISO 3166-1 since its first publication in 1974.
# |
# | - [Wikipedia](http://en.wikipedia.org/wiki/ISO_3166-3)
#
# Originally by zeyla on GitHub.

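# USAGE:
# Requires python3 and the BeautifulSoup 4 package (bs4). Run 'make update':
# the script downloads the table of codes from the following page itself and
# rewrites the section of ../src/codes.rs (relative to this script) between
# the '// Begin' and '// End' markers, so nothing has to be copied by hand:
# https://en.wikipedia.org/wiki/ISO_3166-3#Current_codes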

from bs4 import BeautifulSoup
import os
import re
import subprocess
import sys
import urllib.request

# Download the Wikipedia article that lists the ISO 3166-3 codes. The response
# is used as a context manager so the connection is closed as soon as the body
# has been read.
with urllib.request.urlopen(
        'https://en.wikipedia.org/wiki/ISO_3166-3') as response:
    html = response.read()

soup = BeautifulSoup(html, 'html.parser')

# Take the first table carrying the 'sortable' class; its data rows are the
# <tr> elements with the inline style matched below.
table = soup.find('table', class_='sortable')

if table is None:
    # Fail with a readable message rather than an AttributeError on None when
    # the page layout no longer matches what this script expects.
    raise SystemExit('Could not find the sortable table of ISO 3166-3 codes '
                     'in the downloaded page.')

rows = table.find_all('tr', style='vertical-align:top;')

# Patterns that strip parenthesised asides and bracketed footnote markers
# (each together with the whitespace character in front of it) from the
# "new country names and codes" cell. Raw strings keep the backslashes as
# regex escapes instead of invalid Python string escapes, and compiling once
# here avoids repeating the work for every row.
regex_description = re.compile(r'\s\(.+?\)')
regex_description2 = re.compile(r'\s\[.+?\]')


def _rust_str(value):
    """Escape backslashes and double quotes for use in a Rust string literal."""
    return value.replace('\\', '\\\\').replace('"', '\\"')


# One generated `codes.push(...)` statement per table row; joined into `text`
# once the loop is done.
entries = []

# Cycle through each row and build its entry.
for row in rows:
    # 0: Former country name
    # 1: Former codes (alpha2, alpha3, num)
    # 2: Period of validity
    # 3: New country names and codes
    cells = row.find_all('td')

    # 0: ISO 3166-3 code
    headers = row.find_all('th')

    # Drop anything from the first '[' onwards (footnote markers), then split
    # the remaining "alpha2, alpha3, num" text.
    # 0: alpha2
    # 1: alpha3
    # 2: num
    codes_data = cells[1].get_text().split('[')[0].split(', ')

    # Same footnote trimming, then split the period on the en dash.
    # 0: from
    # 1: to
    valid_data = cells[2].get_text().split('[')[0].split('–')

    alpha2 = codes_data[0]
    alpha3 = codes_data[1]

    # Only the first line of the header cell, without a trailing ' [...]'.
    code = headers[0].get_text().split('\n')[0].split(' [')[0]

    # Remove the parenthesised parts first, then the bracketed ones.
    description_reg = regex_description2.sub(
        '', regex_description.sub('', cells[3].get_text()))

    # The first line is kept as the lead-in; any further lines are appended
    # after a space and separated from each other by '; '.
    description_parts = description_reg.split('\n', 1)

    if len(description_parts) > 1:
        description_end = description_parts[1].split('\n')
        description = description_parts[0] + ' ' + '; '.join(description_end)
    else:
        description = description_reg

    name = cells[0].get_text().split(' !')[0].split(' [')[0]

    # Fixed-width prefixes: three characters for the numeric code and four
    # for each year of the validity period.
    num = codes_data[2][0:3]
    valid_from = valid_data[0][0:4]
    valid_to = valid_data[1][0:4]

    entries.append(''.join([
        '    codes.push(FormerCountryCode {\n',
        '        code: "{}",\n'.format(_rust_str(code)),
        '        codes_former: FormerCountryCodeCodes {\n',
        '            alpha2: "{}",\n'.format(_rust_str(alpha2)),
        '            alpha3: "{}",\n'.format(_rust_str(alpha3)),
        '            num: "{}",\n'.format(_rust_str(num)),
        '        },\n',
        '        description: "{}",\n'.format(_rust_str(description)),
        '        name: "{}",\n'.format(_rust_str(name)),
        # The years are emitted unquoted, as elements of a Rust array.
        '        validity: [{}, {}],\n'.format(valid_from, valid_to),
        '    });\n',
    ]))

text = ''.join(entries)

# Replace the generated section of codes.rs: everything from the last
# '// Begin' marker up to and including the last '// End' line is swapped for
# the freshly generated entries.
codes_path = os.path.join(os.path.dirname(__file__), '../src/codes.rs')

# Rust source files are UTF-8, so read and write with that encoding explicitly
# instead of relying on the platform default.
with open(codes_path, 'r', encoding='utf-8') as f:
    codes_file = f.read()

begin_marker = '// Begin'
end_marker = '// End\n'

# Where to insert the text.
begin_at = codes_file.rfind(begin_marker)
# And where to end putting the text.
end_at = codes_file.rfind(end_marker)

# Check the markers before the file is opened for writing: opening with 'w'
# truncates it, so failing afterwards would leave codes.rs empty.
if begin_at == -1 or end_at == -1 or end_at < begin_at:
    raise SystemExit(
        "Could not find a '// Begin' marker followed by a '// End' line in "
        '{}; file left unchanged.'.format(codes_path))

before_marker = codes_file[:begin_at]
after_marker = codes_file[end_at + len(end_marker):]

updated = before_marker + '// Begin\n' + text + '    // End\n' + after_marker

with open(codes_path, 'w', encoding='utf-8') as f:
    f.write(updated)

print('Updated.')