from bs4 import BeautifulSoup
import os
import re
import subprocess
import sys
import urllib.request
# Regenerate the FormerCountryCode entries between the `// Begin` and
# `// End` markers of ../src/codes.rs from the ISO 3166-3 table published
# on Wikipedia.

SOURCE_URL = 'https://en.wikipedia.org/wiki/ISO_3166-3'

# Annotations removed from the description cell: parenthesised asides and
# bracketed markers, each together with the whitespace character before it.
# Compiled once here rather than once per table row.
regex_description = re.compile(r'\s\(.+?\)')
regex_description2 = re.compile(r'\s\[.+?\]')


def _rust_str(value):
    """Escape *value* for use inside a double-quoted Rust string literal."""
    # Backslashes first, so the ones added for quotes are not doubled.
    return value.replace('\\', '\\\\').replace('"', '\\"')


# Close the HTTP response deterministically instead of leaking it.
with urllib.request.urlopen(SOURCE_URL) as response:
    html = response.read()
soup = BeautifulSoup(html, 'html.parser')
table = soup.find('table', class_='sortable')
if table is None:
    sys.exit('No sortable table found at {}'.format(SOURCE_URL))
rows = table.find_all('tr', style='vertical-align:top;')

# Collect the generated source in a list and join once at the end.
pieces = []
for row in rows:
    cells = row.find_all('td')
    headers = row.find_all('th')
    # Everything from the first '[' onwards in a cell is discarded.
    codes_data = cells[1].get_text().split('[')[0].split(', ')
    valid_data = cells[2].get_text().split('[')[0].split('–')
    alpha2 = codes_data[0]
    alpha3 = codes_data[1]
    code = headers[0].get_text().split('\n')[0].split(' [')[0]
    description_reg = regex_description2.sub(
        '', regex_description.sub('', cells[3].get_text()))
    # Fold a multi-line description onto one line: the first line, a space,
    # then the remaining lines joined with '; '.
    description_parts = description_reg.split('\n', 1)
    if len(description_parts) > 1:
        description_end = description_parts[1].split('\n')
        description = description_parts[0] + ' ' + '; '.join(description_end)
    else:
        description = description_reg
    name = cells[0].get_text().split(' !')[0].split(' [')[0]
    num = codes_data[2][0:3]
    valid_from = valid_data[0][0:4]
    valid_to = valid_data[1][0:4]
    pieces.extend([
        ' codes.push(FormerCountryCode {\n',
        ' code: "{}",\n'.format(_rust_str(code)),
        ' codes_former: FormerCountryCodeCodes {\n',
        ' alpha2: "{}",\n'.format(_rust_str(alpha2)),
        ' alpha3: "{}",\n'.format(_rust_str(alpha3)),
        ' num: "{}",\n'.format(_rust_str(num)),
        ' },\n',
        ' description: "{}",\n'.format(_rust_str(description)),
        ' name: "{}",\n'.format(_rust_str(name)),
        ' validity: [{}, {}],\n'.format(valid_from, valid_to),
        ' });\n',
    ])
text = ''.join(pieces)

codes_path = os.path.join(os.path.dirname(__file__), '../src/codes.rs')
# Explicit UTF-8 so the result does not depend on the platform's locale.
with open(codes_path, 'r', encoding='utf-8') as f:
    codes_file = f.read()
# Validate the markers and build the full replacement BEFORE opening the file
# for writing: opening with 'w' truncates it, so any failure after that point
# would leave codes.rs empty.
if '// Begin' not in codes_file or '// End\n' not in codes_file:
    sys.exit("Missing '// Begin' or '// End' marker in {}".format(codes_path))
codes = codes_file.rsplit('// Begin', 1)
codes_end = codes_file.rsplit('// End\n', 1)
updated = codes[0] + '// Begin\n' + text + ' // End\n' + codes_end[1]
with open(codes_path, 'w', encoding='utf-8') as f:
    f.write(updated)
print('Updated.')