From f1eeb74a7ba69d93b31fee5f47b69ee2da7dcb02 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 21 Nov 2024 00:58:34 +0000 Subject: [PATCH 1/5] =?UTF-8?q?chore(=F0=9F=A6=BE):=20bump=20python=20ruff?= =?UTF-8?q?=200.4.5=20->=200.7.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements/development.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/requirements/development.txt b/requirements/development.txt index 00aa0a9effb3f..3739ff9906807 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -8,7 +8,7 @@ -r base.txt -e file:. # via - # -r requirements/base.in + # -r /home/runner/work/superset/superset/requirements/base.in # -r requirements/development.in astroid==3.1.0 # via pylint @@ -168,7 +168,7 @@ pyee==11.0.1 # via playwright pyfakefs==5.3.5 # via apache-superset -pyhive[presto]==0.7.0 +pyhive[hive]==0.7.0 # via apache-superset pyinstrument==4.4.0 # via apache-superset @@ -195,10 +195,14 @@ requests-oauthlib==2.0.0 # via google-auth-oauthlib rfc3339-validator==0.1.4 # via openapi-schema-validator -ruff==0.4.5 +rfc3986==2.0.0 + # via tableschema +ruff==0.7.4 # via apache-superset s3transfer==0.10.1 # via boto3 +sasl==0.3.1 + # via pyhive sqlalchemy-bigquery==1.11.0 # via apache-superset sqloxide==0.1.43 From 667839eb38899f4991caba9e2ba71ba12485b799 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Thu, 21 Nov 2024 09:22:50 -0800 Subject: [PATCH 2/5] lint new rules --- requirements/development.txt | 2 +- .../Country Map GeoJSON Generator.ipynb | 957 ++++++++++-------- superset/extensions/metadb.py | 4 +- tests/integration_tests/sqla_models_tests.py | 2 +- tests/integration_tests/viz_tests.py | 2 +- tests/unit_tests/dao/key_value_test.py | 2 +- 6 files changed, 540 insertions(+), 429 deletions(-) diff --git a/requirements/development.txt b/requirements/development.txt index 3739ff9906807..dbfb1d872ff83 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -8,7 +8,7 @@ -r base.txt -e file:. # via - # -r /home/runner/work/superset/superset/requirements/base.in + # -r requirements/base.in # -r requirements/development.in astroid==3.1.0 # via pylint diff --git a/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country Map GeoJSON Generator.ipynb b/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country Map GeoJSON Generator.ipynb index 2969a7f96e43e..21559309284b3 100644 --- a/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country Map GeoJSON Generator.ipynb +++ b/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country Map GeoJSON Generator.ipynb @@ -48,18 +48,17 @@ "source": [ "# Dependencies\n", "\n", - "import os\n", "import json\n", - "import requests\n", + "import os\n", + "\n", "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", - "import shapely\n", "import pandas as pd\n", - "import shapely.geometry\n", - "import shapely.ops\n", + "import requests\n", + "import shapely\n", "import shapely.affinity\n", - "from shapely.geometry import Polygon, MultiPolygon\n", - "import shutil" + "import shapely.geometry\n", + "import shapely.ops" ] }, { @@ -96,32 +95,38 @@ "if not os.path.exists(data_dir):\n", " os.mkdir(data_dir)\n", "\n", + "\n", "def download_files(skip_existing: bool):\n", " for url in [\n", " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip\",\n", " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip\",\n", - " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_1_states_provinces.zip\"\n", + " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_1_states_provinces.zip\",\n", " ]:\n", - " file_name = url.split('/')[-1]\n", - " full_file_name = f'{data_dir}/{file_name}'\n", + " file_name = url.split(\"/\")[-1]\n", + " full_file_name = f\"{data_dir}/{file_name}\"\n", " # temporary fix\n", - " url = url.replace(\"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download\", \"https://naciscdn.org/naturalearth\")\n", + " url = url.replace(\n", + " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download\",\n", + " \"https://naciscdn.org/naturalearth\",\n", + " )\n", " with requests.get(\n", " url,\n", " headers={\n", " \"accept-encoding\": \"gzip, deflate, br\",\n", - " \"user-agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36\"\n", + " \"user-agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36\",\n", " },\n", " stream=True,\n", " ) as res:\n", - " file_size = int(res.headers['content-length'])\n", + " file_size = int(res.headers[\"content-length\"])\n", " if res.status_code != 200:\n", - " print(\"Error downloading files. Please open the URL to download them from browser manually.\")\n", + " print(\n", + " \"Error downloading files. Please open the URL to download them from browser manually.\"\n", + " )\n", " break\n", " if (\n", - " skip_existing and\n", - " os.path.exists(full_file_name) and\n", - " file_size == os.path.getsize(full_file_name)\n", + " skip_existing\n", + " and os.path.exists(full_file_name)\n", + " and file_size == os.path.getsize(full_file_name)\n", " ):\n", " print(f\"Skip {file_name} because it already exists\")\n", " continue\n", @@ -130,6 +135,7 @@ " fh.write(res.content)\n", " print(\"Done. \")\n", "\n", + "\n", "download_files(skip_existing=False)" ] }, @@ -522,7 +528,7 @@ } ], "source": [ - "df_50m.groupby('admin').count()" + "df_50m.groupby(\"admin\").count()" ] }, { @@ -552,7 +558,7 @@ "source": [ "# Use 1:50m geometry for some large countries:\n", "\n", - "print(*df_50m['admin'].unique(), sep='\\n')" + "print(*df_50m[\"admin\"].unique(), sep=\"\\n\")" ] }, { @@ -563,7 +569,7 @@ }, "outputs": [], "source": [ - "df = pd.concat([df_10m[~df_10m['admin'].isin(df_50m['admin'].unique())], df_50m])" + "df = pd.concat([df_10m[~df_10m[\"admin\"].isin(df_50m[\"admin\"].unique())], df_50m])" ] }, { @@ -587,241 +593,241 @@ "source": [ "# Country names used in file names\n", "countries = [\n", - " 'afghanistan',\n", - " 'aland',\n", - " 'albania',\n", - " 'algeria',\n", - " 'american samoa',\n", - " 'andorra',\n", - " 'angola',\n", - " 'anguilla', \n", - " 'antarctica',\n", - " 'antigua and barbuda',\n", - " 'argentina',\n", - " 'armenia',\n", - " 'aruba',\n", - " 'australia',\n", - " 'austria',\n", - " 'azerbaijan',\n", - " 'the bahamas',\n", - " 'bahrain',\n", - " 'bangladesh',\n", - " 'barbados',\n", - " 'belarus',\n", - " 'belgium',\n", - " 'belize',\n", - " 'benin',\n", - " 'bermuda',\n", - " 'bhutan',\n", - " 'bolivia',\n", - " # 'bonaire, sint eustatius and saba', # Part of Netherlands Antilles, part of Netherlands, accordsing to Natural Earth?\n", - " 'bosnia and herzegovina',\n", - " 'botswana',\n", - " # 'bouvet island', # part of Norway, in Natural Earth data\n", - " 'brazil',\n", - " 'brunei',\n", - " 'british indian ocean territory',\n", - " 'bulgaria',\n", - " 'burkina faso',\n", - " 'burundi',\n", - " 'cape verde',\n", - " 'cambodia',\n", - " 'cameroon',\n", - " 'canada',\n", - " 'cayman islands',\n", - " 'central african republic',\n", - " 'chad',\n", - " 'chile',\n", - " 'china',\n", - " # 'christmas island', # part of British Indian Ocean Territory, according to Natural Earth\n", - " # 'cocos (keeling) islands', # part of British Indian Ocean Territory, according to Natural Earth\n", - " 'colombia',\n", - " 'comoros',\n", - " 'democratic republic of the congo',\n", - " 'cook islands',\n", - " 'costa rica',\n", - " # 'côte d\\'ivoire', # not sure why this isn't working\n", - " 'croatia',\n", - " 'cuba',\n", - " 'curaçao',\n", - " 'cyprus',\n", - " 'czech republic',\n", - " 'denmark',\n", - " 'djibouti',\n", - " 'dominica',\n", - " 'dominican republic',\n", - " 'ecuador',\n", - " 'egypt',\n", - " 'el salvador',\n", - " 'equatorial guinea',\n", - " 'eritrea',\n", - " 'estonia',\n", - " # 'eswatini', # not sure why this doesn't work — Swaziland isn't available to alias, either.\n", - " 'ethiopia',\n", - " 'falkland islands',\n", - " 'faroe islands',\n", - " 'fiji',\n", - " 'finland',\n", - " 'france',\n", - " # 'french guiana',\n", - " 'french polynesia',\n", - " # 'french southern territories (the)',\n", - " 'gabon', \n", - " 'gambia',\n", - " 'germany',\n", - " 'ghana',\n", - " 'gibraltar',\n", - " 'greece',\n", - " 'greenland',\n", - " 'grenada',\n", - " # 'guadeloupe', # part of France, in Natural Earth data\n", - " 'guam',\n", - " 'guatemala',\n", - " 'haiti',\n", - " 'hungary',\n", - " 'guernsey',\n", - " 'guinea',\n", - " # 'guinea-bissau', # not sure why this isn't working\n", - " 'guyana',\n", - " 'honduras',\n", - " 'iceland',\n", - " 'india',\n", - " 'indonesia',\n", - " 'iran',\n", - " 'israel',\n", - " 'italy',\n", - " 'japan',\n", - " 'jordan',\n", - " 'kazakhstan',\n", - " 'kenya',\n", - " 'korea',\n", - " 'kuwait',\n", - " 'kyrgyzstan',\n", - " 'laos',\n", - " 'latvia',\n", - " 'lebanon',\n", - " 'lesotho',\n", - " 'liberia',\n", - " 'libya',\n", - " 'liechtenstein',\n", - " 'lithuania',\n", - " 'luxembourg',\n", - " # 'macao', # part of China, in Natural Earth data\n", - " 'macedonia',\n", - " 'madagascar',\n", - " 'malawi',\n", - " 'malaysia',\n", - " 'maldives',\n", - " 'mali',\n", - " 'malta',\n", - " 'marshall islands',\n", - " # 'martinique', # part of France, in Natural Earth data\n", - " 'mauritania',\n", - " 'mauritius',\n", - " # 'mayotte', # part of France, in Natural Earth data\n", - " 'mexico',\n", - " 'moldova',\n", - " 'montserrat',\n", - " 'monaco',\n", - " 'mongolia',\n", - " 'montenegro',\n", - " 'morocco',\n", - " 'mozambique',\n", - " 'myanmar',\n", - " 'namibia',\n", - " 'nauru',\n", - " 'nepal',\n", - " 'netherlands',\n", - " 'new caledonia',\n", - " 'new zealand',\n", - " 'nicaragua',\n", - " 'niger',\n", - " 'nigeria',\n", - " 'niue',\n", - " 'norfolk island',\n", - " 'northern mariana islands',\n", - " 'norway',\n", - " 'oman',\n", - " 'pakistan',\n", - " 'palau',\n", - " # 'palestine', # part of Israel, in Natural Earth data\n", - " 'panama',\n", - " 'papua new guinea',\n", - " 'paraguay',\n", - " 'peru',\n", - " # 'pitcairn', # part of UK, in Natural Earth data\n", - " 'philippines',\n", - " 'poland',\n", - " 'portugal',\n", - " 'puerto rico',\n", - " 'qatar',\n", - " # 'réunion', # part of France, in Natural Earth data\n", - " 'romania',\n", - " 'russia',\n", - " 'rwanda',\n", - " 'saint barthelemy',\n", - " # 'saint helena, ascension and tristan da cunha', # part of UK, in Natural Earth data\n", - " 'saint lucia',\n", - " 'saint martin',\n", - " # 'saint martin (french part)', part of Saint Martin, in Natural Earth data\n", - " 'saint pierre and miquelon',\n", - " 'saint vincent and the grenadines',\n", - " 'samoa',\n", - " 'san marino',\n", - " 'sao tome and principe',\n", - " 'saudi arabia',\n", - " 'senegal',\n", - " # 'serbia', # not sure why this isn't working!\n", - " 'seychelles',\n", - " 'sierra leone',\n", - " 'singapore',\n", - " # 'sint maarten (dutch part)', part of Saint Martin, in Natural Earth data\n", - " 'slovakia',\n", - " 'slovenia',\n", - " 'solomon islands',\n", - " 'somalia',\n", - " 'south africa',\n", - " # 'south sudan', # part of Sudan, in Natural Earth data\n", - " 'spain',\n", - " 'sri lanka',\n", - " 'sudan',\n", - " 'suriname',\n", - " # 'svalbard and jan mayen', # part of Norway, in Natural Earth data\n", - " 'sweden',\n", - " 'switzerland',\n", - " 'syria',\n", - " 'taiwan',\n", - " 'tajikistan',\n", - " 'tanzania',\n", - " 'thailand',\n", - " 'timorleste',\n", - " 'togo',\n", - " # 'tokelau', # part of New Zealand, in Natural Earth data\n", - " 'tonga',\n", - " 'trinidad and tobago',\n", - " 'tunisia',\n", - " 'turkey',\n", - " 'turkmenistan',\n", - " 'turks and caicos islands',\n", - " 'tuvalu',\n", - " 'uganda',\n", - " 'uk',\n", - " 'ukraine',\n", - " 'united arab emirates',\n", - " 'united states minor outlying islands',\n", - " 'uruguay',\n", - " 'usa',\n", - " 'uzbekistan',\n", - " 'vanuatu',\n", - " 'vatican',\n", - " 'venezuela',\n", - " 'vietnam',\n", - " 'british virgin islands',\n", - " 'united states virgin islands',\n", - " 'wallis and futuna',\n", - " 'yemen',\n", - " 'zambia',\n", - " 'zimbabwe'\n", + " \"afghanistan\",\n", + " \"aland\",\n", + " \"albania\",\n", + " \"algeria\",\n", + " \"american samoa\",\n", + " \"andorra\",\n", + " \"angola\",\n", + " \"anguilla\",\n", + " \"antarctica\",\n", + " \"antigua and barbuda\",\n", + " \"argentina\",\n", + " \"armenia\",\n", + " \"aruba\",\n", + " \"australia\",\n", + " \"austria\",\n", + " \"azerbaijan\",\n", + " \"the bahamas\",\n", + " \"bahrain\",\n", + " \"bangladesh\",\n", + " \"barbados\",\n", + " \"belarus\",\n", + " \"belgium\",\n", + " \"belize\",\n", + " \"benin\",\n", + " \"bermuda\",\n", + " \"bhutan\",\n", + " \"bolivia\",\n", + " # 'bonaire, sint eustatius and saba', # Part of Netherlands Antilles, part of Netherlands, accordsing to Natural Earth?\n", + " \"bosnia and herzegovina\",\n", + " \"botswana\",\n", + " # 'bouvet island', # part of Norway, in Natural Earth data\n", + " \"brazil\",\n", + " \"brunei\",\n", + " \"british indian ocean territory\",\n", + " \"bulgaria\",\n", + " \"burkina faso\",\n", + " \"burundi\",\n", + " \"cape verde\",\n", + " \"cambodia\",\n", + " \"cameroon\",\n", + " \"canada\",\n", + " \"cayman islands\",\n", + " \"central african republic\",\n", + " \"chad\",\n", + " \"chile\",\n", + " \"china\",\n", + " # 'christmas island', # part of British Indian Ocean Territory, according to Natural Earth\n", + " # 'cocos (keeling) islands', # part of British Indian Ocean Territory, according to Natural Earth\n", + " \"colombia\",\n", + " \"comoros\",\n", + " \"democratic republic of the congo\",\n", + " \"cook islands\",\n", + " \"costa rica\",\n", + " # 'côte d\\'ivoire', # not sure why this isn't working\n", + " \"croatia\",\n", + " \"cuba\",\n", + " \"curaçao\",\n", + " \"cyprus\",\n", + " \"czech republic\",\n", + " \"denmark\",\n", + " \"djibouti\",\n", + " \"dominica\",\n", + " \"dominican republic\",\n", + " \"ecuador\",\n", + " \"egypt\",\n", + " \"el salvador\",\n", + " \"equatorial guinea\",\n", + " \"eritrea\",\n", + " \"estonia\",\n", + " # 'eswatini', # not sure why this doesn't work — Swaziland isn't available to alias, either.\n", + " \"ethiopia\",\n", + " \"falkland islands\",\n", + " \"faroe islands\",\n", + " \"fiji\",\n", + " \"finland\",\n", + " \"france\",\n", + " # 'french guiana',\n", + " \"french polynesia\",\n", + " # 'french southern territories (the)',\n", + " \"gabon\",\n", + " \"gambia\",\n", + " \"germany\",\n", + " \"ghana\",\n", + " \"gibraltar\",\n", + " \"greece\",\n", + " \"greenland\",\n", + " \"grenada\",\n", + " # 'guadeloupe', # part of France, in Natural Earth data\n", + " \"guam\",\n", + " \"guatemala\",\n", + " \"haiti\",\n", + " \"hungary\",\n", + " \"guernsey\",\n", + " \"guinea\",\n", + " # 'guinea-bissau', # not sure why this isn't working\n", + " \"guyana\",\n", + " \"honduras\",\n", + " \"iceland\",\n", + " \"india\",\n", + " \"indonesia\",\n", + " \"iran\",\n", + " \"israel\",\n", + " \"italy\",\n", + " \"japan\",\n", + " \"jordan\",\n", + " \"kazakhstan\",\n", + " \"kenya\",\n", + " \"korea\",\n", + " \"kuwait\",\n", + " \"kyrgyzstan\",\n", + " \"laos\",\n", + " \"latvia\",\n", + " \"lebanon\",\n", + " \"lesotho\",\n", + " \"liberia\",\n", + " \"libya\",\n", + " \"liechtenstein\",\n", + " \"lithuania\",\n", + " \"luxembourg\",\n", + " # 'macao', # part of China, in Natural Earth data\n", + " \"macedonia\",\n", + " \"madagascar\",\n", + " \"malawi\",\n", + " \"malaysia\",\n", + " \"maldives\",\n", + " \"mali\",\n", + " \"malta\",\n", + " \"marshall islands\",\n", + " # 'martinique', # part of France, in Natural Earth data\n", + " \"mauritania\",\n", + " \"mauritius\",\n", + " # 'mayotte', # part of France, in Natural Earth data\n", + " \"mexico\",\n", + " \"moldova\",\n", + " \"montserrat\",\n", + " \"monaco\",\n", + " \"mongolia\",\n", + " \"montenegro\",\n", + " \"morocco\",\n", + " \"mozambique\",\n", + " \"myanmar\",\n", + " \"namibia\",\n", + " \"nauru\",\n", + " \"nepal\",\n", + " \"netherlands\",\n", + " \"new caledonia\",\n", + " \"new zealand\",\n", + " \"nicaragua\",\n", + " \"niger\",\n", + " \"nigeria\",\n", + " \"niue\",\n", + " \"norfolk island\",\n", + " \"northern mariana islands\",\n", + " \"norway\",\n", + " \"oman\",\n", + " \"pakistan\",\n", + " \"palau\",\n", + " # 'palestine', # part of Israel, in Natural Earth data\n", + " \"panama\",\n", + " \"papua new guinea\",\n", + " \"paraguay\",\n", + " \"peru\",\n", + " # 'pitcairn', # part of UK, in Natural Earth data\n", + " \"philippines\",\n", + " \"poland\",\n", + " \"portugal\",\n", + " \"puerto rico\",\n", + " \"qatar\",\n", + " # 'réunion', # part of France, in Natural Earth data\n", + " \"romania\",\n", + " \"russia\",\n", + " \"rwanda\",\n", + " \"saint barthelemy\",\n", + " # 'saint helena, ascension and tristan da cunha', # part of UK, in Natural Earth data\n", + " \"saint lucia\",\n", + " \"saint martin\",\n", + " # 'saint martin (french part)', part of Saint Martin, in Natural Earth data\n", + " \"saint pierre and miquelon\",\n", + " \"saint vincent and the grenadines\",\n", + " \"samoa\",\n", + " \"san marino\",\n", + " \"sao tome and principe\",\n", + " \"saudi arabia\",\n", + " \"senegal\",\n", + " # 'serbia', # not sure why this isn't working!\n", + " \"seychelles\",\n", + " \"sierra leone\",\n", + " \"singapore\",\n", + " # 'sint maarten (dutch part)', part of Saint Martin, in Natural Earth data\n", + " \"slovakia\",\n", + " \"slovenia\",\n", + " \"solomon islands\",\n", + " \"somalia\",\n", + " \"south africa\",\n", + " # 'south sudan', # part of Sudan, in Natural Earth data\n", + " \"spain\",\n", + " \"sri lanka\",\n", + " \"sudan\",\n", + " \"suriname\",\n", + " # 'svalbard and jan mayen', # part of Norway, in Natural Earth data\n", + " \"sweden\",\n", + " \"switzerland\",\n", + " \"syria\",\n", + " \"taiwan\",\n", + " \"tajikistan\",\n", + " \"tanzania\",\n", + " \"thailand\",\n", + " \"timorleste\",\n", + " \"togo\",\n", + " # 'tokelau', # part of New Zealand, in Natural Earth data\n", + " \"tonga\",\n", + " \"trinidad and tobago\",\n", + " \"tunisia\",\n", + " \"turkey\",\n", + " \"turkmenistan\",\n", + " \"turks and caicos islands\",\n", + " \"tuvalu\",\n", + " \"uganda\",\n", + " \"uk\",\n", + " \"ukraine\",\n", + " \"united arab emirates\",\n", + " \"united states minor outlying islands\",\n", + " \"uruguay\",\n", + " \"usa\",\n", + " \"uzbekistan\",\n", + " \"vanuatu\",\n", + " \"vatican\",\n", + " \"venezuela\",\n", + " \"vietnam\",\n", + " \"british virgin islands\",\n", + " \"united states virgin islands\",\n", + " \"wallis and futuna\",\n", + " \"yemen\",\n", + " \"zambia\",\n", + " \"zimbabwe\",\n", "]\n", "\n", "# country name used in dataset\n", @@ -835,17 +841,22 @@ "\n", "# CSV files that are defined later in the notebook:\n", "region_maps = [\n", - " 'france_regions',\n", - " 'italy_regions',\n", - " 'philippines_regions',\n", - " 'turkey_regions'\n", + " \"france_regions\",\n", + " \"italy_regions\",\n", + " \"philippines_regions\",\n", + " \"turkey_regions\",\n", "]\n", "\n", "# Make sure all country names are covered:\n", - "invalid_countries = [x for x in countries if (country_name_aliases.get(x, x) not in df[\"admin\"].str.lower().unique()) and (x not in region_maps)]\n", + "invalid_countries = [\n", + " x\n", + " for x in countries\n", + " if (country_name_aliases.get(x, x) not in df[\"admin\"].str.lower().unique())\n", + " and (x not in region_maps)\n", + "]\n", "\n", "if invalid_countries:\n", - " print(f\"Following country names are not valid: {invalid_countries}\")" + " print(f\"Following country names are not valid: {invalid_countries}\")" ] }, { @@ -865,11 +876,11 @@ "source": [ "# Plotting style defaults:\n", "plot_styles = {\n", - " 'edgecolor': 'black', # Sets the color of the border of each geometry in the plot to black\n", - " 'column': 'name', # Specifies the column to be used for coloring the geometries based on its values\n", - " 'legend': False, # Disables the legend for the plot\n", - " 'cmap': 'tab20', # Sets the colormap to 'tab20' which provides a distinct set of colors for visual differentiation\n", - " 'linewidth': 0.25 # Sets the thickness of the edges/borders of the geometries\n", + " \"edgecolor\": \"black\", # Sets the color of the border of each geometry in the plot to black\n", + " \"column\": \"name\", # Specifies the column to be used for coloring the geometries based on its values\n", + " \"legend\": False, # Disables the legend for the plot\n", + " \"cmap\": \"tab20\", # Sets the colormap to 'tab20' which provides a distinct set of colors for visual differentiation\n", + " \"linewidth\": 0.25, # Sets the thickness of the edges/borders of the geometries\n", "}" ] }, @@ -890,7 +901,10 @@ " gdf = df[df[\"admin\"].str.lower() == country_alias]\n", " return gdf.copy()\n", "\n", - "def plot_all_countries(countries, subplot_width=5, subplot_height=5, base_tolerance=0.01):\n", + "\n", + "def plot_all_countries(\n", + " countries, subplot_width=5, subplot_height=5, base_tolerance=0.01\n", + "):\n", " if not countries:\n", " print(\"No countries to plot.\")\n", " return\n", @@ -900,24 +914,28 @@ " nrows = max((num_countries + ncols - 1) // ncols, 1) # Ensure at least one row\n", "\n", " figsize = (ncols * subplot_width, nrows * subplot_height)\n", - " print(f\"Debug Info: figsize={figsize}, ncols={ncols}, nrows={nrows}, num_countries={num_countries}\")\n", + " print(\n", + " f\"Debug Info: figsize={figsize}, ncols={ncols}, nrows={nrows}, num_countries={num_countries}\"\n", + " )\n", "\n", " plt.figure(figsize=figsize)\n", - " plt.rc('font', size=24) # Sets the font size globally\n", + " plt.rc(\"font\", size=24) # Sets the font size globally\n", "\n", " for i, country in enumerate(countries):\n", " ax = plt.subplot(nrows, ncols, i + 1)\n", " gdf = get_gdf(country)\n", " if not gdf.empty:\n", " gdf_projected = gdf.to_crs(epsg=6933)\n", - " area = gdf_projected['geometry'].area.sum()\n", - " dynamic_tolerance = base_tolerance * (area / 1e6) ** 0.8 \n", - " gdf_projected['geometry'] = gdf_projected['geometry'].simplify(tolerance=dynamic_tolerance, preserve_topology=True)\n", + " area = gdf_projected[\"geometry\"].area.sum()\n", + " dynamic_tolerance = base_tolerance * (area / 1e6) ** 0.8\n", + " gdf_projected[\"geometry\"] = gdf_projected[\"geometry\"].simplify(\n", + " tolerance=dynamic_tolerance, preserve_topology=True\n", + " )\n", "\n", " gdf.plot(ax=ax, **plot_styles)\n", - " ax.set_aspect('equal', adjustable='datalim')\n", + " ax.set_aspect(\"equal\", adjustable=\"datalim\")\n", " else:\n", - " ax.text(0.5, 0.5, country, ha='center', va='center', fontsize=24)\n", + " ax.text(0.5, 0.5, country, ha=\"center\", va=\"center\", fontsize=24)\n", " ax.set_title(country)\n", "\n", " plt.tight_layout()\n", @@ -929,7 +947,7 @@ "# It's disabled because it takes a while to run.\n", "plot_preview = False\n", "if plot_preview:\n", - " plot_all_countries()" + " plot_all_countries()" ] }, { @@ -996,7 +1014,7 @@ } ], "source": [ - "usa = df[df['adm0_a3'] == 'USA']\n", + "usa = df[df[\"adm0_a3\"] == \"USA\"]\n", "not speed_run and usa.plot(**plot_styles)" ] }, @@ -1031,7 +1049,6 @@ ], "source": [ "def reposition(df, idx, xoff=None, yoff=None, xscale=None, yscale=None, simplify=None):\n", - "\n", " def move_and_scale(series):\n", " if xoff or yoff:\n", " series = shapely.affinity.translate(series, xoff or 0, yoff or 0)\n", @@ -1041,14 +1058,14 @@ " series = series.simplify(simplify, preserve_topology=False)\n", " return series\n", "\n", - " df.loc[idx, 'geometry'] = df.loc[idx, 'geometry'].apply(move_and_scale)\n", + " df.loc[idx, \"geometry\"] = df.loc[idx, \"geometry\"].apply(move_and_scale)\n", "\n", "\n", "usa_copy = usa.copy()\n", - "reposition(usa_copy, usa.name == 'Hawaii', 51, 5.5)\n", - "reposition(usa_copy, usa.name == 'Alaska', 35, -34, 0.35, 0.35)\n", + "reposition(usa_copy, usa.name == \"Hawaii\", 51, 5.5)\n", + "reposition(usa_copy, usa.name == \"Alaska\", 35, -34, 0.35, 0.35)\n", "\n", - "not speed_run and usa_copy.plot(figsize=(8,8), **plot_styles)" + "not speed_run and usa_copy.plot(figsize=(8, 8), **plot_styles)" ] }, { @@ -1231,16 +1248,20 @@ "source": [ "# Chinese Special Administrative Regions\n", "china_sars = df_admin0_10m.loc[\n", - " df_admin0_10m.name_en.isin(['Taiwan', 'Hong Kong', 'Macau']),\n", - " [x for x in df_admin0_10m.columns if x in df.columns]\n", + " df_admin0_10m.name_en.isin([\"Taiwan\", \"Hong Kong\", \"Macau\"]),\n", + " [x for x in df_admin0_10m.columns if x in df.columns],\n", "]\n", - "china_sars = china_sars.merge(pd.DataFrame(\n", - " data={\n", - " \"name_en\": [\"Taiwan\", \"Hong Kong\", \"Macau\"],\n", - " \"name_zh\": [\"中国台湾\", \"香港特别行政区\", \"澳门特别行政区\"],\n", - " \"iso_3166_2\": [\"CN-71\", \"CN-91\", \"CN-92\"],\n", - " },\n", - "), on=\"name_en\", how=\"left\")\n", + "china_sars = china_sars.merge(\n", + " pd.DataFrame(\n", + " data={\n", + " \"name_en\": [\"Taiwan\", \"Hong Kong\", \"Macau\"],\n", + " \"name_zh\": [\"中国台湾\", \"香港特别行政区\", \"澳门特别行政区\"],\n", + " \"iso_3166_2\": [\"CN-71\", \"CN-91\", \"CN-92\"],\n", + " },\n", + " ),\n", + " on=\"name_en\",\n", + " how=\"left\",\n", + ")\n", "china_sars" ] }, @@ -1317,16 +1338,20 @@ "outputs": [], "source": [ "finland_aland = df_admin0_10m.loc[\n", - " df_admin0_10m.name_en.isin(['Åland']),\n", - " [x for x in df_admin0_10m.columns if x in df.columns]\n", + " df_admin0_10m.name_en.isin([\"Åland\"]),\n", + " [x for x in df_admin0_10m.columns if x in df.columns],\n", "]\n", - "finland_aland = finland_aland.merge(pd.DataFrame(\n", - " data={\n", - " \"name_en\": [\"Åland\"],\n", - " \"name_fi\": [\"Ahvenanmaan maakunta\"],\n", - " \"iso_3166_2\": [\"FI-01\"],\n", - " },\n", - "), on=\"name_en\", how=\"left\")\n" + "finland_aland = finland_aland.merge(\n", + " pd.DataFrame(\n", + " data={\n", + " \"name_en\": [\"Åland\"],\n", + " \"name_fi\": [\"Ahvenanmaan maakunta\"],\n", + " \"iso_3166_2\": [\"FI-01\"],\n", + " },\n", + " ),\n", + " on=\"name_en\",\n", + " how=\"left\",\n", + ")" ] }, { @@ -1373,7 +1398,7 @@ "finland_copy = finland_copy.drop([\"name_fi\"], axis=1)\n", "\n", "# Plotting the DataFrame\n", - "not speed_run and finland_copy.plot(figsize=(7, 7), **plot_styles)\n" + "not speed_run and finland_copy.plot(figsize=(7, 7), **plot_styles)" ] }, { @@ -1416,24 +1441,27 @@ } ], "source": [ + "russia_copy = df[df[\"adm0_a3\"] == \"RUS\"].copy()\n", + "crimea = russia_copy[russia_copy[\"iso_3166_2\"] == \"UA-43\"].copy()\n", + "sevastopol = russia_copy[russia_copy[\"iso_3166_2\"] == \"UA-40\"].copy()\n", "\n", - "russia_copy = df[df['adm0_a3'] == 'RUS'].copy()\n", - "crimea = russia_copy[russia_copy['iso_3166_2'] == 'UA-43'].copy()\n", - "sevastopol = russia_copy[russia_copy['iso_3166_2'] == 'UA-40'].copy()\n", - "\n", - "ukraine_with_crimea = pd.concat([df[df['adm0_a3'] == 'UKR'], crimea, sevastopol], ignore_index=True)\n", + "ukraine_with_crimea = pd.concat(\n", + " [df[df[\"adm0_a3\"] == \"UKR\"], crimea, sevastopol], ignore_index=True\n", + ")\n", "\n", "# kyiv = ukraine_with_crimea[ukraine_with_crimea['iso_3166_2'] == 'UA-30']\n", "# kyiv_oblast = ukraine_with_crimea[ukraine_with_crimea['iso_3166_2'] == 'UA-32']\n", "\n", "# Update the name of the Kyiv city entry\n", - "ukraine_with_crimea.loc[ukraine_with_crimea['iso_3166_2'] == 'UA-30', 'name'] = 'Kyiv'\n", + "ukraine_with_crimea.loc[ukraine_with_crimea[\"iso_3166_2\"] == \"UA-30\", \"name\"] = \"Kyiv\"\n", "\n", "# # Update the name of the Kyiv Oblast entry\n", - "ukraine_with_crimea.loc[ukraine_with_crimea['iso_3166_2'] == 'UA-32', 'name'] = 'Kyiv Oblast'\n", + "ukraine_with_crimea.loc[ukraine_with_crimea[\"iso_3166_2\"] == \"UA-32\", \"name\"] = (\n", + " \"Kyiv Oblast\"\n", + ")\n", "\n", "# Plotting the DataFrame\n", - "not speed_run and ukraine_with_crimea.plot(figsize=(7,7), **plot_styles)" + "not speed_run and ukraine_with_crimea.plot(figsize=(7, 7), **plot_styles)" ] }, { @@ -1459,7 +1487,7 @@ } ], "source": [ - "india = df[df['admin'] == 'India']\n", + "india = df[df[\"admin\"] == \"India\"]\n", "india_copy = india.copy()\n", "\n", "# Download and load the GeoJSON file for India\n", @@ -1468,13 +1496,22 @@ "try:\n", " india_gdf = gpd.read_file(india_geojson_url)\n", " # Rename column to 'ST_ID' to 'iso_3166_2' for consistency\n", - " india_gdf.rename(columns={'ST_ID': 'iso_3166_2'}, inplace=True)\n", + " india_gdf.rename(columns={\"ST_ID\": \"iso_3166_2\"}, inplace=True)\n", " # Update the geometry for the states of Jammu and Kashmir and Ladakh\n", - " india_copy.loc[india_copy['iso_3166_2'] == 'IN-JK', 'geometry'] = india_gdf[india_gdf['iso_3166_2'] == 'IN-JK'].dissolve(by='iso_3166_2').reset_index()[\"geometry\"].values\n", - " india_copy.loc[india_copy['iso_3166_2'] == 'IN-LA', 'geometry'] = india_gdf[india_gdf['iso_3166_2'] == 'IN-LA'].dissolve(by='iso_3166_2').reset_index()[\"geometry\"].values\n", + " india_copy.loc[india_copy[\"iso_3166_2\"] == \"IN-JK\", \"geometry\"] = (\n", + " india_gdf[india_gdf[\"iso_3166_2\"] == \"IN-JK\"]\n", + " .dissolve(by=\"iso_3166_2\")\n", + " .reset_index()[\"geometry\"]\n", + " .values\n", + " )\n", + " india_copy.loc[india_copy[\"iso_3166_2\"] == \"IN-LA\", \"geometry\"] = (\n", + " india_gdf[india_gdf[\"iso_3166_2\"] == \"IN-LA\"]\n", + " .dissolve(by=\"iso_3166_2\")\n", + " .reset_index()[\"geometry\"]\n", + " .values\n", + " )\n", " print(\"GeoJSON file for India downloaded and loaded successfully.\")\n", "except Exception as e:\n", - " \n", " print(f\"Unable to download or load the GeoJSON file for India. Error: {str(e)}\")\n", " print(\"Please download the file from the URL and try again.\")" ] @@ -1550,7 +1587,7 @@ } ], "source": [ - "norway = df[df['adm0_a3'] == 'NOR']\n", + "norway = df[df[\"adm0_a3\"] == \"NOR\"]\n", "not speed_run and norway.plot(**plot_styles)" ] }, @@ -1587,8 +1624,8 @@ "norway_copy = norway.copy()\n", "\n", "norway_copy = norway_copy[norway_copy[\"iso_3166_2\"] != \"NO-X01~\"]\n", - "reposition(norway_copy, norway.name == 'Svalbard', -12, -8, 0.5, 0.5)\n", - "#reposition(norway_copy, norway.name == 'Nordland', 10, 0, 2, 2)\n", + "reposition(norway_copy, norway.name == \"Svalbard\", -12, -8, 0.5, 0.5)\n", + "# reposition(norway_copy, norway.name == 'Nordland', 10, 0, 2, 2)\n", "\n", "not speed_run and norway_copy.plot(**plot_styles)" ] @@ -1632,7 +1669,7 @@ } ], "source": [ - "portugal = df[df.admin == 'Portugal']\n", + "portugal = df[df.admin == \"Portugal\"]\n", "not speed_run and portugal.plot(**plot_styles)" ] }, @@ -1668,8 +1705,8 @@ "source": [ "portugal_copy = portugal.copy()\n", "\n", - "reposition(portugal_copy, portugal.name == 'Azores', 11, 0)\n", - "reposition(portugal_copy, portugal.name == 'Madeira', 6, 2, simplify=0.015)\n", + "reposition(portugal_copy, portugal.name == \"Azores\", 11, 0)\n", + "reposition(portugal_copy, portugal.name == \"Madeira\", 6, 2, simplify=0.015)\n", "\n", "not speed_run and portugal_copy.plot(figsize=(8, 8), **plot_styles)" ] @@ -1713,7 +1750,7 @@ } ], "source": [ - "spain = df[df.admin == 'Spain']\n", + "spain = df[df.admin == \"Spain\"]\n", "not speed_run and spain.plot(**plot_styles)" ] }, @@ -1749,7 +1786,9 @@ "source": [ "spain_copy = spain.copy()\n", "\n", - "reposition(spain_copy, spain.name.isin(['Las Palmas', 'Santa Cruz de Tenerife']), 3, 7, 1, 1)\n", + "reposition(\n", + " spain_copy, spain.name.isin([\"Las Palmas\", \"Santa Cruz de Tenerife\"]), 3, 7, 1, 1\n", + ")\n", "\n", "not speed_run and spain_copy.plot(figsize=(8, 8), **plot_styles)" ] @@ -1793,7 +1832,7 @@ } ], "source": [ - "russia = df[df.admin == 'Russia']\n", + "russia = df[df.admin == \"Russia\"]\n", "not speed_run and russia.plot(**plot_styles)" ] }, @@ -1860,16 +1899,17 @@ " # Combine all moved geometries into a single geometry\n", " return shapely.ops.unary_union(moved_geom)\n", "\n", + "\n", "# Applying the function to the DataFrame\n", "russia_copy = russia.copy()\n", - "russia_copy.loc[\n", - " russia.name == 'Chukchi Autonomous Okrug', 'geometry'\n", - "] = russia_copy.loc[\n", - " russia.name == 'Chukchi Autonomous Okrug', 'geometry'\n", - "].apply(shift_geom)\n", + "russia_copy.loc[russia.name == \"Chukchi Autonomous Okrug\", \"geometry\"] = (\n", + " russia_copy.loc[\n", + " russia.name == \"Chukchi Autonomous Okrug\", \"geometry\"\n", + " ].apply(shift_geom)\n", + ")\n", "\n", "# Plotting\n", - "not speed_run and russia_copy.plot(figsize=(20, 20), **plot_styles)\n" + "not speed_run and russia_copy.plot(figsize=(20, 20), **plot_styles)" ] }, { @@ -1913,8 +1953,10 @@ } ], "source": [ - "turkey = df[df.admin == 'Turkey'][['iso_3166_2','geometry']]\n", - "not speed_run and turkey.plot(**{key: value for key, value in plot_styles.items() if key != 'column'})" + "turkey = df[df.admin == \"Turkey\"][[\"iso_3166_2\", \"geometry\"]]\n", + "not speed_run and turkey.plot(\n", + " **{key: value for key, value in plot_styles.items() if key != \"column\"}\n", + ")" ] }, { @@ -1926,50 +1968,77 @@ "# NUTS - 1 Codes for Turkey and correspong region - city names\n", "\n", "region_dict = {\n", - " 'TR1': ['TR-34'],\n", - " 'TR2': ['TR-59', 'TR-22', 'TR-39', 'TR-10', 'TR-17'],\n", - " 'TR3': ['TR-35', 'TR-09', 'TR-20', 'TR-48', 'TR-45', 'TR-03', 'TR-43', 'TR-64'],\n", - " 'TR4': ['TR-16', 'TR-26', 'TR-11', 'TR-41', 'TR-54', 'TR-81', 'TR-14', 'TR-77'],\n", - " 'TR5': ['TR-06', 'TR-42', 'TR-70'],\n", - " 'TR6': ['TR-07', 'TR-32', 'TR-15', 'TR-01', 'TR-33', 'TR-31', 'TR-46', 'TR-80'],\n", - " 'TR7': ['TR-71', 'TR-68', 'TR-51', 'TR-50', 'TR-40', 'TR-38', 'TR-58', 'TR-66'],\n", - " 'TR8': ['TR-67', 'TR-78', 'TR-74', 'TR-37', 'TR-18', 'TR-57', 'TR-55', 'TR-60', 'TR-19', 'TR-05'],\n", - " 'TR9': ['TR-61', 'TR-52', 'TR-28', 'TR-53', 'TR-08', 'TR-29'],\n", - " 'TRA': ['TR-25', 'TR-24', 'TR-69', 'TR-04', 'TR-36', 'TR-76', 'TR-75'],\n", - " 'TRB': ['TR-44', 'TR-23', 'TR-12', 'TR-62', 'TR-65', 'TR-49', 'TR-13', 'TR-30'],\n", - " 'TRC': ['TR-27', 'TR-02', 'TR-79', 'TR-63', 'TR-21', 'TR-47', 'TR-72', 'TR-73', 'TR-56']}\n", + " \"TR1\": [\"TR-34\"],\n", + " \"TR2\": [\"TR-59\", \"TR-22\", \"TR-39\", \"TR-10\", \"TR-17\"],\n", + " \"TR3\": [\"TR-35\", \"TR-09\", \"TR-20\", \"TR-48\", \"TR-45\", \"TR-03\", \"TR-43\", \"TR-64\"],\n", + " \"TR4\": [\"TR-16\", \"TR-26\", \"TR-11\", \"TR-41\", \"TR-54\", \"TR-81\", \"TR-14\", \"TR-77\"],\n", + " \"TR5\": [\"TR-06\", \"TR-42\", \"TR-70\"],\n", + " \"TR6\": [\"TR-07\", \"TR-32\", \"TR-15\", \"TR-01\", \"TR-33\", \"TR-31\", \"TR-46\", \"TR-80\"],\n", + " \"TR7\": [\"TR-71\", \"TR-68\", \"TR-51\", \"TR-50\", \"TR-40\", \"TR-38\", \"TR-58\", \"TR-66\"],\n", + " \"TR8\": [\n", + " \"TR-67\",\n", + " \"TR-78\",\n", + " \"TR-74\",\n", + " \"TR-37\",\n", + " \"TR-18\",\n", + " \"TR-57\",\n", + " \"TR-55\",\n", + " \"TR-60\",\n", + " \"TR-19\",\n", + " \"TR-05\",\n", + " ],\n", + " \"TR9\": [\"TR-61\", \"TR-52\", \"TR-28\", \"TR-53\", \"TR-08\", \"TR-29\"],\n", + " \"TRA\": [\"TR-25\", \"TR-24\", \"TR-69\", \"TR-04\", \"TR-36\", \"TR-76\", \"TR-75\"],\n", + " \"TRB\": [\"TR-44\", \"TR-23\", \"TR-12\", \"TR-62\", \"TR-65\", \"TR-49\", \"TR-13\", \"TR-30\"],\n", + " \"TRC\": [\n", + " \"TR-27\",\n", + " \"TR-02\",\n", + " \"TR-79\",\n", + " \"TR-63\",\n", + " \"TR-21\",\n", + " \"TR-47\",\n", + " \"TR-72\",\n", + " \"TR-73\",\n", + " \"TR-56\",\n", + " ],\n", + "}\n", "\n", "# Region names corresponding to NUTS-1\n", "\n", - "region_name_dict = {'TR1':'İstanbul',\n", - " 'TR2':'Batı Marmara',\n", - " 'TR3':'Ege',\n", - " 'TR4':'Doğu Marmara',\n", - " 'TR5':'Batı Anadolu',\n", - " 'TR6':'Akdeniz',\n", - " 'TR7':'Orta Anadolu',\n", - " 'TR8':'Batı Karadeniz',\n", - " 'TR9':'Doğu Karadeniz',\n", - " 'TRA':'Kuzeydoğu Anadolu',\n", - " 'TRC':'Güneydoğu Anadolu',\n", - " 'TRB':'Ortadoğu Anadolu'\n", - " }\n", + "region_name_dict = {\n", + " \"TR1\": \"İstanbul\",\n", + " \"TR2\": \"Batı Marmara\",\n", + " \"TR3\": \"Ege\",\n", + " \"TR4\": \"Doğu Marmara\",\n", + " \"TR5\": \"Batı Anadolu\",\n", + " \"TR6\": \"Akdeniz\",\n", + " \"TR7\": \"Orta Anadolu\",\n", + " \"TR8\": \"Batı Karadeniz\",\n", + " \"TR9\": \"Doğu Karadeniz\",\n", + " \"TRA\": \"Kuzeydoğu Anadolu\",\n", + " \"TRC\": \"Güneydoğu Anadolu\",\n", + " \"TRB\": \"Ortadoğu Anadolu\",\n", + "}\n", "\n", "\n", "def create_region_polygons(region_dict, turkey_gdf):\n", " # Create a reverse dictionary where city codes map to region codes\n", - " city_to_region = {city_code: region_code for region_code, city_codes in region_dict.items() for city_code in city_codes}\n", + " city_to_region = {\n", + " city_code: region_code\n", + " for region_code, city_codes in region_dict.items()\n", + " for city_code in city_codes\n", + " }\n", "\n", " # Create a new column 'REGION' in the GeoDataFrame that maps each city to its region\n", - " turkey_gdf['REGION'] = turkey_gdf['iso_3166_2'].map(city_to_region)\n", + " turkey_gdf[\"REGION\"] = turkey_gdf[\"iso_3166_2\"].map(city_to_region)\n", "\n", " # Dissolve the GeoDataFrame on the 'REGION' column to combine city polygons into region polygons\n", - " region_gdf = turkey_gdf.dissolve(by='REGION')\n", + " region_gdf = turkey_gdf.dissolve(by=\"REGION\")\n", "\n", " # Reset the index of the new GeoDataFrame\n", " region_gdf.reset_index(inplace=True)\n", - " \n", - " return region_gdf.drop(columns=['iso_3166_2'])" + "\n", + " return region_gdf.drop(columns=[\"iso_3166_2\"])" ] }, { @@ -1981,10 +2050,10 @@ "turkey_regions = create_region_polygons(region_dict, turkey)\n", "\n", "# Rename 'REGION' column to 'ISO'\n", - "turkey_regions = turkey_regions.rename(columns={'REGION': 'iso_3166_2'})\n", + "turkey_regions = turkey_regions.rename(columns={\"REGION\": \"iso_3166_2\"})\n", "\n", "# Map the region_name_dict to a new 'NAME_1' column\n", - "turkey_regions['name'] = turkey_regions['iso_3166_2'].map(region_name_dict)" + "turkey_regions[\"name\"] = turkey_regions[\"iso_3166_2\"].map(region_name_dict)" ] }, { @@ -2056,7 +2125,7 @@ } ], "source": [ - "france = df[df.admin == 'France']\n", + "france = df[df.admin == \"France\"]\n", "not speed_run and france.plot(**plot_styles)" ] }, @@ -2086,12 +2155,13 @@ "outputs": [], "source": [ "def replace_name(df, old, new):\n", - " if old in list(df.name): \n", + " if old in list(df.name):\n", " index = df[df.name == old].index[0]\n", - " df.at[index, 'name'] = new\n", - " \n", - "replace_name(france, 'Seien-et-Marne', 'Seine-et-Marne')\n", - "replace_name(france, 'Haute-Rhin', 'Haut-Rhin')" + " df.at[index, \"name\"] = new\n", + "\n", + "\n", + "replace_name(france, \"Seien-et-Marne\", \"Seine-et-Marne\")\n", + "replace_name(france, \"Haute-Rhin\", \"Haut-Rhin\")" ] }, { @@ -2125,11 +2195,11 @@ ], "source": [ "france_copy = france.copy()\n", - "reposition(france_copy, france.name=='Guadeloupe', 57.4, 25.4, 1.5, 1.5)\n", - "reposition(france_copy, france.name=='Martinique', 58.4, 27.1, 1.5, 1.5)\n", - "reposition(france_copy, france.name=='Guyane française', 52, 37.7, 0.35, 0.35)\n", - "reposition(france_copy, france.name=='La Réunion', -55, 62.8, 1.5, 1.5)\n", - "reposition(france_copy, france.name=='Mayotte', -43, 54.3, 1.5, 1.5)\n", + "reposition(france_copy, france.name == \"Guadeloupe\", 57.4, 25.4, 1.5, 1.5)\n", + "reposition(france_copy, france.name == \"Martinique\", 58.4, 27.1, 1.5, 1.5)\n", + "reposition(france_copy, france.name == \"Guyane française\", 52, 37.7, 0.35, 0.35)\n", + "reposition(france_copy, france.name == \"La Réunion\", -55, 62.8, 1.5, 1.5)\n", + "reposition(france_copy, france.name == \"Mayotte\", -43, 54.3, 1.5, 1.5)\n", "\n", "not speed_run and france_copy.plot(figsize=(8, 8), **plot_styles)" ] @@ -2147,7 +2217,7 @@ "metadata": {}, "outputs": [], "source": [ - "france_regions = france_copy[['geometry','region_cod','region']]" + "france_regions = france_copy[[\"geometry\", \"region_cod\", \"region\"]]" ] }, { @@ -2156,9 +2226,11 @@ "metadata": {}, "outputs": [], "source": [ - "france_regions = france_regions.dissolve(by=['region_cod', 'region']).reset_index()\n", + "france_regions = france_regions.dissolve(by=[\"region_cod\", \"region\"]).reset_index()\n", "\n", - "france_regions = france_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})" + "france_regions = france_regions.rename(\n", + " columns={\"region\": \"name\", \"region_cod\": \"iso_3166_2\"}\n", + ")" ] }, { @@ -2211,11 +2283,13 @@ "metadata": {}, "outputs": [], "source": [ - "italy_regions = df[df.admin == 'Italy'][['geometry','region_cod','region']]\n", + "italy_regions = df[df.admin == \"Italy\"][[\"geometry\", \"region_cod\", \"region\"]]\n", "\n", - "italy_regions = italy_regions.dissolve(by=['region_cod', 'region']).reset_index()\n", + "italy_regions = italy_regions.dissolve(by=[\"region_cod\", \"region\"]).reset_index()\n", "\n", - "italy_regions = italy_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})" + "italy_regions = italy_regions.rename(\n", + " columns={\"region\": \"name\", \"region_cod\": \"iso_3166_2\"}\n", + ")" ] }, { @@ -2266,7 +2340,7 @@ "def apply_bounds(df, northwest, southeast):\n", " x1, y1 = northwest\n", " x2, y2 = southeast\n", - " boundry = shapely.geometry.Polygon([(x1, y1),(x1, y2), (x2, y2), (x2, y1)])\n", + " boundry = shapely.geometry.Polygon([(x1, y1), (x1, y2), (x2, y2), (x2, y1)])\n", " df = df.copy()\n", " return df[df.geometry.apply(lambda x: boundry.contains(x))]" ] @@ -2301,7 +2375,7 @@ } ], "source": [ - "netherlands = df[df.admin == 'Netherlands']\n", + "netherlands = df[df.admin == \"Netherlands\"]\n", "not speed_run and netherlands.plot(**plot_styles)" ] }, @@ -2378,7 +2452,7 @@ } ], "source": [ - "uk = df[df.admin == 'United Kingdom']\n", + "uk = df[df.admin == \"United Kingdom\"]\n", "not speed_run and uk.plot(**plot_styles)" ] }, @@ -2465,7 +2539,9 @@ } ], "source": [ - "philippines_copy = philippines_copy.rename(columns={'NAME_1': 'name','ISO': 'iso_3166_2'})\n", + "philippines_copy = philippines_copy.rename(\n", + " columns={\"NAME_1\": \"name\", \"ISO\": \"iso_3166_2\"}\n", + ")\n", "not speed_run and philippines_copy.plot(**plot_styles)" ] }, @@ -2484,7 +2560,9 @@ "metadata": {}, "outputs": [], "source": [ - "philippines_regions = df[df.admin == 'Philippines'][['geometry','region_cod','region']]" + "philippines_regions = df[df.admin == \"Philippines\"][\n", + " [\"geometry\", \"region_cod\", \"region\"]\n", + "]" ] }, { @@ -2493,8 +2571,12 @@ "metadata": {}, "outputs": [], "source": [ - "philippines_regions = philippines_regions.dissolve(by=['region_cod', 'region']).reset_index()\n", - "philippines_regions = philippines_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})" + "philippines_regions = philippines_regions.dissolve(\n", + " by=[\"region_cod\", \"region\"]\n", + ").reset_index()\n", + "philippines_regions = philippines_regions.rename(\n", + " columns={\"region\": \"name\", \"region_cod\": \"iso_3166_2\"}\n", + ")" ] }, { @@ -2517,10 +2599,12 @@ } ], "source": [ - "philippines_regions['name'] = philippines_regions['name'].replace({\n", - " 'Dinagat Islands (Region XIII)': 'Caraga Administrative Region (Region XIII)',\n", - " 'Autonomous Region in Muslim Mindanao (ARMM)': 'Bangsamoro Autonomous Region in Muslim Mindanao (BARMM)'\n", - "})" + "philippines_regions[\"name\"] = philippines_regions[\"name\"].replace(\n", + " {\n", + " \"Dinagat Islands (Region XIII)\": \"Caraga Administrative Region (Region XIII)\",\n", + " \"Autonomous Region in Muslim Mindanao (ARMM)\": \"Bangsamoro Autonomous Region in Muslim Mindanao (BARMM)\",\n", + " }\n", + ")" ] }, { @@ -2550,7 +2634,7 @@ } ], "source": [ - "not speed_run and philippines_regions.plot(figsize = (10, 7), **plot_styles)" + "not speed_run and philippines_regions.plot(figsize=(10, 7), **plot_styles)" ] }, { @@ -2637,21 +2721,21 @@ } ], "source": [ - "vietnam = df[df.admin == 'Vietnam']\n", + "vietnam = df[df.admin == \"Vietnam\"]\n", "vietnam_copy = vietnam.copy()\n", - "replace_name(vietnam_copy, 'Ðong Tháp', 'Đồng Tháp')\n", - "replace_name(vietnam_copy, 'Son La', 'Sơn La')\n", - "replace_name(vietnam_copy, 'Ha Tinh', 'Hà Tĩnh')\n", - "replace_name(vietnam_copy, 'Quàng Nam', 'Quảng Nam')\n", - "replace_name(vietnam_copy, 'Lai Chau', 'Lai Châu')\n", - "replace_name(vietnam_copy, 'Hồ Chí Minh city', 'Thành phố Hồ Chí Minh')\n", - "replace_name(vietnam_copy, 'Hau Giang', 'Hậu Giang')\n", - "replace_name(vietnam_copy, 'Ha Noi', 'Hà Nội')\n", - "replace_name(vietnam_copy, 'Can Tho', 'Cần Thơ')\n", - "replace_name(vietnam_copy, 'Đông Nam Bộ', 'Đồng Nai')\n", - "replace_name(vietnam_copy, 'Đông Bắc', 'Bắc Kạn')\n", - "replace_name(vietnam_copy, 'Đồng Bằng Sông Hồng', 'Hưng Yên')\n", - "for i in vietnam_copy['name']:\n", + "replace_name(vietnam_copy, \"Ðong Tháp\", \"Đồng Tháp\")\n", + "replace_name(vietnam_copy, \"Son La\", \"Sơn La\")\n", + "replace_name(vietnam_copy, \"Ha Tinh\", \"Hà Tĩnh\")\n", + "replace_name(vietnam_copy, \"Quàng Nam\", \"Quảng Nam\")\n", + "replace_name(vietnam_copy, \"Lai Chau\", \"Lai Châu\")\n", + "replace_name(vietnam_copy, \"Hồ Chí Minh city\", \"Thành phố Hồ Chí Minh\")\n", + "replace_name(vietnam_copy, \"Hau Giang\", \"Hậu Giang\")\n", + "replace_name(vietnam_copy, \"Ha Noi\", \"Hà Nội\")\n", + "replace_name(vietnam_copy, \"Can Tho\", \"Cần Thơ\")\n", + "replace_name(vietnam_copy, \"Đông Nam Bộ\", \"Đồng Nai\")\n", + "replace_name(vietnam_copy, \"Đông Bắc\", \"Bắc Kạn\")\n", + "replace_name(vietnam_copy, \"Đồng Bằng Sông Hồng\", \"Hưng Yên\")\n", + "for i in vietnam_copy[\"name\"]:\n", " print(i)" ] }, @@ -2688,9 +2772,8 @@ " \"portugal\": portugal_copy,\n", " \"ukraine\": ukraine_with_crimea,\n", " \"india\": india_copy,\n", - " \"vietnam\": vietnam_copy\n", - "}\n", - "\n" + " \"vietnam\": vietnam_copy,\n", + "}" ] }, { @@ -2744,15 +2827,19 @@ "source": [ "# Filter out countries that only have one region, making them effectively useless as a choropleth\n", "\n", + "\n", "def get_num_subdivisions(country):\n", " gdf = get_gdf(country)\n", - " subdivisions = gdf['iso_3166_2'].unique()\n", + " subdivisions = gdf[\"iso_3166_2\"].unique()\n", " if len(subdivisions) == 1:\n", " print(country, \"has only one subdivision - removing from countries array\")\n", " return len(subdivisions)\n", "\n", + "\n", "# we add the unnecessaery countries to a list here, for clearing out unneeded geojson files later\n", - "countries_to_purge = [country for country in countries if get_num_subdivisions(country) <= 1]\n", + "countries_to_purge = [\n", + " country for country in countries if get_num_subdivisions(country) <= 1\n", + "]\n", "\n", "# now we purge those from our main \"countries\" list to continue processing\n", "countries = [country for country in countries if get_num_subdivisions(country) > 1]" @@ -2992,15 +3079,20 @@ "}\n", "useful_columns = [\"ISO\", \"NAME_1\", \"geometry\"]\n", "\n", + "\n", "def get_simplify_factor_by_size(gdf):\n", " xmin, ymin, xmax, ymax = shapely.ops.unary_union(gdf[\"geometry\"]).bounds\n", " size = (xmax - xmin) * (ymax - ymin)\n", " print(\"Size\", round(size, 3), end=\"\\t\")\n", - " if size > 1000: return 0.03\n", - " if size > 300: return 0.02\n", - " if size > 100: return 0.01\n", + " if size > 1000:\n", + " return 0.03\n", + " if size > 300:\n", + " return 0.02\n", + " if size > 100:\n", + " return 0.01\n", " return 0\n", "\n", + "\n", "def simplify_if_needed(country, gdf):\n", " \"\"\"Simplify the maps based on country size\"\"\"\n", " country_alias = country_name_aliases.get(country, country)\n", @@ -3012,6 +3104,7 @@ " if factor:\n", " gdf[\"geometry\"] = gdf.simplify(factor)\n", "\n", + "\n", "def save_geojson(country):\n", " gdf = get_gdf(country)\n", " print(country, end=\"\\t\")\n", @@ -3022,12 +3115,15 @@ "\n", " simplify_if_needed(country, gdf)\n", "\n", - " print(f'Saving geojson for {country}...')\n", - " filename_country = country.replace(' ', '_')\n", - " gdf[useful_columns].to_file(f\"../src/countries/{filename_country}.geojson\", driver=\"GeoJSON\")\n", + " print(f\"Saving geojson for {country}...\")\n", + " filename_country = country.replace(\" \", \"_\")\n", + " gdf[useful_columns].to_file(\n", + " f\"../src/countries/{filename_country}.geojson\", driver=\"GeoJSON\"\n", + " )\n", + "\n", "\n", "for country in countries_to_purge:\n", - " filename_country = country.replace(' ', '_')\n", + " filename_country = country.replace(\" \", \"_\")\n", " filepath = f\"../src/countries/{filename_country}.geojson\"\n", " if os.path.exists(filepath):\n", " os.remove(filepath)\n", @@ -3088,11 +3184,10 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", "# Function to convert country name to a valid JavaScript identifier\n", "def to_js_identifier(name):\n", - " return name.replace(' ', '_').replace('-', '_')\n", + " return name.replace(\" \", \"_\").replace(\"-\", \"_\")\n", + "\n", "\n", "# License boilerplate\n", "license_boilerplate = \"\"\"/*\n", @@ -3119,10 +3214,19 @@ "countries_combined = sorted(countries_combined)\n", "\n", "# Generate TypeScript import statements\n", - "imports = \"\\n\".join([f\"import {to_js_identifier(country)} from './countries/{to_js_identifier(country)}.geojson';\" for country in countries_combined])\n", + "imports = \"\\n\".join(\n", + " [\n", + " f\"import {to_js_identifier(country)} from './countries/{to_js_identifier(country)}.geojson';\"\n", + " for country in countries_combined\n", + " ]\n", + ")\n", "\n", "# Generate the export object\n", - "exports = \"export const countries = {\\n \" + \",\\n \".join([to_js_identifier(country) for country in countries_combined]) + \",\\n};\"\n", + "exports = (\n", + " \"export const countries = {\\n \"\n", + " + \",\\n \".join([to_js_identifier(country) for country in countries_combined])\n", + " + \",\\n};\"\n", + ")\n", "\n", "# Additional exports\n", "additional_exports = \"\"\"\n", @@ -3163,8 +3267,13 @@ "# DOCS JSON:\n", "# Replace underscores with spaces and title-case each country name\n", "formatted_countries = [country.replace(\"_\", \" \") for country in countries_combined]\n", - "formatted_countries = [country.upper() if country in {\"usa\", \"uk\"} else country.title() for country in formatted_countries]\n", - "formatted_countries = [country.replace(\" Regions\",\" (regions)\") for country in formatted_countries]\n", + "formatted_countries = [\n", + " country.upper() if country in {\"usa\", \"uk\"} else country.title()\n", + " for country in formatted_countries\n", + "]\n", + "formatted_countries = [\n", + " country.replace(\" Regions\", \" (regions)\") for country in formatted_countries\n", + "]\n", "\n", "\n", "# Create a dictionary in the desired format\n", diff --git a/superset/extensions/metadb.py b/superset/extensions/metadb.py index b2d86149383cb..8424d8ee4ee8e 100644 --- a/superset/extensions/metadb.py +++ b/superset/extensions/metadb.py @@ -334,7 +334,9 @@ def _set_columns(self) -> None: primary_keys = [ column for column in list(self._table.primary_key) if column.primary_key ] - if len(primary_keys) == 1 and primary_keys[0].type.python_type == int: + if len(primary_keys) == 1 and isinstance( + primary_keys[0].type.python_type, type(int) + ): self._rowid = primary_keys[0].name self.columns = { diff --git a/tests/integration_tests/sqla_models_tests.py b/tests/integration_tests/sqla_models_tests.py index d4ca3bc1c1a47..2f22b92c5d292 100644 --- a/tests/integration_tests/sqla_models_tests.py +++ b/tests/integration_tests/sqla_models_tests.py @@ -1091,7 +1091,7 @@ def _convert_dttm( columns_by_name, ) - assert type(normalized) == type(result) + assert isinstance(normalized, type(result)) if isinstance(normalized, TextClause): assert str(normalized) == str(result) diff --git a/tests/integration_tests/viz_tests.py b/tests/integration_tests/viz_tests.py index 872c178bfa547..8403708a532ac 100644 --- a/tests/integration_tests/viz_tests.py +++ b/tests/integration_tests/viz_tests.py @@ -91,7 +91,7 @@ def test_get_df_returns_empty_df(self): datasource = self.get_datasource_mock() test_viz = viz.BaseViz(datasource, form_data) result = test_viz.get_df(query_obj) - assert type(result) == pd.DataFrame + assert isinstance(result, pd.DataFrame) assert result.empty def test_get_df_handles_dttm_col(self): diff --git a/tests/unit_tests/dao/key_value_test.py b/tests/unit_tests/dao/key_value_test.py index 18c0dfb25f946..abe05f8777b18 100644 --- a/tests/unit_tests/dao/key_value_test.py +++ b/tests/unit_tests/dao/key_value_test.py @@ -143,7 +143,7 @@ def test_create_pickle_entry( found_entry = ( db.session.query(KeyValueEntry).filter_by(id=created_entry.id).one() ) - assert type(pickle.loads(found_entry.value)) == type(PICKLE_VALUE) + assert isinstance(pickle.loads(found_entry.value), type(PICKLE_VALUE)) assert found_entry.created_by_fk == admin_user.id From 2de096202e5ba69b0c09b570d48b0ef2d29c1e11 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Sun, 24 Nov 2024 16:49:15 -0800 Subject: [PATCH 3/5] bump to latest --- requirements/development.txt | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/requirements/development.txt b/requirements/development.txt index dbfb1d872ff83..0f4c3391d46cc 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -168,7 +168,7 @@ pyee==11.0.1 # via playwright pyfakefs==5.3.5 # via apache-superset -pyhive[hive]==0.7.0 +pyhive[presto]==0.7.0 # via apache-superset pyinstrument==4.4.0 # via apache-superset @@ -195,14 +195,10 @@ requests-oauthlib==2.0.0 # via google-auth-oauthlib rfc3339-validator==0.1.4 # via openapi-schema-validator -rfc3986==2.0.0 - # via tableschema -ruff==0.7.4 +ruff==0.8.0 # via apache-superset s3transfer==0.10.1 # via boto3 -sasl==0.3.1 - # via pyhive sqlalchemy-bigquery==1.11.0 # via apache-superset sqloxide==0.1.43 From 2fe9ac8537c5d9559fc55b5580e0ce42b81ff4af Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Sun, 24 Nov 2024 16:49:50 -0800 Subject: [PATCH 4/5] bump pre-commit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67742cb727f9d..cd08edcf403ae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -75,7 +75,7 @@ repos: - id: helm-docs files: helm - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.0 + rev: v0.8.0 hooks: - id: ruff args: [ --fix ] From 037f2e888d3039ba92b7b8d41c3bb7fc2e18f5bf Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Sun, 24 Nov 2024 16:54:00 -0800 Subject: [PATCH 5/5] don't touch ipynb --- pyproject.toml | 1 + .../Country Map GeoJSON Generator.ipynb | 957 ++++++++---------- 2 files changed, 425 insertions(+), 533 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c905c4320beaa..65606953c8814 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -402,6 +402,7 @@ skipsdist = true [tool.ruff] # Exclude a variety of commonly ignored directories. exclude = [ + "**/*.ipynb", ".bzr", ".direnv", ".eggs", diff --git a/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country Map GeoJSON Generator.ipynb b/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country Map GeoJSON Generator.ipynb index 21559309284b3..2969a7f96e43e 100644 --- a/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country Map GeoJSON Generator.ipynb +++ b/superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country Map GeoJSON Generator.ipynb @@ -48,17 +48,18 @@ "source": [ "# Dependencies\n", "\n", - "import json\n", "import os\n", - "\n", + "import json\n", + "import requests\n", "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "import requests\n", "import shapely\n", - "import shapely.affinity\n", + "import pandas as pd\n", "import shapely.geometry\n", - "import shapely.ops" + "import shapely.ops\n", + "import shapely.affinity\n", + "from shapely.geometry import Polygon, MultiPolygon\n", + "import shutil" ] }, { @@ -95,38 +96,32 @@ "if not os.path.exists(data_dir):\n", " os.mkdir(data_dir)\n", "\n", - "\n", "def download_files(skip_existing: bool):\n", " for url in [\n", " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip\",\n", " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip\",\n", - " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_1_states_provinces.zip\",\n", + " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_1_states_provinces.zip\"\n", " ]:\n", - " file_name = url.split(\"/\")[-1]\n", - " full_file_name = f\"{data_dir}/{file_name}\"\n", + " file_name = url.split('/')[-1]\n", + " full_file_name = f'{data_dir}/{file_name}'\n", " # temporary fix\n", - " url = url.replace(\n", - " \"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download\",\n", - " \"https://naciscdn.org/naturalearth\",\n", - " )\n", + " url = url.replace(\"https://www.naturalearthdata.com/http//www.naturalearthdata.com/download\", \"https://naciscdn.org/naturalearth\")\n", " with requests.get(\n", " url,\n", " headers={\n", " \"accept-encoding\": \"gzip, deflate, br\",\n", - " \"user-agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36\",\n", + " \"user-agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36\"\n", " },\n", " stream=True,\n", " ) as res:\n", - " file_size = int(res.headers[\"content-length\"])\n", + " file_size = int(res.headers['content-length'])\n", " if res.status_code != 200:\n", - " print(\n", - " \"Error downloading files. Please open the URL to download them from browser manually.\"\n", - " )\n", + " print(\"Error downloading files. Please open the URL to download them from browser manually.\")\n", " break\n", " if (\n", - " skip_existing\n", - " and os.path.exists(full_file_name)\n", - " and file_size == os.path.getsize(full_file_name)\n", + " skip_existing and\n", + " os.path.exists(full_file_name) and\n", + " file_size == os.path.getsize(full_file_name)\n", " ):\n", " print(f\"Skip {file_name} because it already exists\")\n", " continue\n", @@ -135,7 +130,6 @@ " fh.write(res.content)\n", " print(\"Done. \")\n", "\n", - "\n", "download_files(skip_existing=False)" ] }, @@ -528,7 +522,7 @@ } ], "source": [ - "df_50m.groupby(\"admin\").count()" + "df_50m.groupby('admin').count()" ] }, { @@ -558,7 +552,7 @@ "source": [ "# Use 1:50m geometry for some large countries:\n", "\n", - "print(*df_50m[\"admin\"].unique(), sep=\"\\n\")" + "print(*df_50m['admin'].unique(), sep='\\n')" ] }, { @@ -569,7 +563,7 @@ }, "outputs": [], "source": [ - "df = pd.concat([df_10m[~df_10m[\"admin\"].isin(df_50m[\"admin\"].unique())], df_50m])" + "df = pd.concat([df_10m[~df_10m['admin'].isin(df_50m['admin'].unique())], df_50m])" ] }, { @@ -593,241 +587,241 @@ "source": [ "# Country names used in file names\n", "countries = [\n", - " \"afghanistan\",\n", - " \"aland\",\n", - " \"albania\",\n", - " \"algeria\",\n", - " \"american samoa\",\n", - " \"andorra\",\n", - " \"angola\",\n", - " \"anguilla\",\n", - " \"antarctica\",\n", - " \"antigua and barbuda\",\n", - " \"argentina\",\n", - " \"armenia\",\n", - " \"aruba\",\n", - " \"australia\",\n", - " \"austria\",\n", - " \"azerbaijan\",\n", - " \"the bahamas\",\n", - " \"bahrain\",\n", - " \"bangladesh\",\n", - " \"barbados\",\n", - " \"belarus\",\n", - " \"belgium\",\n", - " \"belize\",\n", - " \"benin\",\n", - " \"bermuda\",\n", - " \"bhutan\",\n", - " \"bolivia\",\n", - " # 'bonaire, sint eustatius and saba', # Part of Netherlands Antilles, part of Netherlands, accordsing to Natural Earth?\n", - " \"bosnia and herzegovina\",\n", - " \"botswana\",\n", - " # 'bouvet island', # part of Norway, in Natural Earth data\n", - " \"brazil\",\n", - " \"brunei\",\n", - " \"british indian ocean territory\",\n", - " \"bulgaria\",\n", - " \"burkina faso\",\n", - " \"burundi\",\n", - " \"cape verde\",\n", - " \"cambodia\",\n", - " \"cameroon\",\n", - " \"canada\",\n", - " \"cayman islands\",\n", - " \"central african republic\",\n", - " \"chad\",\n", - " \"chile\",\n", - " \"china\",\n", - " # 'christmas island', # part of British Indian Ocean Territory, according to Natural Earth\n", - " # 'cocos (keeling) islands', # part of British Indian Ocean Territory, according to Natural Earth\n", - " \"colombia\",\n", - " \"comoros\",\n", - " \"democratic republic of the congo\",\n", - " \"cook islands\",\n", - " \"costa rica\",\n", - " # 'côte d\\'ivoire', # not sure why this isn't working\n", - " \"croatia\",\n", - " \"cuba\",\n", - " \"curaçao\",\n", - " \"cyprus\",\n", - " \"czech republic\",\n", - " \"denmark\",\n", - " \"djibouti\",\n", - " \"dominica\",\n", - " \"dominican republic\",\n", - " \"ecuador\",\n", - " \"egypt\",\n", - " \"el salvador\",\n", - " \"equatorial guinea\",\n", - " \"eritrea\",\n", - " \"estonia\",\n", - " # 'eswatini', # not sure why this doesn't work — Swaziland isn't available to alias, either.\n", - " \"ethiopia\",\n", - " \"falkland islands\",\n", - " \"faroe islands\",\n", - " \"fiji\",\n", - " \"finland\",\n", - " \"france\",\n", - " # 'french guiana',\n", - " \"french polynesia\",\n", - " # 'french southern territories (the)',\n", - " \"gabon\",\n", - " \"gambia\",\n", - " \"germany\",\n", - " \"ghana\",\n", - " \"gibraltar\",\n", - " \"greece\",\n", - " \"greenland\",\n", - " \"grenada\",\n", - " # 'guadeloupe', # part of France, in Natural Earth data\n", - " \"guam\",\n", - " \"guatemala\",\n", - " \"haiti\",\n", - " \"hungary\",\n", - " \"guernsey\",\n", - " \"guinea\",\n", - " # 'guinea-bissau', # not sure why this isn't working\n", - " \"guyana\",\n", - " \"honduras\",\n", - " \"iceland\",\n", - " \"india\",\n", - " \"indonesia\",\n", - " \"iran\",\n", - " \"israel\",\n", - " \"italy\",\n", - " \"japan\",\n", - " \"jordan\",\n", - " \"kazakhstan\",\n", - " \"kenya\",\n", - " \"korea\",\n", - " \"kuwait\",\n", - " \"kyrgyzstan\",\n", - " \"laos\",\n", - " \"latvia\",\n", - " \"lebanon\",\n", - " \"lesotho\",\n", - " \"liberia\",\n", - " \"libya\",\n", - " \"liechtenstein\",\n", - " \"lithuania\",\n", - " \"luxembourg\",\n", - " # 'macao', # part of China, in Natural Earth data\n", - " \"macedonia\",\n", - " \"madagascar\",\n", - " \"malawi\",\n", - " \"malaysia\",\n", - " \"maldives\",\n", - " \"mali\",\n", - " \"malta\",\n", - " \"marshall islands\",\n", - " # 'martinique', # part of France, in Natural Earth data\n", - " \"mauritania\",\n", - " \"mauritius\",\n", - " # 'mayotte', # part of France, in Natural Earth data\n", - " \"mexico\",\n", - " \"moldova\",\n", - " \"montserrat\",\n", - " \"monaco\",\n", - " \"mongolia\",\n", - " \"montenegro\",\n", - " \"morocco\",\n", - " \"mozambique\",\n", - " \"myanmar\",\n", - " \"namibia\",\n", - " \"nauru\",\n", - " \"nepal\",\n", - " \"netherlands\",\n", - " \"new caledonia\",\n", - " \"new zealand\",\n", - " \"nicaragua\",\n", - " \"niger\",\n", - " \"nigeria\",\n", - " \"niue\",\n", - " \"norfolk island\",\n", - " \"northern mariana islands\",\n", - " \"norway\",\n", - " \"oman\",\n", - " \"pakistan\",\n", - " \"palau\",\n", - " # 'palestine', # part of Israel, in Natural Earth data\n", - " \"panama\",\n", - " \"papua new guinea\",\n", - " \"paraguay\",\n", - " \"peru\",\n", - " # 'pitcairn', # part of UK, in Natural Earth data\n", - " \"philippines\",\n", - " \"poland\",\n", - " \"portugal\",\n", - " \"puerto rico\",\n", - " \"qatar\",\n", - " # 'réunion', # part of France, in Natural Earth data\n", - " \"romania\",\n", - " \"russia\",\n", - " \"rwanda\",\n", - " \"saint barthelemy\",\n", - " # 'saint helena, ascension and tristan da cunha', # part of UK, in Natural Earth data\n", - " \"saint lucia\",\n", - " \"saint martin\",\n", - " # 'saint martin (french part)', part of Saint Martin, in Natural Earth data\n", - " \"saint pierre and miquelon\",\n", - " \"saint vincent and the grenadines\",\n", - " \"samoa\",\n", - " \"san marino\",\n", - " \"sao tome and principe\",\n", - " \"saudi arabia\",\n", - " \"senegal\",\n", - " # 'serbia', # not sure why this isn't working!\n", - " \"seychelles\",\n", - " \"sierra leone\",\n", - " \"singapore\",\n", - " # 'sint maarten (dutch part)', part of Saint Martin, in Natural Earth data\n", - " \"slovakia\",\n", - " \"slovenia\",\n", - " \"solomon islands\",\n", - " \"somalia\",\n", - " \"south africa\",\n", - " # 'south sudan', # part of Sudan, in Natural Earth data\n", - " \"spain\",\n", - " \"sri lanka\",\n", - " \"sudan\",\n", - " \"suriname\",\n", - " # 'svalbard and jan mayen', # part of Norway, in Natural Earth data\n", - " \"sweden\",\n", - " \"switzerland\",\n", - " \"syria\",\n", - " \"taiwan\",\n", - " \"tajikistan\",\n", - " \"tanzania\",\n", - " \"thailand\",\n", - " \"timorleste\",\n", - " \"togo\",\n", - " # 'tokelau', # part of New Zealand, in Natural Earth data\n", - " \"tonga\",\n", - " \"trinidad and tobago\",\n", - " \"tunisia\",\n", - " \"turkey\",\n", - " \"turkmenistan\",\n", - " \"turks and caicos islands\",\n", - " \"tuvalu\",\n", - " \"uganda\",\n", - " \"uk\",\n", - " \"ukraine\",\n", - " \"united arab emirates\",\n", - " \"united states minor outlying islands\",\n", - " \"uruguay\",\n", - " \"usa\",\n", - " \"uzbekistan\",\n", - " \"vanuatu\",\n", - " \"vatican\",\n", - " \"venezuela\",\n", - " \"vietnam\",\n", - " \"british virgin islands\",\n", - " \"united states virgin islands\",\n", - " \"wallis and futuna\",\n", - " \"yemen\",\n", - " \"zambia\",\n", - " \"zimbabwe\",\n", + " 'afghanistan',\n", + " 'aland',\n", + " 'albania',\n", + " 'algeria',\n", + " 'american samoa',\n", + " 'andorra',\n", + " 'angola',\n", + " 'anguilla', \n", + " 'antarctica',\n", + " 'antigua and barbuda',\n", + " 'argentina',\n", + " 'armenia',\n", + " 'aruba',\n", + " 'australia',\n", + " 'austria',\n", + " 'azerbaijan',\n", + " 'the bahamas',\n", + " 'bahrain',\n", + " 'bangladesh',\n", + " 'barbados',\n", + " 'belarus',\n", + " 'belgium',\n", + " 'belize',\n", + " 'benin',\n", + " 'bermuda',\n", + " 'bhutan',\n", + " 'bolivia',\n", + " # 'bonaire, sint eustatius and saba', # Part of Netherlands Antilles, part of Netherlands, accordsing to Natural Earth?\n", + " 'bosnia and herzegovina',\n", + " 'botswana',\n", + " # 'bouvet island', # part of Norway, in Natural Earth data\n", + " 'brazil',\n", + " 'brunei',\n", + " 'british indian ocean territory',\n", + " 'bulgaria',\n", + " 'burkina faso',\n", + " 'burundi',\n", + " 'cape verde',\n", + " 'cambodia',\n", + " 'cameroon',\n", + " 'canada',\n", + " 'cayman islands',\n", + " 'central african republic',\n", + " 'chad',\n", + " 'chile',\n", + " 'china',\n", + " # 'christmas island', # part of British Indian Ocean Territory, according to Natural Earth\n", + " # 'cocos (keeling) islands', # part of British Indian Ocean Territory, according to Natural Earth\n", + " 'colombia',\n", + " 'comoros',\n", + " 'democratic republic of the congo',\n", + " 'cook islands',\n", + " 'costa rica',\n", + " # 'côte d\\'ivoire', # not sure why this isn't working\n", + " 'croatia',\n", + " 'cuba',\n", + " 'curaçao',\n", + " 'cyprus',\n", + " 'czech republic',\n", + " 'denmark',\n", + " 'djibouti',\n", + " 'dominica',\n", + " 'dominican republic',\n", + " 'ecuador',\n", + " 'egypt',\n", + " 'el salvador',\n", + " 'equatorial guinea',\n", + " 'eritrea',\n", + " 'estonia',\n", + " # 'eswatini', # not sure why this doesn't work — Swaziland isn't available to alias, either.\n", + " 'ethiopia',\n", + " 'falkland islands',\n", + " 'faroe islands',\n", + " 'fiji',\n", + " 'finland',\n", + " 'france',\n", + " # 'french guiana',\n", + " 'french polynesia',\n", + " # 'french southern territories (the)',\n", + " 'gabon', \n", + " 'gambia',\n", + " 'germany',\n", + " 'ghana',\n", + " 'gibraltar',\n", + " 'greece',\n", + " 'greenland',\n", + " 'grenada',\n", + " # 'guadeloupe', # part of France, in Natural Earth data\n", + " 'guam',\n", + " 'guatemala',\n", + " 'haiti',\n", + " 'hungary',\n", + " 'guernsey',\n", + " 'guinea',\n", + " # 'guinea-bissau', # not sure why this isn't working\n", + " 'guyana',\n", + " 'honduras',\n", + " 'iceland',\n", + " 'india',\n", + " 'indonesia',\n", + " 'iran',\n", + " 'israel',\n", + " 'italy',\n", + " 'japan',\n", + " 'jordan',\n", + " 'kazakhstan',\n", + " 'kenya',\n", + " 'korea',\n", + " 'kuwait',\n", + " 'kyrgyzstan',\n", + " 'laos',\n", + " 'latvia',\n", + " 'lebanon',\n", + " 'lesotho',\n", + " 'liberia',\n", + " 'libya',\n", + " 'liechtenstein',\n", + " 'lithuania',\n", + " 'luxembourg',\n", + " # 'macao', # part of China, in Natural Earth data\n", + " 'macedonia',\n", + " 'madagascar',\n", + " 'malawi',\n", + " 'malaysia',\n", + " 'maldives',\n", + " 'mali',\n", + " 'malta',\n", + " 'marshall islands',\n", + " # 'martinique', # part of France, in Natural Earth data\n", + " 'mauritania',\n", + " 'mauritius',\n", + " # 'mayotte', # part of France, in Natural Earth data\n", + " 'mexico',\n", + " 'moldova',\n", + " 'montserrat',\n", + " 'monaco',\n", + " 'mongolia',\n", + " 'montenegro',\n", + " 'morocco',\n", + " 'mozambique',\n", + " 'myanmar',\n", + " 'namibia',\n", + " 'nauru',\n", + " 'nepal',\n", + " 'netherlands',\n", + " 'new caledonia',\n", + " 'new zealand',\n", + " 'nicaragua',\n", + " 'niger',\n", + " 'nigeria',\n", + " 'niue',\n", + " 'norfolk island',\n", + " 'northern mariana islands',\n", + " 'norway',\n", + " 'oman',\n", + " 'pakistan',\n", + " 'palau',\n", + " # 'palestine', # part of Israel, in Natural Earth data\n", + " 'panama',\n", + " 'papua new guinea',\n", + " 'paraguay',\n", + " 'peru',\n", + " # 'pitcairn', # part of UK, in Natural Earth data\n", + " 'philippines',\n", + " 'poland',\n", + " 'portugal',\n", + " 'puerto rico',\n", + " 'qatar',\n", + " # 'réunion', # part of France, in Natural Earth data\n", + " 'romania',\n", + " 'russia',\n", + " 'rwanda',\n", + " 'saint barthelemy',\n", + " # 'saint helena, ascension and tristan da cunha', # part of UK, in Natural Earth data\n", + " 'saint lucia',\n", + " 'saint martin',\n", + " # 'saint martin (french part)', part of Saint Martin, in Natural Earth data\n", + " 'saint pierre and miquelon',\n", + " 'saint vincent and the grenadines',\n", + " 'samoa',\n", + " 'san marino',\n", + " 'sao tome and principe',\n", + " 'saudi arabia',\n", + " 'senegal',\n", + " # 'serbia', # not sure why this isn't working!\n", + " 'seychelles',\n", + " 'sierra leone',\n", + " 'singapore',\n", + " # 'sint maarten (dutch part)', part of Saint Martin, in Natural Earth data\n", + " 'slovakia',\n", + " 'slovenia',\n", + " 'solomon islands',\n", + " 'somalia',\n", + " 'south africa',\n", + " # 'south sudan', # part of Sudan, in Natural Earth data\n", + " 'spain',\n", + " 'sri lanka',\n", + " 'sudan',\n", + " 'suriname',\n", + " # 'svalbard and jan mayen', # part of Norway, in Natural Earth data\n", + " 'sweden',\n", + " 'switzerland',\n", + " 'syria',\n", + " 'taiwan',\n", + " 'tajikistan',\n", + " 'tanzania',\n", + " 'thailand',\n", + " 'timorleste',\n", + " 'togo',\n", + " # 'tokelau', # part of New Zealand, in Natural Earth data\n", + " 'tonga',\n", + " 'trinidad and tobago',\n", + " 'tunisia',\n", + " 'turkey',\n", + " 'turkmenistan',\n", + " 'turks and caicos islands',\n", + " 'tuvalu',\n", + " 'uganda',\n", + " 'uk',\n", + " 'ukraine',\n", + " 'united arab emirates',\n", + " 'united states minor outlying islands',\n", + " 'uruguay',\n", + " 'usa',\n", + " 'uzbekistan',\n", + " 'vanuatu',\n", + " 'vatican',\n", + " 'venezuela',\n", + " 'vietnam',\n", + " 'british virgin islands',\n", + " 'united states virgin islands',\n", + " 'wallis and futuna',\n", + " 'yemen',\n", + " 'zambia',\n", + " 'zimbabwe'\n", "]\n", "\n", "# country name used in dataset\n", @@ -841,22 +835,17 @@ "\n", "# CSV files that are defined later in the notebook:\n", "region_maps = [\n", - " \"france_regions\",\n", - " \"italy_regions\",\n", - " \"philippines_regions\",\n", - " \"turkey_regions\",\n", + " 'france_regions',\n", + " 'italy_regions',\n", + " 'philippines_regions',\n", + " 'turkey_regions'\n", "]\n", "\n", "# Make sure all country names are covered:\n", - "invalid_countries = [\n", - " x\n", - " for x in countries\n", - " if (country_name_aliases.get(x, x) not in df[\"admin\"].str.lower().unique())\n", - " and (x not in region_maps)\n", - "]\n", + "invalid_countries = [x for x in countries if (country_name_aliases.get(x, x) not in df[\"admin\"].str.lower().unique()) and (x not in region_maps)]\n", "\n", "if invalid_countries:\n", - " print(f\"Following country names are not valid: {invalid_countries}\")" + " print(f\"Following country names are not valid: {invalid_countries}\")" ] }, { @@ -876,11 +865,11 @@ "source": [ "# Plotting style defaults:\n", "plot_styles = {\n", - " \"edgecolor\": \"black\", # Sets the color of the border of each geometry in the plot to black\n", - " \"column\": \"name\", # Specifies the column to be used for coloring the geometries based on its values\n", - " \"legend\": False, # Disables the legend for the plot\n", - " \"cmap\": \"tab20\", # Sets the colormap to 'tab20' which provides a distinct set of colors for visual differentiation\n", - " \"linewidth\": 0.25, # Sets the thickness of the edges/borders of the geometries\n", + " 'edgecolor': 'black', # Sets the color of the border of each geometry in the plot to black\n", + " 'column': 'name', # Specifies the column to be used for coloring the geometries based on its values\n", + " 'legend': False, # Disables the legend for the plot\n", + " 'cmap': 'tab20', # Sets the colormap to 'tab20' which provides a distinct set of colors for visual differentiation\n", + " 'linewidth': 0.25 # Sets the thickness of the edges/borders of the geometries\n", "}" ] }, @@ -901,10 +890,7 @@ " gdf = df[df[\"admin\"].str.lower() == country_alias]\n", " return gdf.copy()\n", "\n", - "\n", - "def plot_all_countries(\n", - " countries, subplot_width=5, subplot_height=5, base_tolerance=0.01\n", - "):\n", + "def plot_all_countries(countries, subplot_width=5, subplot_height=5, base_tolerance=0.01):\n", " if not countries:\n", " print(\"No countries to plot.\")\n", " return\n", @@ -914,28 +900,24 @@ " nrows = max((num_countries + ncols - 1) // ncols, 1) # Ensure at least one row\n", "\n", " figsize = (ncols * subplot_width, nrows * subplot_height)\n", - " print(\n", - " f\"Debug Info: figsize={figsize}, ncols={ncols}, nrows={nrows}, num_countries={num_countries}\"\n", - " )\n", + " print(f\"Debug Info: figsize={figsize}, ncols={ncols}, nrows={nrows}, num_countries={num_countries}\")\n", "\n", " plt.figure(figsize=figsize)\n", - " plt.rc(\"font\", size=24) # Sets the font size globally\n", + " plt.rc('font', size=24) # Sets the font size globally\n", "\n", " for i, country in enumerate(countries):\n", " ax = plt.subplot(nrows, ncols, i + 1)\n", " gdf = get_gdf(country)\n", " if not gdf.empty:\n", " gdf_projected = gdf.to_crs(epsg=6933)\n", - " area = gdf_projected[\"geometry\"].area.sum()\n", - " dynamic_tolerance = base_tolerance * (area / 1e6) ** 0.8\n", - " gdf_projected[\"geometry\"] = gdf_projected[\"geometry\"].simplify(\n", - " tolerance=dynamic_tolerance, preserve_topology=True\n", - " )\n", + " area = gdf_projected['geometry'].area.sum()\n", + " dynamic_tolerance = base_tolerance * (area / 1e6) ** 0.8 \n", + " gdf_projected['geometry'] = gdf_projected['geometry'].simplify(tolerance=dynamic_tolerance, preserve_topology=True)\n", "\n", " gdf.plot(ax=ax, **plot_styles)\n", - " ax.set_aspect(\"equal\", adjustable=\"datalim\")\n", + " ax.set_aspect('equal', adjustable='datalim')\n", " else:\n", - " ax.text(0.5, 0.5, country, ha=\"center\", va=\"center\", fontsize=24)\n", + " ax.text(0.5, 0.5, country, ha='center', va='center', fontsize=24)\n", " ax.set_title(country)\n", "\n", " plt.tight_layout()\n", @@ -947,7 +929,7 @@ "# It's disabled because it takes a while to run.\n", "plot_preview = False\n", "if plot_preview:\n", - " plot_all_countries()" + " plot_all_countries()" ] }, { @@ -1014,7 +996,7 @@ } ], "source": [ - "usa = df[df[\"adm0_a3\"] == \"USA\"]\n", + "usa = df[df['adm0_a3'] == 'USA']\n", "not speed_run and usa.plot(**plot_styles)" ] }, @@ -1049,6 +1031,7 @@ ], "source": [ "def reposition(df, idx, xoff=None, yoff=None, xscale=None, yscale=None, simplify=None):\n", + "\n", " def move_and_scale(series):\n", " if xoff or yoff:\n", " series = shapely.affinity.translate(series, xoff or 0, yoff or 0)\n", @@ -1058,14 +1041,14 @@ " series = series.simplify(simplify, preserve_topology=False)\n", " return series\n", "\n", - " df.loc[idx, \"geometry\"] = df.loc[idx, \"geometry\"].apply(move_and_scale)\n", + " df.loc[idx, 'geometry'] = df.loc[idx, 'geometry'].apply(move_and_scale)\n", "\n", "\n", "usa_copy = usa.copy()\n", - "reposition(usa_copy, usa.name == \"Hawaii\", 51, 5.5)\n", - "reposition(usa_copy, usa.name == \"Alaska\", 35, -34, 0.35, 0.35)\n", + "reposition(usa_copy, usa.name == 'Hawaii', 51, 5.5)\n", + "reposition(usa_copy, usa.name == 'Alaska', 35, -34, 0.35, 0.35)\n", "\n", - "not speed_run and usa_copy.plot(figsize=(8, 8), **plot_styles)" + "not speed_run and usa_copy.plot(figsize=(8,8), **plot_styles)" ] }, { @@ -1248,20 +1231,16 @@ "source": [ "# Chinese Special Administrative Regions\n", "china_sars = df_admin0_10m.loc[\n", - " df_admin0_10m.name_en.isin([\"Taiwan\", \"Hong Kong\", \"Macau\"]),\n", - " [x for x in df_admin0_10m.columns if x in df.columns],\n", + " df_admin0_10m.name_en.isin(['Taiwan', 'Hong Kong', 'Macau']),\n", + " [x for x in df_admin0_10m.columns if x in df.columns]\n", "]\n", - "china_sars = china_sars.merge(\n", - " pd.DataFrame(\n", - " data={\n", - " \"name_en\": [\"Taiwan\", \"Hong Kong\", \"Macau\"],\n", - " \"name_zh\": [\"中国台湾\", \"香港特别行政区\", \"澳门特别行政区\"],\n", - " \"iso_3166_2\": [\"CN-71\", \"CN-91\", \"CN-92\"],\n", - " },\n", - " ),\n", - " on=\"name_en\",\n", - " how=\"left\",\n", - ")\n", + "china_sars = china_sars.merge(pd.DataFrame(\n", + " data={\n", + " \"name_en\": [\"Taiwan\", \"Hong Kong\", \"Macau\"],\n", + " \"name_zh\": [\"中国台湾\", \"香港特别行政区\", \"澳门特别行政区\"],\n", + " \"iso_3166_2\": [\"CN-71\", \"CN-91\", \"CN-92\"],\n", + " },\n", + "), on=\"name_en\", how=\"left\")\n", "china_sars" ] }, @@ -1338,20 +1317,16 @@ "outputs": [], "source": [ "finland_aland = df_admin0_10m.loc[\n", - " df_admin0_10m.name_en.isin([\"Åland\"]),\n", - " [x for x in df_admin0_10m.columns if x in df.columns],\n", + " df_admin0_10m.name_en.isin(['Åland']),\n", + " [x for x in df_admin0_10m.columns if x in df.columns]\n", "]\n", - "finland_aland = finland_aland.merge(\n", - " pd.DataFrame(\n", - " data={\n", - " \"name_en\": [\"Åland\"],\n", - " \"name_fi\": [\"Ahvenanmaan maakunta\"],\n", - " \"iso_3166_2\": [\"FI-01\"],\n", - " },\n", - " ),\n", - " on=\"name_en\",\n", - " how=\"left\",\n", - ")" + "finland_aland = finland_aland.merge(pd.DataFrame(\n", + " data={\n", + " \"name_en\": [\"Åland\"],\n", + " \"name_fi\": [\"Ahvenanmaan maakunta\"],\n", + " \"iso_3166_2\": [\"FI-01\"],\n", + " },\n", + "), on=\"name_en\", how=\"left\")\n" ] }, { @@ -1398,7 +1373,7 @@ "finland_copy = finland_copy.drop([\"name_fi\"], axis=1)\n", "\n", "# Plotting the DataFrame\n", - "not speed_run and finland_copy.plot(figsize=(7, 7), **plot_styles)" + "not speed_run and finland_copy.plot(figsize=(7, 7), **plot_styles)\n" ] }, { @@ -1441,27 +1416,24 @@ } ], "source": [ - "russia_copy = df[df[\"adm0_a3\"] == \"RUS\"].copy()\n", - "crimea = russia_copy[russia_copy[\"iso_3166_2\"] == \"UA-43\"].copy()\n", - "sevastopol = russia_copy[russia_copy[\"iso_3166_2\"] == \"UA-40\"].copy()\n", "\n", - "ukraine_with_crimea = pd.concat(\n", - " [df[df[\"adm0_a3\"] == \"UKR\"], crimea, sevastopol], ignore_index=True\n", - ")\n", + "russia_copy = df[df['adm0_a3'] == 'RUS'].copy()\n", + "crimea = russia_copy[russia_copy['iso_3166_2'] == 'UA-43'].copy()\n", + "sevastopol = russia_copy[russia_copy['iso_3166_2'] == 'UA-40'].copy()\n", + "\n", + "ukraine_with_crimea = pd.concat([df[df['adm0_a3'] == 'UKR'], crimea, sevastopol], ignore_index=True)\n", "\n", "# kyiv = ukraine_with_crimea[ukraine_with_crimea['iso_3166_2'] == 'UA-30']\n", "# kyiv_oblast = ukraine_with_crimea[ukraine_with_crimea['iso_3166_2'] == 'UA-32']\n", "\n", "# Update the name of the Kyiv city entry\n", - "ukraine_with_crimea.loc[ukraine_with_crimea[\"iso_3166_2\"] == \"UA-30\", \"name\"] = \"Kyiv\"\n", + "ukraine_with_crimea.loc[ukraine_with_crimea['iso_3166_2'] == 'UA-30', 'name'] = 'Kyiv'\n", "\n", "# # Update the name of the Kyiv Oblast entry\n", - "ukraine_with_crimea.loc[ukraine_with_crimea[\"iso_3166_2\"] == \"UA-32\", \"name\"] = (\n", - " \"Kyiv Oblast\"\n", - ")\n", + "ukraine_with_crimea.loc[ukraine_with_crimea['iso_3166_2'] == 'UA-32', 'name'] = 'Kyiv Oblast'\n", "\n", "# Plotting the DataFrame\n", - "not speed_run and ukraine_with_crimea.plot(figsize=(7, 7), **plot_styles)" + "not speed_run and ukraine_with_crimea.plot(figsize=(7,7), **plot_styles)" ] }, { @@ -1487,7 +1459,7 @@ } ], "source": [ - "india = df[df[\"admin\"] == \"India\"]\n", + "india = df[df['admin'] == 'India']\n", "india_copy = india.copy()\n", "\n", "# Download and load the GeoJSON file for India\n", @@ -1496,22 +1468,13 @@ "try:\n", " india_gdf = gpd.read_file(india_geojson_url)\n", " # Rename column to 'ST_ID' to 'iso_3166_2' for consistency\n", - " india_gdf.rename(columns={\"ST_ID\": \"iso_3166_2\"}, inplace=True)\n", + " india_gdf.rename(columns={'ST_ID': 'iso_3166_2'}, inplace=True)\n", " # Update the geometry for the states of Jammu and Kashmir and Ladakh\n", - " india_copy.loc[india_copy[\"iso_3166_2\"] == \"IN-JK\", \"geometry\"] = (\n", - " india_gdf[india_gdf[\"iso_3166_2\"] == \"IN-JK\"]\n", - " .dissolve(by=\"iso_3166_2\")\n", - " .reset_index()[\"geometry\"]\n", - " .values\n", - " )\n", - " india_copy.loc[india_copy[\"iso_3166_2\"] == \"IN-LA\", \"geometry\"] = (\n", - " india_gdf[india_gdf[\"iso_3166_2\"] == \"IN-LA\"]\n", - " .dissolve(by=\"iso_3166_2\")\n", - " .reset_index()[\"geometry\"]\n", - " .values\n", - " )\n", + " india_copy.loc[india_copy['iso_3166_2'] == 'IN-JK', 'geometry'] = india_gdf[india_gdf['iso_3166_2'] == 'IN-JK'].dissolve(by='iso_3166_2').reset_index()[\"geometry\"].values\n", + " india_copy.loc[india_copy['iso_3166_2'] == 'IN-LA', 'geometry'] = india_gdf[india_gdf['iso_3166_2'] == 'IN-LA'].dissolve(by='iso_3166_2').reset_index()[\"geometry\"].values\n", " print(\"GeoJSON file for India downloaded and loaded successfully.\")\n", "except Exception as e:\n", + " \n", " print(f\"Unable to download or load the GeoJSON file for India. Error: {str(e)}\")\n", " print(\"Please download the file from the URL and try again.\")" ] @@ -1587,7 +1550,7 @@ } ], "source": [ - "norway = df[df[\"adm0_a3\"] == \"NOR\"]\n", + "norway = df[df['adm0_a3'] == 'NOR']\n", "not speed_run and norway.plot(**plot_styles)" ] }, @@ -1624,8 +1587,8 @@ "norway_copy = norway.copy()\n", "\n", "norway_copy = norway_copy[norway_copy[\"iso_3166_2\"] != \"NO-X01~\"]\n", - "reposition(norway_copy, norway.name == \"Svalbard\", -12, -8, 0.5, 0.5)\n", - "# reposition(norway_copy, norway.name == 'Nordland', 10, 0, 2, 2)\n", + "reposition(norway_copy, norway.name == 'Svalbard', -12, -8, 0.5, 0.5)\n", + "#reposition(norway_copy, norway.name == 'Nordland', 10, 0, 2, 2)\n", "\n", "not speed_run and norway_copy.plot(**plot_styles)" ] @@ -1669,7 +1632,7 @@ } ], "source": [ - "portugal = df[df.admin == \"Portugal\"]\n", + "portugal = df[df.admin == 'Portugal']\n", "not speed_run and portugal.plot(**plot_styles)" ] }, @@ -1705,8 +1668,8 @@ "source": [ "portugal_copy = portugal.copy()\n", "\n", - "reposition(portugal_copy, portugal.name == \"Azores\", 11, 0)\n", - "reposition(portugal_copy, portugal.name == \"Madeira\", 6, 2, simplify=0.015)\n", + "reposition(portugal_copy, portugal.name == 'Azores', 11, 0)\n", + "reposition(portugal_copy, portugal.name == 'Madeira', 6, 2, simplify=0.015)\n", "\n", "not speed_run and portugal_copy.plot(figsize=(8, 8), **plot_styles)" ] @@ -1750,7 +1713,7 @@ } ], "source": [ - "spain = df[df.admin == \"Spain\"]\n", + "spain = df[df.admin == 'Spain']\n", "not speed_run and spain.plot(**plot_styles)" ] }, @@ -1786,9 +1749,7 @@ "source": [ "spain_copy = spain.copy()\n", "\n", - "reposition(\n", - " spain_copy, spain.name.isin([\"Las Palmas\", \"Santa Cruz de Tenerife\"]), 3, 7, 1, 1\n", - ")\n", + "reposition(spain_copy, spain.name.isin(['Las Palmas', 'Santa Cruz de Tenerife']), 3, 7, 1, 1)\n", "\n", "not speed_run and spain_copy.plot(figsize=(8, 8), **plot_styles)" ] @@ -1832,7 +1793,7 @@ } ], "source": [ - "russia = df[df.admin == \"Russia\"]\n", + "russia = df[df.admin == 'Russia']\n", "not speed_run and russia.plot(**plot_styles)" ] }, @@ -1899,17 +1860,16 @@ " # Combine all moved geometries into a single geometry\n", " return shapely.ops.unary_union(moved_geom)\n", "\n", - "\n", "# Applying the function to the DataFrame\n", "russia_copy = russia.copy()\n", - "russia_copy.loc[russia.name == \"Chukchi Autonomous Okrug\", \"geometry\"] = (\n", - " russia_copy.loc[\n", - " russia.name == \"Chukchi Autonomous Okrug\", \"geometry\"\n", - " ].apply(shift_geom)\n", - ")\n", + "russia_copy.loc[\n", + " russia.name == 'Chukchi Autonomous Okrug', 'geometry'\n", + "] = russia_copy.loc[\n", + " russia.name == 'Chukchi Autonomous Okrug', 'geometry'\n", + "].apply(shift_geom)\n", "\n", "# Plotting\n", - "not speed_run and russia_copy.plot(figsize=(20, 20), **plot_styles)" + "not speed_run and russia_copy.plot(figsize=(20, 20), **plot_styles)\n" ] }, { @@ -1953,10 +1913,8 @@ } ], "source": [ - "turkey = df[df.admin == \"Turkey\"][[\"iso_3166_2\", \"geometry\"]]\n", - "not speed_run and turkey.plot(\n", - " **{key: value for key, value in plot_styles.items() if key != \"column\"}\n", - ")" + "turkey = df[df.admin == 'Turkey'][['iso_3166_2','geometry']]\n", + "not speed_run and turkey.plot(**{key: value for key, value in plot_styles.items() if key != 'column'})" ] }, { @@ -1968,77 +1926,50 @@ "# NUTS - 1 Codes for Turkey and correspong region - city names\n", "\n", "region_dict = {\n", - " \"TR1\": [\"TR-34\"],\n", - " \"TR2\": [\"TR-59\", \"TR-22\", \"TR-39\", \"TR-10\", \"TR-17\"],\n", - " \"TR3\": [\"TR-35\", \"TR-09\", \"TR-20\", \"TR-48\", \"TR-45\", \"TR-03\", \"TR-43\", \"TR-64\"],\n", - " \"TR4\": [\"TR-16\", \"TR-26\", \"TR-11\", \"TR-41\", \"TR-54\", \"TR-81\", \"TR-14\", \"TR-77\"],\n", - " \"TR5\": [\"TR-06\", \"TR-42\", \"TR-70\"],\n", - " \"TR6\": [\"TR-07\", \"TR-32\", \"TR-15\", \"TR-01\", \"TR-33\", \"TR-31\", \"TR-46\", \"TR-80\"],\n", - " \"TR7\": [\"TR-71\", \"TR-68\", \"TR-51\", \"TR-50\", \"TR-40\", \"TR-38\", \"TR-58\", \"TR-66\"],\n", - " \"TR8\": [\n", - " \"TR-67\",\n", - " \"TR-78\",\n", - " \"TR-74\",\n", - " \"TR-37\",\n", - " \"TR-18\",\n", - " \"TR-57\",\n", - " \"TR-55\",\n", - " \"TR-60\",\n", - " \"TR-19\",\n", - " \"TR-05\",\n", - " ],\n", - " \"TR9\": [\"TR-61\", \"TR-52\", \"TR-28\", \"TR-53\", \"TR-08\", \"TR-29\"],\n", - " \"TRA\": [\"TR-25\", \"TR-24\", \"TR-69\", \"TR-04\", \"TR-36\", \"TR-76\", \"TR-75\"],\n", - " \"TRB\": [\"TR-44\", \"TR-23\", \"TR-12\", \"TR-62\", \"TR-65\", \"TR-49\", \"TR-13\", \"TR-30\"],\n", - " \"TRC\": [\n", - " \"TR-27\",\n", - " \"TR-02\",\n", - " \"TR-79\",\n", - " \"TR-63\",\n", - " \"TR-21\",\n", - " \"TR-47\",\n", - " \"TR-72\",\n", - " \"TR-73\",\n", - " \"TR-56\",\n", - " ],\n", - "}\n", + " 'TR1': ['TR-34'],\n", + " 'TR2': ['TR-59', 'TR-22', 'TR-39', 'TR-10', 'TR-17'],\n", + " 'TR3': ['TR-35', 'TR-09', 'TR-20', 'TR-48', 'TR-45', 'TR-03', 'TR-43', 'TR-64'],\n", + " 'TR4': ['TR-16', 'TR-26', 'TR-11', 'TR-41', 'TR-54', 'TR-81', 'TR-14', 'TR-77'],\n", + " 'TR5': ['TR-06', 'TR-42', 'TR-70'],\n", + " 'TR6': ['TR-07', 'TR-32', 'TR-15', 'TR-01', 'TR-33', 'TR-31', 'TR-46', 'TR-80'],\n", + " 'TR7': ['TR-71', 'TR-68', 'TR-51', 'TR-50', 'TR-40', 'TR-38', 'TR-58', 'TR-66'],\n", + " 'TR8': ['TR-67', 'TR-78', 'TR-74', 'TR-37', 'TR-18', 'TR-57', 'TR-55', 'TR-60', 'TR-19', 'TR-05'],\n", + " 'TR9': ['TR-61', 'TR-52', 'TR-28', 'TR-53', 'TR-08', 'TR-29'],\n", + " 'TRA': ['TR-25', 'TR-24', 'TR-69', 'TR-04', 'TR-36', 'TR-76', 'TR-75'],\n", + " 'TRB': ['TR-44', 'TR-23', 'TR-12', 'TR-62', 'TR-65', 'TR-49', 'TR-13', 'TR-30'],\n", + " 'TRC': ['TR-27', 'TR-02', 'TR-79', 'TR-63', 'TR-21', 'TR-47', 'TR-72', 'TR-73', 'TR-56']}\n", "\n", "# Region names corresponding to NUTS-1\n", "\n", - "region_name_dict = {\n", - " \"TR1\": \"İstanbul\",\n", - " \"TR2\": \"Batı Marmara\",\n", - " \"TR3\": \"Ege\",\n", - " \"TR4\": \"Doğu Marmara\",\n", - " \"TR5\": \"Batı Anadolu\",\n", - " \"TR6\": \"Akdeniz\",\n", - " \"TR7\": \"Orta Anadolu\",\n", - " \"TR8\": \"Batı Karadeniz\",\n", - " \"TR9\": \"Doğu Karadeniz\",\n", - " \"TRA\": \"Kuzeydoğu Anadolu\",\n", - " \"TRC\": \"Güneydoğu Anadolu\",\n", - " \"TRB\": \"Ortadoğu Anadolu\",\n", - "}\n", + "region_name_dict = {'TR1':'İstanbul',\n", + " 'TR2':'Batı Marmara',\n", + " 'TR3':'Ege',\n", + " 'TR4':'Doğu Marmara',\n", + " 'TR5':'Batı Anadolu',\n", + " 'TR6':'Akdeniz',\n", + " 'TR7':'Orta Anadolu',\n", + " 'TR8':'Batı Karadeniz',\n", + " 'TR9':'Doğu Karadeniz',\n", + " 'TRA':'Kuzeydoğu Anadolu',\n", + " 'TRC':'Güneydoğu Anadolu',\n", + " 'TRB':'Ortadoğu Anadolu'\n", + " }\n", "\n", "\n", "def create_region_polygons(region_dict, turkey_gdf):\n", " # Create a reverse dictionary where city codes map to region codes\n", - " city_to_region = {\n", - " city_code: region_code\n", - " for region_code, city_codes in region_dict.items()\n", - " for city_code in city_codes\n", - " }\n", + " city_to_region = {city_code: region_code for region_code, city_codes in region_dict.items() for city_code in city_codes}\n", "\n", " # Create a new column 'REGION' in the GeoDataFrame that maps each city to its region\n", - " turkey_gdf[\"REGION\"] = turkey_gdf[\"iso_3166_2\"].map(city_to_region)\n", + " turkey_gdf['REGION'] = turkey_gdf['iso_3166_2'].map(city_to_region)\n", "\n", " # Dissolve the GeoDataFrame on the 'REGION' column to combine city polygons into region polygons\n", - " region_gdf = turkey_gdf.dissolve(by=\"REGION\")\n", + " region_gdf = turkey_gdf.dissolve(by='REGION')\n", "\n", " # Reset the index of the new GeoDataFrame\n", " region_gdf.reset_index(inplace=True)\n", - "\n", - " return region_gdf.drop(columns=[\"iso_3166_2\"])" + " \n", + " return region_gdf.drop(columns=['iso_3166_2'])" ] }, { @@ -2050,10 +1981,10 @@ "turkey_regions = create_region_polygons(region_dict, turkey)\n", "\n", "# Rename 'REGION' column to 'ISO'\n", - "turkey_regions = turkey_regions.rename(columns={\"REGION\": \"iso_3166_2\"})\n", + "turkey_regions = turkey_regions.rename(columns={'REGION': 'iso_3166_2'})\n", "\n", "# Map the region_name_dict to a new 'NAME_1' column\n", - "turkey_regions[\"name\"] = turkey_regions[\"iso_3166_2\"].map(region_name_dict)" + "turkey_regions['name'] = turkey_regions['iso_3166_2'].map(region_name_dict)" ] }, { @@ -2125,7 +2056,7 @@ } ], "source": [ - "france = df[df.admin == \"France\"]\n", + "france = df[df.admin == 'France']\n", "not speed_run and france.plot(**plot_styles)" ] }, @@ -2155,13 +2086,12 @@ "outputs": [], "source": [ "def replace_name(df, old, new):\n", - " if old in list(df.name):\n", + " if old in list(df.name): \n", " index = df[df.name == old].index[0]\n", - " df.at[index, \"name\"] = new\n", - "\n", - "\n", - "replace_name(france, \"Seien-et-Marne\", \"Seine-et-Marne\")\n", - "replace_name(france, \"Haute-Rhin\", \"Haut-Rhin\")" + " df.at[index, 'name'] = new\n", + " \n", + "replace_name(france, 'Seien-et-Marne', 'Seine-et-Marne')\n", + "replace_name(france, 'Haute-Rhin', 'Haut-Rhin')" ] }, { @@ -2195,11 +2125,11 @@ ], "source": [ "france_copy = france.copy()\n", - "reposition(france_copy, france.name == \"Guadeloupe\", 57.4, 25.4, 1.5, 1.5)\n", - "reposition(france_copy, france.name == \"Martinique\", 58.4, 27.1, 1.5, 1.5)\n", - "reposition(france_copy, france.name == \"Guyane française\", 52, 37.7, 0.35, 0.35)\n", - "reposition(france_copy, france.name == \"La Réunion\", -55, 62.8, 1.5, 1.5)\n", - "reposition(france_copy, france.name == \"Mayotte\", -43, 54.3, 1.5, 1.5)\n", + "reposition(france_copy, france.name=='Guadeloupe', 57.4, 25.4, 1.5, 1.5)\n", + "reposition(france_copy, france.name=='Martinique', 58.4, 27.1, 1.5, 1.5)\n", + "reposition(france_copy, france.name=='Guyane française', 52, 37.7, 0.35, 0.35)\n", + "reposition(france_copy, france.name=='La Réunion', -55, 62.8, 1.5, 1.5)\n", + "reposition(france_copy, france.name=='Mayotte', -43, 54.3, 1.5, 1.5)\n", "\n", "not speed_run and france_copy.plot(figsize=(8, 8), **plot_styles)" ] @@ -2217,7 +2147,7 @@ "metadata": {}, "outputs": [], "source": [ - "france_regions = france_copy[[\"geometry\", \"region_cod\", \"region\"]]" + "france_regions = france_copy[['geometry','region_cod','region']]" ] }, { @@ -2226,11 +2156,9 @@ "metadata": {}, "outputs": [], "source": [ - "france_regions = france_regions.dissolve(by=[\"region_cod\", \"region\"]).reset_index()\n", + "france_regions = france_regions.dissolve(by=['region_cod', 'region']).reset_index()\n", "\n", - "france_regions = france_regions.rename(\n", - " columns={\"region\": \"name\", \"region_cod\": \"iso_3166_2\"}\n", - ")" + "france_regions = france_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})" ] }, { @@ -2283,13 +2211,11 @@ "metadata": {}, "outputs": [], "source": [ - "italy_regions = df[df.admin == \"Italy\"][[\"geometry\", \"region_cod\", \"region\"]]\n", + "italy_regions = df[df.admin == 'Italy'][['geometry','region_cod','region']]\n", "\n", - "italy_regions = italy_regions.dissolve(by=[\"region_cod\", \"region\"]).reset_index()\n", + "italy_regions = italy_regions.dissolve(by=['region_cod', 'region']).reset_index()\n", "\n", - "italy_regions = italy_regions.rename(\n", - " columns={\"region\": \"name\", \"region_cod\": \"iso_3166_2\"}\n", - ")" + "italy_regions = italy_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})" ] }, { @@ -2340,7 +2266,7 @@ "def apply_bounds(df, northwest, southeast):\n", " x1, y1 = northwest\n", " x2, y2 = southeast\n", - " boundry = shapely.geometry.Polygon([(x1, y1), (x1, y2), (x2, y2), (x2, y1)])\n", + " boundry = shapely.geometry.Polygon([(x1, y1),(x1, y2), (x2, y2), (x2, y1)])\n", " df = df.copy()\n", " return df[df.geometry.apply(lambda x: boundry.contains(x))]" ] @@ -2375,7 +2301,7 @@ } ], "source": [ - "netherlands = df[df.admin == \"Netherlands\"]\n", + "netherlands = df[df.admin == 'Netherlands']\n", "not speed_run and netherlands.plot(**plot_styles)" ] }, @@ -2452,7 +2378,7 @@ } ], "source": [ - "uk = df[df.admin == \"United Kingdom\"]\n", + "uk = df[df.admin == 'United Kingdom']\n", "not speed_run and uk.plot(**plot_styles)" ] }, @@ -2539,9 +2465,7 @@ } ], "source": [ - "philippines_copy = philippines_copy.rename(\n", - " columns={\"NAME_1\": \"name\", \"ISO\": \"iso_3166_2\"}\n", - ")\n", + "philippines_copy = philippines_copy.rename(columns={'NAME_1': 'name','ISO': 'iso_3166_2'})\n", "not speed_run and philippines_copy.plot(**plot_styles)" ] }, @@ -2560,9 +2484,7 @@ "metadata": {}, "outputs": [], "source": [ - "philippines_regions = df[df.admin == \"Philippines\"][\n", - " [\"geometry\", \"region_cod\", \"region\"]\n", - "]" + "philippines_regions = df[df.admin == 'Philippines'][['geometry','region_cod','region']]" ] }, { @@ -2571,12 +2493,8 @@ "metadata": {}, "outputs": [], "source": [ - "philippines_regions = philippines_regions.dissolve(\n", - " by=[\"region_cod\", \"region\"]\n", - ").reset_index()\n", - "philippines_regions = philippines_regions.rename(\n", - " columns={\"region\": \"name\", \"region_cod\": \"iso_3166_2\"}\n", - ")" + "philippines_regions = philippines_regions.dissolve(by=['region_cod', 'region']).reset_index()\n", + "philippines_regions = philippines_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})" ] }, { @@ -2599,12 +2517,10 @@ } ], "source": [ - "philippines_regions[\"name\"] = philippines_regions[\"name\"].replace(\n", - " {\n", - " \"Dinagat Islands (Region XIII)\": \"Caraga Administrative Region (Region XIII)\",\n", - " \"Autonomous Region in Muslim Mindanao (ARMM)\": \"Bangsamoro Autonomous Region in Muslim Mindanao (BARMM)\",\n", - " }\n", - ")" + "philippines_regions['name'] = philippines_regions['name'].replace({\n", + " 'Dinagat Islands (Region XIII)': 'Caraga Administrative Region (Region XIII)',\n", + " 'Autonomous Region in Muslim Mindanao (ARMM)': 'Bangsamoro Autonomous Region in Muslim Mindanao (BARMM)'\n", + "})" ] }, { @@ -2634,7 +2550,7 @@ } ], "source": [ - "not speed_run and philippines_regions.plot(figsize=(10, 7), **plot_styles)" + "not speed_run and philippines_regions.plot(figsize = (10, 7), **plot_styles)" ] }, { @@ -2721,21 +2637,21 @@ } ], "source": [ - "vietnam = df[df.admin == \"Vietnam\"]\n", + "vietnam = df[df.admin == 'Vietnam']\n", "vietnam_copy = vietnam.copy()\n", - "replace_name(vietnam_copy, \"Ðong Tháp\", \"Đồng Tháp\")\n", - "replace_name(vietnam_copy, \"Son La\", \"Sơn La\")\n", - "replace_name(vietnam_copy, \"Ha Tinh\", \"Hà Tĩnh\")\n", - "replace_name(vietnam_copy, \"Quàng Nam\", \"Quảng Nam\")\n", - "replace_name(vietnam_copy, \"Lai Chau\", \"Lai Châu\")\n", - "replace_name(vietnam_copy, \"Hồ Chí Minh city\", \"Thành phố Hồ Chí Minh\")\n", - "replace_name(vietnam_copy, \"Hau Giang\", \"Hậu Giang\")\n", - "replace_name(vietnam_copy, \"Ha Noi\", \"Hà Nội\")\n", - "replace_name(vietnam_copy, \"Can Tho\", \"Cần Thơ\")\n", - "replace_name(vietnam_copy, \"Đông Nam Bộ\", \"Đồng Nai\")\n", - "replace_name(vietnam_copy, \"Đông Bắc\", \"Bắc Kạn\")\n", - "replace_name(vietnam_copy, \"Đồng Bằng Sông Hồng\", \"Hưng Yên\")\n", - "for i in vietnam_copy[\"name\"]:\n", + "replace_name(vietnam_copy, 'Ðong Tháp', 'Đồng Tháp')\n", + "replace_name(vietnam_copy, 'Son La', 'Sơn La')\n", + "replace_name(vietnam_copy, 'Ha Tinh', 'Hà Tĩnh')\n", + "replace_name(vietnam_copy, 'Quàng Nam', 'Quảng Nam')\n", + "replace_name(vietnam_copy, 'Lai Chau', 'Lai Châu')\n", + "replace_name(vietnam_copy, 'Hồ Chí Minh city', 'Thành phố Hồ Chí Minh')\n", + "replace_name(vietnam_copy, 'Hau Giang', 'Hậu Giang')\n", + "replace_name(vietnam_copy, 'Ha Noi', 'Hà Nội')\n", + "replace_name(vietnam_copy, 'Can Tho', 'Cần Thơ')\n", + "replace_name(vietnam_copy, 'Đông Nam Bộ', 'Đồng Nai')\n", + "replace_name(vietnam_copy, 'Đông Bắc', 'Bắc Kạn')\n", + "replace_name(vietnam_copy, 'Đồng Bằng Sông Hồng', 'Hưng Yên')\n", + "for i in vietnam_copy['name']:\n", " print(i)" ] }, @@ -2772,8 +2688,9 @@ " \"portugal\": portugal_copy,\n", " \"ukraine\": ukraine_with_crimea,\n", " \"india\": india_copy,\n", - " \"vietnam\": vietnam_copy,\n", - "}" + " \"vietnam\": vietnam_copy\n", + "}\n", + "\n" ] }, { @@ -2827,19 +2744,15 @@ "source": [ "# Filter out countries that only have one region, making them effectively useless as a choropleth\n", "\n", - "\n", "def get_num_subdivisions(country):\n", " gdf = get_gdf(country)\n", - " subdivisions = gdf[\"iso_3166_2\"].unique()\n", + " subdivisions = gdf['iso_3166_2'].unique()\n", " if len(subdivisions) == 1:\n", " print(country, \"has only one subdivision - removing from countries array\")\n", " return len(subdivisions)\n", "\n", - "\n", "# we add the unnecessaery countries to a list here, for clearing out unneeded geojson files later\n", - "countries_to_purge = [\n", - " country for country in countries if get_num_subdivisions(country) <= 1\n", - "]\n", + "countries_to_purge = [country for country in countries if get_num_subdivisions(country) <= 1]\n", "\n", "# now we purge those from our main \"countries\" list to continue processing\n", "countries = [country for country in countries if get_num_subdivisions(country) > 1]" @@ -3079,20 +2992,15 @@ "}\n", "useful_columns = [\"ISO\", \"NAME_1\", \"geometry\"]\n", "\n", - "\n", "def get_simplify_factor_by_size(gdf):\n", " xmin, ymin, xmax, ymax = shapely.ops.unary_union(gdf[\"geometry\"]).bounds\n", " size = (xmax - xmin) * (ymax - ymin)\n", " print(\"Size\", round(size, 3), end=\"\\t\")\n", - " if size > 1000:\n", - " return 0.03\n", - " if size > 300:\n", - " return 0.02\n", - " if size > 100:\n", - " return 0.01\n", + " if size > 1000: return 0.03\n", + " if size > 300: return 0.02\n", + " if size > 100: return 0.01\n", " return 0\n", "\n", - "\n", "def simplify_if_needed(country, gdf):\n", " \"\"\"Simplify the maps based on country size\"\"\"\n", " country_alias = country_name_aliases.get(country, country)\n", @@ -3104,7 +3012,6 @@ " if factor:\n", " gdf[\"geometry\"] = gdf.simplify(factor)\n", "\n", - "\n", "def save_geojson(country):\n", " gdf = get_gdf(country)\n", " print(country, end=\"\\t\")\n", @@ -3115,15 +3022,12 @@ "\n", " simplify_if_needed(country, gdf)\n", "\n", - " print(f\"Saving geojson for {country}...\")\n", - " filename_country = country.replace(\" \", \"_\")\n", - " gdf[useful_columns].to_file(\n", - " f\"../src/countries/{filename_country}.geojson\", driver=\"GeoJSON\"\n", - " )\n", - "\n", + " print(f'Saving geojson for {country}...')\n", + " filename_country = country.replace(' ', '_')\n", + " gdf[useful_columns].to_file(f\"../src/countries/{filename_country}.geojson\", driver=\"GeoJSON\")\n", "\n", "for country in countries_to_purge:\n", - " filename_country = country.replace(\" \", \"_\")\n", + " filename_country = country.replace(' ', '_')\n", " filepath = f\"../src/countries/{filename_country}.geojson\"\n", " if os.path.exists(filepath):\n", " os.remove(filepath)\n", @@ -3184,10 +3088,11 @@ "metadata": {}, "outputs": [], "source": [ + "\n", + "\n", "# Function to convert country name to a valid JavaScript identifier\n", "def to_js_identifier(name):\n", - " return name.replace(\" \", \"_\").replace(\"-\", \"_\")\n", - "\n", + " return name.replace(' ', '_').replace('-', '_')\n", "\n", "# License boilerplate\n", "license_boilerplate = \"\"\"/*\n", @@ -3214,19 +3119,10 @@ "countries_combined = sorted(countries_combined)\n", "\n", "# Generate TypeScript import statements\n", - "imports = \"\\n\".join(\n", - " [\n", - " f\"import {to_js_identifier(country)} from './countries/{to_js_identifier(country)}.geojson';\"\n", - " for country in countries_combined\n", - " ]\n", - ")\n", + "imports = \"\\n\".join([f\"import {to_js_identifier(country)} from './countries/{to_js_identifier(country)}.geojson';\" for country in countries_combined])\n", "\n", "# Generate the export object\n", - "exports = (\n", - " \"export const countries = {\\n \"\n", - " + \",\\n \".join([to_js_identifier(country) for country in countries_combined])\n", - " + \",\\n};\"\n", - ")\n", + "exports = \"export const countries = {\\n \" + \",\\n \".join([to_js_identifier(country) for country in countries_combined]) + \",\\n};\"\n", "\n", "# Additional exports\n", "additional_exports = \"\"\"\n", @@ -3267,13 +3163,8 @@ "# DOCS JSON:\n", "# Replace underscores with spaces and title-case each country name\n", "formatted_countries = [country.replace(\"_\", \" \") for country in countries_combined]\n", - "formatted_countries = [\n", - " country.upper() if country in {\"usa\", \"uk\"} else country.title()\n", - " for country in formatted_countries\n", - "]\n", - "formatted_countries = [\n", - " country.replace(\" Regions\", \" (regions)\") for country in formatted_countries\n", - "]\n", + "formatted_countries = [country.upper() if country in {\"usa\", \"uk\"} else country.title() for country in formatted_countries]\n", + "formatted_countries = [country.replace(\" Regions\",\" (regions)\") for country in formatted_countries]\n", "\n", "\n", "# Create a dictionary in the desired format\n",