diff --git a/utils/dbt_docs/get_dbt_package_versions.py b/utils/dbt_docs/get_dbt_package_versions.py
index f758461c80..86a3bfe39e 100644
--- a/utils/dbt_docs/get_dbt_package_versions.py
+++ b/utils/dbt_docs/get_dbt_package_versions.py
@@ -1,5 +1,4 @@
 import requests
-import base64
 import re
 import json
 import sys
@@ -8,34 +7,48 @@
 # Set your PAT key so you get the 5000 calls per hour for the github api
 # call with `token as cmd argument if using a local PAT key`
 
-headers = {'Authorization': f"{sys.argv[1]}"}
+headers = {"Authorization": f"{sys.argv[1]}"}
 
 
 # Functions to decode the contents and process into the format we want
 
 
 def process_package(packages):
-    decoded = [x for x in packages.split('\n')[1:] if x != '' and x.replace(' ', '')[0] != '#']
+    decoded = [
+        x for x in packages.split("\n")[1:] if x != "" and x.replace(" ", "")[0] != "#"
+    ]
     req_pkgs = {}
     for i in range(len(decoded)):
         if i % 2 == 0:
-            pkg_name = decoded[i].split(' ')[-1]
+            pkg_name = decoded[i].split(" ")[-1]
             # If it is hard pinned to something this will fail.
-            versions_re = re.compile('version: (.*)')
-            req_versions = versions_re.findall(
-                decoded[i+1])[0].replace('"', '').replace(',', '').replace('[', '').replace(']', '')
+            versions_re = re.compile("version: (.*)")
+            req_versions = (
+                versions_re.findall(decoded[i + 1])[0]
+                .replace('"', "")
+                .replace("'", "")
+                .replace(",", "")
+                .replace("[", "")
+                .replace("]", "")
+            )
             req_pkgs[pkg_name] = req_versions
     return req_pkgs
 
 
 def process_versions(project):
-    dbt_versions_re = re.compile('require-dbt-version: (.*)')
+    dbt_versions_re = re.compile("require-dbt-version: (.*)")
     try:
-        req_versions = dbt_versions_re.findall(project)[0].replace(
-            '"', '').replace(',', '').replace('[', '').replace(']', '')
+        req_versions = (
+            dbt_versions_re.findall(project)[0]
+            .replace('"', "")
+            .replace("'", "")
+            .replace(",", "")
+            .replace("[", "")
+            .replace("]", "")
+        )
     except IndexError:
         # If we haven't specified, just assume it needs a new version
-        req_versions = '>=1.3.0 <2.0.0'
+        req_versions = ">=1.3.0 <2.0.0"
     return req_versions
 
 
@@ -44,21 +57,24 @@ def main():
     for our_package in all_packages:
         # get tags
         tags = requests.get(
-            f'https://api.github.com/repos/snowplow/{our_package[1]}/git/refs/tags', headers=headers)
+            f"https://api.github.com/repos/snowplow/{our_package[1]}/git/refs/tags",
+            headers=headers,
+        )
         package_list[our_package[0]] = {}
+        print(tags)
         for tag in tags.json():
-            ref = tag['ref'].split('/')[-1]
+            ref = tag["ref"].split("/")[-1]
             package_list[our_package[0]][ref] = {}
             # Get the files and process them into our dictionary
             packages = github_read_file(
-                'snowplow', our_package[1], 'packages.yml', ref, headers)
+                "snowplow", our_package[1], "packages.yml", ref, headers
+            )
             project = github_read_file(
-                'snowplow', our_package[1], 'dbt_project.yml', ref, headers)
+                "snowplow", our_package[1], "dbt_project.yml", ref, headers
+            )
 
-            package_list[our_package[0]
-                         ][ref]['dbtversion'] = process_versions(project)
-            package_list[our_package[0]
-                         ][ref]['packages'] = process_package(packages)
+            package_list[our_package[0]][ref]["dbtversion"] = process_versions(project)
+            package_list[our_package[0]][ref]["packages"] = process_package(packages)
 
     # Currently nothing is done to deal with transient dbt version dependencies, e.g. if a required package has a smaller dbt version range than the main
    # package. This should be rare for us, and the reason it can't be done is because there isn't a good semver package for python that
@@ -72,9 +88,11 @@ def main():
         for ver, ver_info in versions.items():
             # Set up one of depth/breadth first search tracker stuff
             visited_packages = set([pkg])
-            unvisited_packages = list(ver_info['packages'].keys())
-            unvisited_packages_versions = [re.compile(
-                '(\d*\.\d*\.\d*\S*)').findall(x)[0] for x in ver_info['packages'].values()]
+            unvisited_packages = list(ver_info["packages"].keys())
+            unvisited_packages_versions = [
+                re.compile("(\d*\.\d*\.\d*\S*)").findall(x)[0]
+                for x in ver_info["packages"].values()
+            ]
             current_package = None
             current_package_version = None
 
@@ -83,23 +101,32 @@ def main():
                 current_package = unvisited_packages.pop()
                 current_package_version = unvisited_packages_versions.pop()
                 # Get the children packages for that package
-                child_packages = package_list.get(current_package, {}).get(
-                    current_package_version, {}).get('packages', {})
+                child_packages = (
+                    package_list.get(current_package, {})
+                    .get(current_package_version, {})
+                    .get("packages", {})
+                )
                 for child_pkg, child_pkg_version in child_packages.items():
-                    if child_pkg not in visited_packages and child_pkg not in unvisited_packages:
+                    if (
+                        child_pkg not in visited_packages
+                        and child_pkg not in unvisited_packages
+                    ):
                         # If that package isn't already in our list of checked or list of ones to check, add it and it's minimum version
                         unvisited_packages.append(child_pkg)
-                        unvisited_packages_versions.append(re.compile(
-                            '(\d*\.\d*\.\d*\S*)').findall(child_pkg_version)[0])
+                        unvisited_packages_versions.append(
+                            re.compile("(\d*\.\d*\.\d*\S*)").findall(child_pkg_version)[
+                                0
+                            ]
+                        )
             # Alter the original packages list of packages to include the downstream dependecies, prefer this order as | favours rightmost value when key clash
-            ver_info['packages'] = child_packages | ver_info['packages']
+            ver_info["packages"] = child_packages | ver_info["packages"]
 
     # write the file out
     with open("src/dbtVersions.js", "w") as fdesc:
-        fdesc.write('export const dbtVersions = ')
+        fdesc.write("export const dbtVersions = ")
         json.dump(dict(sorted(package_list.items())), fdesc, indent=4)
-        fdesc.write('\n')
+        fdesc.write("\n")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
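
For reviewers, a quick note on the merge order near the end of `main()` (an illustration only, not part of the patch): the in-code comment relies on the fact that Python's dict union keeps the right-hand value when both operands share a key, so writing `child_packages | ver_info["packages"]` lets a package's own directly declared pins win over anything pulled in from its transitive dependencies. A minimal sketch with made-up package names and version ranges:

```python
# Hypothetical example only: demonstrates dict-union precedence (PEP 584, Python 3.9+),
# which is also why the script needs a reasonably recent Python to run.
child_packages = {"dbt_utils": ">=1.0.0 <2.0.0", "snowplow_utils": ">=0.14.0 <0.15.0"}
direct_packages = {"snowplow_utils": ">=0.15.0 <0.16.0"}

merged = child_packages | direct_packages
print(merged)
# {'dbt_utils': '>=1.0.0 <2.0.0', 'snowplow_utils': '>=0.15.0 <0.16.0'}
# On the clashing key the right-hand (directly declared) pin is kept,
# matching the "| favours rightmost value" comment in the diff.
```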