Skip to content

Commit

Permalink
Remove additional quote types
Browse files Browse the repository at this point in the history
  • Loading branch information
rlh1994 committed Feb 5, 2024
1 parent 58c84f5 commit 829a1e8
Showing 1 changed file with 58 additions and 31 deletions.
89 changes: 58 additions & 31 deletions utils/dbt_docs/get_dbt_package_versions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import requests
import base64
import re
import json
import sys
Expand All @@ -8,34 +7,48 @@

# Set your PAT key so you get the 5000 calls per hour for the GitHub API.
# Call with `token <YOUR_TOKEN>` as the cmd argument if using a local PAT key.
headers = {'Authorization': f"{sys.argv[1]}"}
headers = {"Authorization": f"{sys.argv[1]}"}

# Functions to decode the contents and process into the format we want


def process_package(packages):
    """Extract the required package versions from the text of a packages.yml.

    The first line of ``packages`` is skipped (presumably the ``packages:``
    header), as are blank, whitespace-only and comment lines. The remaining
    lines are consumed in pairs: the last whitespace-separated token of the
    first line is the package name, and the second line must contain
    ``version: ...``.

    Args:
        packages: Raw text content of a packages.yml file.

    Returns:
        A dict mapping each package name to its version requirement with
        quotes, commas and square brackets removed, e.g.
        ``{"dbt-labs/dbt_utils": ">=0.8.0 <0.9.0"}``.

    Raises:
        IndexError: If an entry has no following line, or that line has no
            ``version: `` key (e.g. a package that is not specified by
            version range).
    """
    versions_re = re.compile("version: (.*)")
    # One pass to drop both quote types, commas and list brackets.
    strip_chars = str.maketrans("", "", "\"',[]")

    # Use strip() so whitespace-only lines and tab-indented comments are
    # skipped instead of crashing or being mistaken for an entry.
    entries = [
        line
        for line in packages.split("\n")[1:]
        if line.strip() and not line.strip().startswith("#")
    ]

    req_pkgs = {}
    for i in range(0, len(entries), 2):
        # split() without an argument ignores trailing whitespace / "\r".
        pkg_name = entries[i].split()[-1]
        # If there is no `version:` line for this entry this will fail.
        req_versions = (
            versions_re.findall(entries[i + 1])[0].translate(strip_chars).strip()
        )
        req_pkgs[pkg_name] = req_versions

    return req_pkgs


def process_versions(project):
    """Extract the required dbt version range from a dbt_project.yml.

    Args:
        project: Raw text content of a dbt_project.yml file.

    Returns:
        The value of the first ``require-dbt-version: `` line with quotes,
        commas and square brackets removed (e.g. ``">=1.4.0 <2.0.0"``), or
        ``">=1.3.0 <2.0.0"`` when the key is not present.
    """
    match = re.search("require-dbt-version: (.*)", project)
    if match is None:
        # If we haven't specified, just assume it needs a new version
        return ">=1.3.0 <2.0.0"
    # One pass to drop both quote types, commas and list brackets; strip()
    # removes a trailing "\r" that `.` captures in CRLF files.
    strip_chars = str.maketrans("", "", "\"',[]")
    return match.group(1).translate(strip_chars).strip()


Expand All @@ -44,21 +57,24 @@ def main():
for our_package in all_packages:
# get tags
tags = requests.get(
f'https://api.github.com/repos/snowplow/{our_package[1]}/git/refs/tags', headers=headers)
f"https://api.github.com/repos/snowplow/{our_package[1]}/git/refs/tags",
headers=headers,
)
package_list[our_package[0]] = {}
print(tags)
for tag in tags.json():
ref = tag['ref'].split('/')[-1]
ref = tag["ref"].split("/")[-1]
package_list[our_package[0]][ref] = {}
# Get the files and process them into our dictionary
packages = github_read_file(
'snowplow', our_package[1], 'packages.yml', ref, headers)
"snowplow", our_package[1], "packages.yml", ref, headers
)
project = github_read_file(
'snowplow', our_package[1], 'dbt_project.yml', ref, headers)
"snowplow", our_package[1], "dbt_project.yml", ref, headers
)

package_list[our_package[0]
][ref]['dbtversion'] = process_versions(project)
package_list[our_package[0]
][ref]['packages'] = process_package(packages)
package_list[our_package[0]][ref]["dbtversion"] = process_versions(project)
package_list[our_package[0]][ref]["packages"] = process_package(packages)

# Currently nothing is done to deal with transitive dbt version dependencies, e.g. if a required package has a smaller dbt version range than the main
# package. This should be rare for us, and the reason it can't be done is because there isn't a good semver package for python that
Expand All @@ -72,9 +88,11 @@ def main():
for ver, ver_info in versions.items():
# Set up one of depth/breadth first search tracker stuff
visited_packages = set([pkg])
unvisited_packages = list(ver_info['packages'].keys())
unvisited_packages_versions = [re.compile(
'(\d*\.\d*\.\d*\S*)').findall(x)[0] for x in ver_info['packages'].values()]
unvisited_packages = list(ver_info["packages"].keys())
unvisited_packages_versions = [
re.compile("(\d*\.\d*\.\d*\S*)").findall(x)[0]
for x in ver_info["packages"].values()
]
current_package = None
current_package_version = None

Expand All @@ -83,23 +101,32 @@ def main():
current_package = unvisited_packages.pop()
current_package_version = unvisited_packages_versions.pop()
# Get the children packages for that package
child_packages = package_list.get(current_package, {}).get(
current_package_version, {}).get('packages', {})
child_packages = (
package_list.get(current_package, {})
.get(current_package_version, {})
.get("packages", {})
)
for child_pkg, child_pkg_version in child_packages.items():
if child_pkg not in visited_packages and child_pkg not in unvisited_packages:
if (
child_pkg not in visited_packages
and child_pkg not in unvisited_packages
):
# If that package isn't already in our list of checked or list of ones to check, add it and its minimum version
unvisited_packages.append(child_pkg)
unvisited_packages_versions.append(re.compile(
'(\d*\.\d*\.\d*\S*)').findall(child_pkg_version)[0])
unvisited_packages_versions.append(
re.compile("(\d*\.\d*\.\d*\S*)").findall(child_pkg_version)[
0
]
)
# Alter the original packages list of packages to include the downstream dependencies, prefer this order as | favours the rightmost value when keys clash
ver_info['packages'] = child_packages | ver_info['packages']
ver_info["packages"] = child_packages | ver_info["packages"]

# write the file out
with open("src/dbtVersions.js", "w") as fdesc:
fdesc.write('export const dbtVersions = ')
fdesc.write("export const dbtVersions = ")
json.dump(dict(sorted(package_list.items())), fdesc, indent=4)
fdesc.write('\n')
fdesc.write("\n")


if __name__ == '__main__':
if __name__ == "__main__":
main()

0 comments on commit 829a1e8

Please sign in to comment.