This repository has been archived by the owner on Aug 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 18
/
package_directory.nim
533 lines (438 loc) · 14.9 KB
/
package_directory.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
#
# Nimble package directory
#
# Copyright 2016-2023 Federico Ceratto <[email protected]> and other contributors
# Released under GPLv3 License, see LICENSE file
#
import std/[
asyncdispatch,
deques,
httpclient,
httpcore,
json,
os,
sequtils,
sets,
streams,
strutils,
tables,
times,
uri
]
from std/xmltree import escape
from std/algorithm import sort, sorted, sortedByIt, reversed
from std/marshal import store, load
from std/posix import onSignal, SIGINT, SIGTERM, getpid
#from nimblepkg import getTagsListRemote, getVersionList
import jester,
morelogging,
sdnotify,
statsd_client
import github, util, signatures, persist
const
  # Interval between GitHub packages.json polls, in seconds
  nimble_packages_polling_time_s = 10 * 60
  # Interval between systemd watchdog pings, in seconds
  sdnotify_ping_time_s = 10
  # On-disk cache file holding the package history (std/marshal JSON)
  cache_fn = ".cache.json"

# init

type
  RssItem = object
    ## One entry of the /packages.xml RSS feed
    title, desc, pub_date: string
    url, guid: Uri

# the pkg name is normalized
var pkgs: Pkgs = newTable[string, Pkg]()

# tag -> package name
# initialized/updated by load_packages
var packages_by_tag = newTable[string, seq[string]]()

# word -> package name
# initialized/updated by load_packages
var packages_by_description_word = newTable[string, seq[string]]()

# package access statistics
# volatile
var most_queried_packages = initCountTable[string]()

type
  PkgHistoryItem = object
    ## Records when a package was first seen in packages.json
    name: string
    first_seen_time: Time

  Cache = object of RootObj
    # package creation/update history - new ones at bottom
    pkgs_history: seq[PkgHistoryItem]
    # pkgs list. Extra data from GH is embedded
    #pkgs: TableRef[string, Pkg]

# persisted via save() / load_cache()
var cache: Cache
proc save(cache: Cache) =
  ## Persist the cache to disk at `cache_fn` using std/marshal.
  let f = newFileStream(cache_fn, fmWrite)
  log_debug "writing " & absolutePath(cache_fn)
  # fix: the stream was leaked if store() raised; close it on all paths
  defer: f.close()
  f.store(cache)
proc load_cache(): Cache =
  ## Load cache from disk or create empty cache.
  ## On a missing or unreadable/corrupt cache file a fresh cache is
  ## created and persisted immediately.
  log_debug "loading cache at $#" % cache_fn
  result.pkgs_history = @[]
  # fix: the original passed a possibly-nil stream to marshal.load and
  # relied on a bare `except:` catching the resulting defect; it also
  # never closed the stream
  let strm = newFileStream(cache_fn, fmRead)
  try:
    if strm == nil:
      raise newException(IOError, "cannot open " & cache_fn)
    load(strm, result)
    log_debug "cache loaded"
  except CatchableError:
    # first run or corrupt cache file: start fresh and persist it
    log_info "initializing new cache"
    result.pkgs_history = @[]
    result.save()
    log_debug "new cache created"
  finally:
    if strm != nil:
      strm.close()
# HTML templates
include "templates/base.tmpl"
include "templates/home.tmpl"
include "templates/pkg.tmpl"
include "templates/pkg_list.tmpl"
include "templates/rss.tmpl"
proc search_packages*(query: string): CountTable[string] =
  ## Search packages by name, tag and keyword.
  ## Returns package names scored by match quality, best match first:
  ## exact name match = 5, partial name or tag match = 3, description
  ## word match = 1.
  result = initCountTable[string]()
  let terms = query.strip.toLowerAscii.split({' ', ','})
  for term in terms:
    # fix: consecutive separators (or an empty query) yield empty tokens,
    # and `pn.contains("")` is always true, so every package used to be
    # scored 3 for each empty token
    if term.len == 0:
      continue
    let norm = term.normalize()
    # matching by pkg name, weighted for full or partial match
    for pn in pkgs.keys():
      if norm == pn:
        result.inc(pn, val = 5)
      elif pn.contains(norm):
        result.inc(pn, val = 3)
    # matching by tags is weighted more than by word
    if packages_by_tag.hasKey(term):
      for pn in packages_by_tag[term]:
        result.inc(pn, val = 3)
    # matching by description, weighted 1
    if packages_by_description_word.hasKey(term.toLowerAscii):
      for pn in packages_by_description_word[term.toLowerAscii]:
        result.inc(pn, val = 1)
  # sort packages by best match
  result.sort()
proc load_packages*() =
  ## Load packages.json (fetching it from GitHub on first run) and
  ## rebuild pkgs, packages_by_tag and packages_by_description_word.
  log_debug "loading $#" % conf.packages_list_fname
  pkgs.clear()
  # fix: the two indexes were only ever appended to, so every reload
  # (poller, /update_package) duplicated their entries and inflated
  # search scores
  packages_by_tag.clear()
  packages_by_description_word.clear()

  if not conf.packages_list_fname.file_exists:
    log_info "packages list file not found. First run?"
    let new_pkg_raw = waitFor fetch_nimble_packages()
    log_info "writing $#" % absolutePath(conf.packages_list_fname)
    conf.packages_list_fname.writeFile(new_pkg_raw)

  let pkg_list = conf.packages_list_fname.parseFile
  for pdata in pkg_list:
    # skip entries without name/tags (e.g. alias entries)
    if not pdata.hasKey("name") or not pdata.hasKey("tags"):
      continue

    # Normalize pkg name
    pdata["name"].str = pdata["name"].str.normalize()
    let pname = pdata["name"].str
    if pname in pkgs:
      log.warn "Duplicate pkg name $#" % pname
      continue

    pkgs[pname] = pdata

    for tag in pdata["tags"]:
      packages_by_tag.mgetOrPut(tag.str, @[]).add pname

    # collect packages matching a word in their descriptions
    # fix: entries lacking "description" used to raise KeyError and
    # abort the whole load
    if pdata.hasKey("description"):
      for orig_word in pdata["description"].str.split({' ', ','}):
        if orig_word.len < 3:
          continue # ignore short words
        let word = orig_word.toLowerAscii
        packages_by_description_word.mgetOrPut(word, @[]).add pname

  log_info "Loaded ", $pkgs.len, " packages"
proc translate_term_colors*(outp: string): string =
  ## Translate ANSI terminal color escape sequences into HTML spans
  ## with CSS classes. Longer composite sequences are listed (and
  ## replaced) before their shorter suffixes.
  # fix: the escape characters ("\e") are control characters that were
  # dropped from the table in an earlier rendering; without them the
  # patterns can never match real terminal output
  const sequences = [
    ("\e[36m\e[2m", "<span>"),
    ("\e[32m\e[1m", """<span class="term-success">"""),
    ("\e[33m\e[1m", """<span class="term-red">"""),
    ("\e[31m\e[1m", """<span class="term-red">"""),
    ("\e[36m\e[1m", """<span class="term-blue">"""),
    ("\e[0m\e[31m\e[0m", "</span>"),
    ("\e[0m\e[32m\e[0m", "</span>"),
    ("\e[0m\e[33m\e[0m", "</span>"),
    ("\e[0m\e[36m\e[0m", "</span>"),
    ("\e[0m\e[0m", "</span>"),
    ("\e[2m", "<span>"),
    ("\e[36m", "<span>"),
    ("\e[33m", """<span class="term-blue">"""),
  ]
  result = outp
  for s in sequences:
    result = result.replace(s[0], s[1])
proc sorted*[T](t: CountTable[T]): CountTable[T] =
  ## Return a copy of `t` ordered by descending count.
  result = t
  result.sort()
proc top_keys*[T](t: CountTable[T], n: int): seq[T] =
  ## Return the (up to) `n` most common keys of `t`, most common first.
  ## Returns fewer than `n` keys if the table is smaller.
  result = @[]
  # fix: the original compared `result.len == n` after appending, so
  # n <= 0 never matched and ALL keys were returned
  if n <= 0:
    return
  var tcopy = t
  tcopy.sort()
  for k in tcopy.keys:
    result.add k
    if result.len >= n:
      return
# Jester settings
settings:
  port = conf.port  # HTTP listen port taken from conf
# routes
router mainRouter:

  get "/about.html":
    include "templates/about.tmpl"
    resp base_page(request, generate_about_page())

  # Home page: most queried, newest and trending packages
  get "/":
    log_req request
    stats.incr("views")
    # Grab the most queried packages
    var top_pkgs: seq[Pkg] = @[]
    for pname in top_keys(most_queried_packages, 5):
      if pkgs.hasKey(pname):
        top_pkgs.add pkgs[pname]
    # Grab the newest packages
    log_debug "pkgs history len: $#" % $cache.pkgs_history.len
    var new_pkgs: seq[Pkg] = @[]
    # walk the history from the newest entry backwards
    for n in 1..min(cache.pkgs_history.len, 10):
      let package_name: string =
        # strip a leading "nim-" prefix before normalizing the name
        if cache.pkgs_history[^n].name.len > 4 and cache.pkgs_history[^n].name[
            0..3] == "nim-":
          cache.pkgs_history[^n].name[4..^1].normalize()
        else:
          cache.pkgs_history[^n].name.normalize()
      if pkgs.hasKey(package_name):
        new_pkgs.add pkgs[package_name]
      else:
        log_debug "$# not found in package list" % package_name
    # Grab trending packages, as measured by GitHub
    let trending_pkgs = await fetch_trending_packages(request, pkgs)
    resp base_page(request, generate_home_page(top_pkgs, new_pkgs,
      trending_pkgs))

  # Full-text package search
  get "/search":
    log_req request
    stats.incr("views")
    var searched_pkgs: seq[Pkg] = @[]
    for name in search_packages(@"query").keys():
      searched_pkgs.add pkgs[name]
    stats.gauge("search_found_pkgs", searched_pkgs.len)
    let body = generate_search_box(@"query") & generate_pkg_list_page(searched_pkgs)
    resp base_page(request, body)

  # Single package page, refreshing GitHub metadata when stale
  get "/pkg/@pkg_name/?":
    log_req request
    stats.incr("views")
    let pname = normalize(@"pkg_name")
    if not pkgs.hasKey(pname):
      resp base_page(request, "Package not found")
    most_queried_packages.inc pname
    let pkg = pkgs[pname]
    let url = pkg["url"].str
    if url.startswith("https://github.com/") or url.startswith("http://github.com/"):
      # refresh GH metadata only if older than github_caching_time
      if not pkg.hasKey("github_last_update_time") or pkg["github_last_update_time"].num +
          github_caching_time < epochTime().int:
        # pkg is on GitHub and needs updating
        pkg["github_last_update_time"] = newJInt epochTime().int
        let owner = url.split('/')[3]
        let repo_name = url.split('/')[4]
        pkg["github_owner"] = newJString owner
        pkg["github_readme"] = await fetch_github_readme(owner, repo_name)
        pkg["doc"] = await fetch_github_doc_pages(owner, repo_name)
        await pkg.fetch_github_versions(owner, repo_name)
    resp base_page(request, generate_pkg_page(pkg))

  post "/update_package":
    ## Create or update a package description
    log_req request
    stats.incr("views")
    const required_fields = @["name", "url", "method", "tags", "description",
      "license", "web", "signatures", "authorized_keys"]
    var pkg_data: JsonNode
    try:
      pkg_data = parseJson(request.body)
    except:
      log_info "Unable to parse JSON payload"
      halt Http400, "Unable to parse JSON payload"

    # reject payloads with missing fields
    for field in required_fields:
      if not pkg_data.hasKey(field):
        log_info "Missing required field $#" % field
        halt Http400, "Missing required field $#" % field

    # verify the GPG signature over the payload (minus the signature itself)
    let signature = pkg_data["signatures"][0].str
    try:
      let pkg_data_copy = pkg_data.copy()
      pkg_data_copy.delete("signatures")
      let key_id = verify_gpg_signature(pkg_data_copy, signature)
      log_info "received key", key_id
    except:
      log_info "Invalid signature"
      halt Http400, "Invalid signature"

    let name = pkg_data["name"].str

    # TODO: locking
    load_packages()

    # the package exists with identical name
    let pkg_already_exists = pkgs.hasKey(name)

    if not pkg_already_exists:
      # scan for naming collisions
      let norm_name = name.normalize()
      for existing_pn in pkgs.keys():
        if norm_name == existing_pn.normalize():
          log.info "Another package named $# already exists" % existing_pn
          halt Http400, "Another package named $# already exists" % existing_pn

    if pkg_already_exists:
      # updates must be signed by a key already authorized for the package
      try:
        let old_keys = pkgs[name]["authorized_keys"].getElems.mapIt(it.str)
        let pkg_data_copy = pkg_data.copy()
        pkg_data_copy.delete("signatures")
        let key_id = verify_gpg_signature_is_allowed(pkg_data_copy, signature, old_keys)
        log_info "$# updating package $#" % [key_id, name]
      except:
        log_info "Key not accepted"
        halt Http400, "Key not accepted"

    # accept the package and rewrite packages.json sorted by name
    pkgs[name] = pkg_data
    var new_pkgs = newJArray()
    for pname in toSeq(pkgs.keys()).sorted(system.cmp):
      new_pkgs.add pkgs[pname]
    conf.packages_list_fname.writeFile(new_pkgs.pretty.cleanup_whitespace)

    log_info if pkg_already_exists: "Updated existing package $#" % name
      else: "Added new package $#" % name
    resp base_page(request, "OK")

  get "/packages.json":
    ## Serve the packages list file
    log_req request
    stats.incr("views")
    resp conf.packages_list_fname.readFile

  get "/api/v1/package_count":
    ## Serve the package count
    log_req request
    stats.incr("views")
    resp $pkgs.len

  get "/packages.xml":
    ## New and updated packages feed
    log_req request
    stats.incr("views_rss")
    let baseurl = conf.public_baseurl.parseUri
    let url = baseurl / "packages.xml"

    # build one RSS item per history entry still present in pkgs
    var rss_items: seq[RssItem] = @[]
    for item in cache.pkgs_history:
      let pn = item.name.normalize()
      if not pkgs.hasKey(pn):
        #log_debug "skipping $#" % pn
        continue
      let pkg = pkgs[pn]
      let item_url = baseurl / "pkg" / pn
      let i = RssItem(
        title: pn,
        desc: xmltree.escape(pkg["description"].str),
        url: item_url,
        guid: item_url,
        pub_date: $item.first_seen_time.utc.format("ddd, dd MMM yyyy hh:mm:ss zz")
      )
      rss_items.add i

    let r = generate_rss_feed(
      title = "Nim packages",
      desc = "New and updated Nim packages",
      url = url,
      build_date = getTime().utc.format("ddd, dd MMM yyyy hh:mm:ss zz"),
      pub_date = getTime().utc.format("ddd, dd MMM yyyy hh:mm:ss zz"),
      ttl = 3600,
      rss_items
    )
    resp(r, contentType = "application/rss+xml")

  # Minimal runtime statistics page
  get "/stats":
    log_req request
    stats.incr("views")
    resp base_page(request, """
<div class="container" style="padding-top: 10rem;">
<p class="text-center">Runtime: $#</p>
<p class="text-center">Queried packages count: $#</p>
</div>
""" % [$cpuTime(), $len(most_queried_packages)])

  get "/robots.txt":
    ## Serve robots.txt to throttle bots
    const robots = """
User-agent: DataForSeoBot
Disallow: /

User-agent: *
Disallow: /about.html
Disallow: /api
Disallow: /ci
Disallow: /docs
Disallow: /pkg
Disallow: /search
Disallow: /searchitem
Crawl-delay: 300
"""
    resp(robots, contentType = "text/plain")
proc run_systemd_sdnotify_pinger(ping_time_s: int) {.async.} =
  ## Ping systemd watchdog using sd_notify
  ## Loops forever; runs as a background async task.
  const msg = "NOTIFY_SOCKET env var not found - pinging to logfile"
  if not existsEnv("NOTIFY_SOCKET"):
    # not running under systemd (or watchdog disabled): loop forever,
    # writing a debug "ping" to the log instead
    log_info msg
    echo msg
    while true:
      log_debug "*ping*"
      await sleepAsync ping_time_s * 1000
    # never break

  # running under systemd: signal readiness, report our PID, then ping
  # the watchdog every ping_time_s seconds
  let sd = newSDNotify()
  sd.notify_ready()
  sd.notify_main_pid(getpid())
  while true:
    sd.ping_watchdog()
    await sleepAsync ping_time_s * 1000
proc poll_nimble_packages(poll_time_s: int) {.async.} =
  ## Poll GitHub for packages.json
  ## Overwrites the packages.json local file!
  ## Loops forever; errors in one polling cycle are logged and the
  ## loop continues (best-effort).
  log_debug "starting GH packages.json polling"
  var first_run = true
  while true:
    if first_run:
      first_run = false  # poll immediately on startup
    else:
      await sleepAsync poll_time_s * 1000
    log_debug "Polling GitHub packages.json"
    try:
      let new_pkg_raw = await fetch_nimble_packages()
      if new_pkg_raw == conf.packages_list_fname.readFile:
        # upstream file unchanged: just refresh the gauges
        log_debug "No changes"
        stats.gauge("packages_all_known", pkgs.len)
        stats.gauge("packages_history", cache.pkgs_history.len)
        continue

      # record first-seen time for packages that are new to us
      for pdata in new_pkg_raw.parseJson:
        if pdata.hasKey("name"):
          let pname = pdata["name"].str.normalize()
          if not pkgs.hasKey(pname):
            cache.pkgs_history.add PkgHistoryItem(name: pname,
              first_seen_time: getTime())
            log_debug "New pkg added on GH: $#" % pname

      cache.save()
      log_debug "writing $#" % (getCurrentDir() / conf.packages_list_fname)
      conf.packages_list_fname.writeFile(new_pkg_raw)
      load_packages()

      # log history entries that no longer exist in the live list
      for item in cache.pkgs_history:
        let pname = item.name.normalize()
        if not pkgs.hasKey(pname):
          log_debug "$# is gone" % pname

      stats.gauge("packages_all_known", pkgs.len)
      stats.gauge("packages_history", cache.pkgs_history.len)
    except:
      # deliberate catch-all: keep the poller alive across any failure
      log.error getCurrentExceptionMsg()
onSignal(SIGINT, SIGTERM):
  ## Exit signal handler
  log.info "Exiting"
  cache.save()  # persist package history before shutdown
  quit()
proc main() =
  ## Entry point: load packages and cache, start background async
  ## tasks, then serve HTTP via Jester (blocks forever).
  #setup_seccomp()
  log_info "starting"
  conf.tmp_nimble_root_dir.createDir()
  load_packages()
  cache = load_cache()
  # background tasks: systemd watchdog pinger and packages.json poller
  asyncCheck run_systemd_sdnotify_pinger(sdnotify_ping_time_s)
  asyncCheck poll_nimble_packages(nimble_packages_polling_time_s)

  log_info "starting server"
  var server = initJester(mainRouter)
  server.serve()

when isMainModule:
  main()