-
Notifications
You must be signed in to change notification settings - Fork 13
/
random_data.py
46 lines (30 loc) · 1.43 KB
/
random_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import numpy as np
from scipy import sparse
import cPickle as pickle
'''
Generate a random copy of data
input:
output: dict with two fields:
trainset: dict with two fields
scores: a sparse matrix, each ij entry is the rating of movie j given by person i, or the count of item j in basket i
atts : a matrix, each row is a feature vector extracted from person i, or basket i
testset : [same structure as trainset]
'''
def rand_data(n_rows=200, n_columns=50, n_feat=5, seed=27):
    """Generate a reproducible random dataset split into train and test halves.

    Each entry of the score matrix is drawn uniformly from [0, 1) and then
    bucketed: values below 0.88 become 0, [0.88, 0.92) becomes 1,
    [0.92, 0.96) becomes 2 and [0.96, 1) becomes 3.  Only rows with at least
    two non-zero entries are kept.

    Args:
        n_rows: total number of rows (train + test) to return.
        n_columns: number of columns of the score matrix.
        n_feat: length of the feature vector attached to each row.
        seed: value passed to ``np.random.seed``.  Note that this reseeds
            NumPy's *global* random state as a side effect.

    Returns:
        dict with two keys, ``trainset`` and ``testset``.  Each value is a
        dict with:
            scores: ``scipy.sparse.csr_matrix`` of ratings/counts in
                {0, 1, 2, 3}, one row per person/basket.
            atts: dense ``numpy`` array of uniform random features with
                ``n_feat`` columns, one row per person/basket.
        The train set holds the first ``n_rows // 2`` rows and the test set
        holds the remaining ones.

    Raises:
        ValueError: if fewer than ``n_rows`` generated rows have at least two
            non-zero entries (only plausible for very small ``n_columns``).
    """
    # Floor division keeps the split index an int on both Python 2 and 3;
    # a float index (Python 3 "/") is rejected by NumPy slicing.
    n_train = n_rows // 2

    np.random.seed(seed)

    # Allocate more rows than necessary so that enough of them survive the
    # "at least 2 non-zero entries" filter below.
    raw = np.random.rand(n_rows * 2, n_columns)

    # Bucket the uniform draws into ratings; later (higher) thresholds
    # overwrite earlier ones, everything below the first threshold stays 0.
    score_mat = np.zeros_like(raw)
    for rating, threshold in enumerate((0.88, 0.92, 0.96), 1):
        score_mat[raw >= threshold] = rating

    nonzero_per_row = np.sum(score_mat > 0, axis=1)
    score_mat = score_mat[nonzero_per_row >= 2][:n_rows]
    if score_mat.shape[0] < n_rows:
        # Without this check scores and atts would silently disagree on the
        # number of rows.
        raise ValueError(
            "only {0} of the requested {1} rows have at least 2 non-zero "
            "entries; increase n_columns".format(score_mat.shape[0], n_rows)
        )

    # Drawn after the scores so the random stream order stays unchanged.
    feature = np.random.rand(n_rows, n_feat)

    trainset = dict(
        scores=sparse.csr_matrix(score_mat[:n_train]),
        atts=feature[:n_train],
    )
    testset = dict(
        scores=sparse.csr_matrix(score_mat[n_train:]),
        atts=feature[n_train:],
    )
    return dict(trainset=trainset, testset=testset)