forked from cv-cat/Spider_XHS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
home.py
95 lines (84 loc) · 3.58 KB
/
home.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import requests
from one import OneNote
from profile import Profile
from xhs_utils.xhs_util import get_headers, get_params, js, check_cookies
class Home:
    """Collect or save every note posted on a Xiaohongshu user's home page.

    The class pages through the ``user_posted`` web endpoint, signing each
    request with the ``get_xs`` JavaScript helper, and delegates profile and
    single-note handling to ``Profile`` and ``OneNote``.
    """

    def __init__(self, cookies=None):
        """Set up the endpoint, helper objects and shared request state.

        Args:
            cookies: Cookie mapping sent with every request. It must contain
                the ``a1`` key, which is used to sign requests. When ``None``
                the cookies returned by ``check_cookies()`` are used.
        """
        self.cookies = check_cookies() if cookies is None else cookies
        self.more_url = 'https://edith.xiaohongshu.com/api/sns/web/v1/user_posted'
        self.profile = Profile(self.cookies)
        self.oneNote = OneNote(self.cookies)
        self.headers = get_headers()
        self.params = get_params()

    def _fetch_page(self, user_id, cursor):
        """Request one page of a user's notes and return its ``data`` payload.

        Args:
            user_id: Id of the user whose notes are listed.
            cursor: Pagination cursor; the empty string requests the first page.

        Returns:
            The ``data`` member of the decoded JSON response.

        Raises:
            KeyError: If the response has no ``data`` member.
        """
        # The signature is computed over the exact path + query being requested,
        # so it has to be regenerated for every cursor value.
        api = f"/api/sns/web/v1/user_posted?num=30&cursor={cursor}&user_id={user_id}&image_scenes="
        ret = js.call('get_xs', api, '', self.cookies['a1'])
        self.headers['x-s'], self.headers['x-t'] = ret['X-s'], str(ret['X-t'])
        self.params['user_id'] = user_id
        self.params['cursor'] = cursor
        response = requests.get(
            self.more_url,
            headers=self.headers,
            cookies=self.cookies,
            params=self.params,
        )
        return response.json()["data"]

    def _iter_notes(self, user_id):
        """Yield every note entry of ``user_id``, following pagination lazily.

        Iteration stops when a page carries no usable notes or when the
        response reports ``has_more`` as false. Pages are fetched on demand,
        so the next request is only sent once the previous page is consumed.
        """
        cursor = ''
        first_page = True
        while True:
            data = self._fetch_page(user_id, cursor)
            notes = data.get("notes")
            # Check for an empty list before looking at the first element:
            # indexing ``notes[0]`` on an empty page raises IndexError.
            if not notes or not notes[0]:
                if first_page:
                    print(f"用户{user_id}没有笔记")
                break
            yield from notes
            if not data["has_more"]:
                break
            cursor = data["cursor"]
            first_page = False

    def get_all_note_info(self, url):
        """Return the ids of all notes on a user's home page.

        Args:
            url: URL of the user's profile page.

        Returns:
            A ``(note_id_list, profile)`` tuple, where ``note_id_list`` is the
            list of ``note_id`` values in the order the API returned them and
            ``profile`` is the object returned by ``Profile.get_profile_info``.
        """
        profile = self.profile.get_profile_info(url)
        note_id_list = [note['note_id'] for note in self._iter_notes(profile.userId)]
        return note_id_list, profile

    def save_all_note_info(self, url, need_cover=False):
        """Save the profile and every note of the user behind ``url``.

        Args:
            url: URL of the user's profile page.
            need_cover: Forwarded unchanged to ``OneNote.save_one_note_info``.
        """
        profile = self.profile.save_profile_info(url)
        for index, note in enumerate(self._iter_notes(profile.userId), start=1):
            info = f'第{index}个笔记, '
            self.oneNote.save_one_note_info(self.oneNote.detail_url + note['note_id'], need_cover, info)
        print(f'用户 {profile.nickname} 全部视频信息保存成功')

    def main(self, url_list):
        """Save all notes for each profile URL, continuing past failures.

        Args:
            url_list: Iterable of profile page URLs.
        """
        for url in url_list:
            try:
                self.save_all_note_info(url)
            except Exception as exc:
                # Best effort per user: report the cause and move on to the
                # next URL. KeyboardInterrupt/SystemExit are deliberately not
                # caught so the run can still be aborted.
                print(f'用户 {url} 查询失败: {exc!r}')
if __name__ == '__main__':
    # Script entry point: save the notes of a fixed set of example profiles.
    crawler = Home()
    crawler.main([
        'https://www.xiaohongshu.com/user/profile/6185ce66000000001000705b',
        'https://www.xiaohongshu.com/user/profile/6034d6f20000000001006fbb',
    ])