import re
import pymongo
import requests
# Scrape the ranking page, split it into its <ul> blocks, extract the items of
# each ranking section with regular expressions, and store the combined result
# as one document in MongoDB (database "cctv", collection "top").

# Request headers sent with the page fetch.
headers = {
    # Identify the client with a desktop-browser User-Agent string.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (HTML, like Gecko) "
                  "Chrome/127.0.0.0 Safari/537.36"
}

# NOTE(review): the original value had no scheme/host ('/top/?spm=...'), which
# requests rejects with MissingSchema. The host below is presumed from the
# "cctv" database name used further down -- confirm before relying on it.
url = 'https://tv.cctv.com/top/?spm=C28340.PdNvWY0LYxCP.EtmP5mypaGE4.11'

# NOTE(review): re.S is presumed for all three patterns. The original calls had
# lost their flag argument, and the patterns only work when `.` can cross line
# breaks inside a block.

# One attribute-less <ul>...</ul> block of the page.
_UL_RE = re.compile(r'<ul>.*?</ul>', re.S)

# Hot-list entry: (image URL, title, category).
_HOT_ITEM_RE = re.compile(
    r'<li.*?lazy="(.*?)".*?<div class="text"><a href=".*?" target="_blank">(.*?)</a>'
    r'</div>.*?<div class="column"><i class="icon_l"></i><a href=".*?" target="_blank">(.*?)</a>'
    r'<i class="icon_r"></i></div>.*?</li>',
    re.S,
)

# Entry of the four poster-style sections: (image URL, badge text, title, synopsis).
_RANKED_ITEM_RE = re.compile(
    r'<li.*?lazy="(.*?)" width="188" height="250"></a>.*?<span class="number"><i class="icon_l">'
    r'</i><i class="txt">(.*?)</i><i class="icon_r"></i></span>.*?</div>.*?'
    r'<a class="cover" href=".*?" target="_blank">.*?</a>.*?<div class="text">'
    r'<a href=".*?" target="_blank">(.*?)</a></div>.*?<p><a href=".*?" target="_blank">(.*?)</a></p>.*?</li>',
    re.S,
)

# Index of the <ul> block holding the hot list (block 0 is not used).
_HOT_LIST_BLOCK = 1

# (section name, index of its <ul> block, key under which the badge text is stored).
_RANKED_SECTIONS = (
    ("cartoon", 2, "category"),
    ("TV drama", 3, "episode"),
    ("Documentary", 4, "category"),
    ("Special Program", 5, "tv"),
)


def fetch_page(page_url=url, request_headers=None, timeout=10):
    """Download *page_url* and return its body decoded as UTF-8 text.

    Args:
        page_url: Address of the page to fetch.
        request_headers: HTTP headers to send; defaults to the module-level
            ``headers``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Raises:
        requests.RequestException: On connection problems or a non-2xx status.
    """
    if request_headers is None:
        request_headers = headers
    res = requests.get(page_url, headers=request_headers, timeout=timeout)
    # Fail loudly instead of regex-parsing an error page.
    res.raise_for_status()
    return res.content.decode("utf8")


def parse_sections(html):
    """Extract every ranking section from the page markup.

    Args:
        html: Full page source as text.

    Returns:
        A dict keyed by section name ("Hot List", "cartoon", "TV drama",
        "Documentary", "Special Program"); each value is
        ``{"name": <section name>, "items": [<item dict>, ...]}``.

    Raises:
        IndexError: If the page has fewer ``<ul>`` blocks than the sections
            need, i.e. the layout is not the one this parser was written for.
    """
    blocks = _UL_RE.findall(html)
    needed = max(index for _, index, _ in _RANKED_SECTIONS) + 1
    if len(blocks) < needed:
        raise IndexError(
            f"expected at least {needed} <ul> blocks, found {len(blocks)}"
        )

    section_names = ["Hot List"] + [name for name, _, _ in _RANKED_SECTIONS]
    result = {name: {"name": name, "items": []} for name in section_names}

    result["Hot List"]["items"] = [
        {"img": img, "title": title, "category": category}
        for img, title, category in _HOT_ITEM_RE.findall(blocks[_HOT_LIST_BLOCK])
    ]

    for name, index, badge_key in _RANKED_SECTIONS:
        # Capture order is (img, badge, title, synopsis); the stored key
        # order img/title/badge/synopsis matches the original script.
        result[name]["items"] = [
            {"img": img, "title": title, badge_key: badge, "synopsis": synopsis}
            for img, badge, title, synopsis in _RANKED_ITEM_RE.findall(blocks[index])
        ]

    return result


def save_result(result):
    """Insert *result* as one document into the "top" collection.

    The client connects with pymongo's default settings and uses the "cctv"
    database as the fallback name, because no URI (and hence no database name)
    is supplied. The connection is closed even if the insert fails.
    """
    with pymongo.MongoClient() as client:
        db = client.get_default_database("cctv")
        collection = db.get_collection("top")
        collection.insert_one(result)


def main():
    """Fetch the ranking page, parse it, and persist the parsed sections."""
    con = fetch_page()
    result = parse_sections(con)
    save_result(result)


if __name__ == "__main__":
    main()