"""Scrape the item list from mxd.dvg.cn and export (ID, name) pairs to CSV."""
import csv
import datetime

import requests
from bs4 import BeautifulSoup

# Paginated item-search URL; the page number is appended to this base.
base_url = 'https://mxd.dvg.cn/items.php?search=1&st=1&words=&type=0&p='
min_pages = 1
max_pages = 1159

# Accumulates (item_id, item_name) tuples from every page.
items = []
# range() is half-open, so +1 is required to actually fetch page
# `max_pages` — the original `range(min_pages, max_pages)` silently
# dropped the final page.
for page in range(min_pages, max_pages + 1):
    url = base_url + str(page)
    print(url)
    # Timeout prevents the whole run from hanging on one dead request;
    # raise_for_status() fails loudly instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Each table row holds alternating (ID cell, name cell) pairs,
    # hence the step of 2 over the <td> cells.
    for row in soup.find_all('tr'):
        cells = row.find_all('td')
        for i in range(0, len(cells), 2):
            try:
                id_cell = cells[i]
                item_cell = cells[i + 1]
            except IndexError:
                # Odd trailing cell with no partner — skip it.
                continue
            # The numeric ID is the text node immediately after the <br>.
            # `string=True` replaces the deprecated `text=True` alias.
            br_tag = id_cell.find('br')
            if not br_tag:
                continue
            item_id = br_tag.find_next_sibling(string=True)
            link = item_cell.find('a')
            # Guard both lookups: a missing text node or <a> would
            # otherwise raise AttributeError (not caught by IndexError).
            if item_id and link:
                items.append((item_id.strip(), link.get_text(strip=True)))

now = datetime.datetime.now()
timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
filename = f"All-{timestamp}.csv"
# Save results to a timestamped CSV file.
with open(filename, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['ID', '物品名称'])  # header row
    writer.writerows(items)  # data rows
print("报表已成功导出为 '" + filename + "'")