"""Scrape item IDs and names from mxd.dvg.cn search result pages and export them to CSV."""

import requests
from bs4 import BeautifulSoup
import csv
import datetime

# Base search URL; the page number is appended to it.
base_url = 'https://mxd.dvg.cn/items.php?search=1&st=1&words=&type=0&p='
min_pages = 1
max_pages = 1159

# Accumulates (item_id, item_name) tuples across all pages.
items = []

# range() is half-open, so use max_pages + 1 to actually fetch the last page
# (the original loop silently skipped page 1159).
for page in range(min_pages, max_pages + 1):
    url = base_url + str(page)
    print(url)
    try:
        response = requests.get(url, timeout=30)
        # Fail on HTTP errors instead of silently parsing an error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        # A single bad page should not abort the whole 1159-page run.
        print(f"skip page {page}: {exc}")
        continue
    soup = BeautifulSoup(response.text, 'html.parser')

    # Each table row holds (ID cell, name cell) pairs side by side.
    for row in soup.find_all('tr'):
        cells = row.find_all('td')
        # Stop one short of the end so cells[i + 1] always exists — replaces
        # the per-item try/except IndexError of the original.
        for i in range(0, len(cells) - 1, 2):
            id_cell = cells[i]
            item_cell = cells[i + 1]
            br_tag = id_cell.find('br')
            if br_tag is None:
                continue
            # The item ID is the text node immediately after the <br>.
            # `string=` replaces the deprecated `text=` keyword of bs4.
            item_id = br_tag.find_next_sibling(string=True)
            link = item_cell.find('a')
            # Guard malformed rows: a missing text node or missing <a> used to
            # raise AttributeError, which the old IndexError handler never caught.
            if item_id is None or link is None:
                continue
            # Strip surrounding whitespace from the raw text node (the original
            # commented-out split('\n')[-1] line shows this was the intent).
            items.append((item_id.strip(), link.get_text(strip=True)))

now = datetime.datetime.now()
timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
filename = f"All-{timestamp}.csv"

# Export the collected rows to a timestamped CSV file.
with open(filename, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['ID', '物品名称'])  # header row
    writer.writerows(items)  # data rows

print("报表已成功导出为 '" + filename + "'")