# selectAll.py — scrape item IDs and names from mxd.dvg.cn and export them to a timestamped CSV.
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import csv
  4. import datetime
  5. # 发起请求获取网页内容
  6. base_url = 'https://mxd.dvg.cn/items.php?search=1&st=1&words=&type=0&p='
  7. min_pages = 1
  8. max_pages = 1159
  9. # 初始化一个空列表,用于存储结果
  10. items = []
  11. for page in range(min_pages, max_pages ):
  12. url = base_url + str(page)
  13. print(url)
  14. response = requests.get(url)
  15. soup = BeautifulSoup(response.text, 'html.parser')
  16. # 遍历所有表格行
  17. for row in soup.find_all('tr'):
  18. cells = row.find_all('td')
  19. for i in range(0, len(cells), 2):
  20. try:
  21. # 提取 ID 和物品名称
  22. id_cell = cells[i]
  23. item_cell = cells[i + 1]
  24. br_tag = id_cell.find('br')
  25. if br_tag:
  26. item_id = br_tag.find_next_sibling(text=True)
  27. # item_id = id_cell.get_text(strip=True).split('\n')[-1]
  28. item_name = item_cell.find('a').get_text(strip=True)
  29. items.append((item_id, item_name))
  30. except IndexError:
  31. # 跳过没有配对的单元格
  32. continue
  33. now = datetime.datetime.now()
  34. timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
  35. filename = f"All-{timestamp}.csv"
  36. # 保存到 CSV 文件
  37. with open(filename, 'w', newline='', encoding='utf-8') as file:
  38. writer = csv.writer(file)
  39. writer.writerow(['ID', '物品名称']) # 写入表头
  40. writer.writerows(items) # 写入数据
  41. print("报表已成功导出为 '"+filename+"'")