文章标题:
借助Python爬虫获取B站视频评论
文章内容:以下代码由 DeepSeek 协助生成。
点击查看代码
import requests
import time
import re
import csv
from datetime import datetime
import os
def obtain_video_info(bvid):
    """Look up a video's numeric id and title from its BV id.

    Queries the ``x/web-interface/view`` endpoint. The returned ``aid`` is
    the value the rest of this script passes around as ``oid``.

    Args:
        bvid: The ``BV...`` identifier of the video.

    Returns:
        A ``(aid, title)`` tuple on success, or ``(None, None)`` on any
        failure (the reason is printed to stdout).
    """
    url = "https://api.bilibili.com/x/web-interface/view"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Referer": "https://www.bilibili.com/"
    }
    try:
        # Hand the id to requests as a query parameter so it is URL-encoded
        # instead of being spliced into the URL string by hand.
        response = requests.get(url, params={"bvid": bvid}, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        if data.get('code') != 0:
            raise RuntimeError(f"API错误: {data.get('message')}")
        return data['data']['aid'], data['data']['title']
    except Exception as e:
        # Deliberately broad: this function is the boundary that converts
        # network, HTTP, JSON and payload-shape errors into (None, None).
        print(f"获取视频信息失败: {str(e)}")
        return None, None
def get_one_page_comments(oid, page=1, mode=3):
    """Fetch one page of comments for a video and return the parsed JSON.

    Args:
        oid: Numeric id of the video whose comments are requested.
        page: Value sent as the ``next`` query parameter (defaults to 1).
        mode: Sort mode sent to the API (defaults to 3, the value this
            script has always used).
            NOTE(review): community API documentation describes 3 as
            "by popularity" and 2 as "by time", the opposite of what the
            original inline comment claimed — confirm before relying on it.

    Returns:
        The decoded JSON response as a dict, or ``None`` if the request
        failed or the body could not be decoded (the reason is printed).
    """
    url = "https://api.bilibili.com/x/v2/reply/wbi/main"
    # NOTE(review): only ``wts`` is sent; no ``w_rid`` signature is computed
    # here. Confirm the endpoint accepts unsigned requests.
    params = {
        "next": page,
        "type": 1,
        "oid": oid,
        "mode": mode,
        "plat": 1,
        "web_location": 1315875,
        "wts": int(time.time())
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        # Fixed placeholder video page; it is not derived from ``oid``.
        "Referer": "https://www.bilibili.com/video/BV1xx411c7BF",
        "Origin": "https://www.bilibili.com"
    }
    try:
        response = requests.get(url, params=params, headers=headers, timeout=15)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        # Best effort: the caller treats None as "no response" and retries.
        print(f"获取评论失败(第{page}页): {str(e)}")
        return None
def secure_get(data, keys, default=None):
    """Follow a chain of keys through nested dicts without raising.

    Args:
        data: The object to start from (normally a dict).
        keys: Iterable of keys to look up one after another.
        default: Value returned as soon as a step cannot be taken.

    Returns:
        The value found at the end of the chain, or ``default`` if any
        intermediate object is not a dict or lacks the requested key.
        A key that is present with value ``None`` yields ``None``, not
        ``default``. An empty ``keys`` returns ``data`` unchanged.
    """
    current = data
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current
def _comment_field(node, keys, default):
    """Read a nested field, treating an explicit JSON null like a missing key."""
    value = secure_get(node, keys, default)
    return default if value is None else value


def _comment_row(node, level, prefix=""):
    """Flatten one raw reply dict into the row dict used by this script."""
    ctime = _comment_field(node, ['ctime'], int(time.time()))
    content = _comment_field(node, ['content', 'message'], '')
    if prefix:
        content = prefix + content
    return {
        "user": _comment_field(node, ['member', 'uname'], '未知用户'),
        "content": content,
        "time": datetime.fromtimestamp(ctime).strftime("%Y-%m-%d %H:%M:%S"),
        "like": _comment_field(node, ['like'], 0),
        "level": level
    }


def analyze_comments(comment_data):
    """Turn one comment-API response into a flat list of row dicts.

    Reads ``data.replies`` (level 1), each reply's own ``replies``
    (level 2) and ``data.top.reply`` (level 0, inserted at the front with
    a ``[置顶] `` prefix). Missing or null fields fall back to defaults:
    user ``'未知用户'``, empty content, the current time, zero likes.

    Args:
        comment_data: Parsed JSON response of the comment endpoint.

    Returns:
        A list of dicts with keys ``user``, ``content``, ``time``,
        ``like`` and ``level``. Entries that fail to parse are reported
        on stdout and skipped; the function itself never raises.
    """
    comments = []
    try:
        # Top-level comments and their nested replies.
        for reply in secure_get(comment_data, ['data', 'replies']) or []:
            try:
                comments.append(_comment_row(reply, 1))
                for sub_reply in secure_get(reply, ['replies']) or []:
                    try:
                        comments.append(_comment_row(sub_reply, 2))
                    except Exception as e:
                        print(f"解析二级评论时出错: {str(e)}")
            except Exception as e:
                print(f"解析一级评论时出错: {str(e)}")

        # Pinned comment goes first so it leads the output.
        top_data = secure_get(comment_data, ['data', 'top', 'reply'])
        if top_data:
            try:
                comments.insert(0, _comment_row(top_data, 0, "[置顶] "))
            except Exception as e:
                print(f"解析置顶评论时出错: {str(e)}")
    except Exception as e:
        # Keep whatever was parsed before the unexpected failure.
        print(f"解析评论数据时出错: {str(e)}")
    return comments
def store_comments(comments, filename="bilibili_comments.csv"):
    """Write comment rows to a CSV file (UTF-8 with BOM).

    Args:
        comments: Iterable of dicts with keys ``user``, ``content``,
            ``time``, ``like`` and ``level``.
        filename: Destination path; a missing parent directory is created.

    Returns:
        ``True`` if the file was written, ``False`` on any error (the
        reason is printed to stdout).
    """
    try:
        # Create the parent directory when the path has one.
        target_dir = os.path.dirname(filename)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(["用户", "评论内容", "时间", "点赞数", "评论层级"])
            for comment in comments:
                # A null or non-string content must not abort the whole
                # export, so normalise it before stripping line breaks.
                raw_content = comment['content']
                text = '' if raw_content is None else str(raw_content)
                clean_content = text.replace('\n', ' ').replace('\r', '')
                writer.writerow([
                    comment['user'],
                    clean_content,
                    comment['time'],
                    comment['like'],
                    comment['level']
                ])
        print(f"评论已保存到: {filename}")
        return True
    except Exception as e:
        # Best effort: report and signal failure instead of crashing.
        print(f"保存文件失败: {str(e)}")
        return False
def _fetch_page_with_retry(oid, page, max_retries):
    """Fetch one comment page, retrying on missing or error responses.

    Returns the response dict once its ``code`` is 0, or ``None`` after
    ``max_retries`` unsuccessful attempts.
    """
    for _ in range(max_retries):
        print(f"获取第 {page} 页评论...", end="", flush=True)
        data = get_one_page_comments(oid, page)
        if data and data.get('code') == 0:
            return data
        if not data:
            print(" [无响应]")
        else:
            print(f" [失败: {data.get('message')}]")
        time.sleep(2)  # back off before the next attempt
    return None


def main_operation():
    """Interactive entry point: download a video's comments into a CSV.

    Prompts for a video link, extracts the BV id, resolves it to an oid,
    pages through the comments (at most 50 pages, 3 attempts per page)
    and writes the result to ``bilibili_comments/<bvid>_<timestamp>_comments.csv``.
    All progress and errors are reported on stdout.
    """
    print("=" * 50)
    print("B站(Bilibili)视频评论下载工具 v2.1")
    print("=" * 50)

    # Read the link and pull the BV id out of it.
    video_url = input("请输入B站视频链接: ").strip()
    bvid_match = re.search(r"(BV[0-9A-Za-z]{10})", video_url)
    if not bvid_match:
        print("错误: 无法从URL中提取BV号,请确认链接格式正确")
        return
    bvid = bvid_match.group(0)
    print(f"提取到视频BV号: {bvid}")

    # Resolve the BV id to the numeric id used by the comment API.
    oid, title = obtain_video_info(bvid)
    if not oid:
        print("获取视频信息失败,程序终止")
        return
    print(f"视频标题: {title}")
    print(f"视频OID: {oid}")

    all_comments = []
    page = 1
    max_retries = 3
    max_pages = 50
    print("开始获取评论...")
    while page <= max_pages:
        data = _fetch_page_with_retry(oid, page, max_retries)
        if data is None:
            # A failed page says nothing about whether more pages exist,
            # so skip it rather than inspecting the failed response.
            print(f" [连续 {max_retries} 次获取失败,跳过第 {page} 页]")
        else:
            comments = analyze_comments(data)
            if comments:
                all_comments.extend(comments)
                print(f" [成功获取 {len(comments)} 条评论]")
            else:
                print(" [无评论数据]")
            # A missing cursor is treated as the end; secure_get also
            # copes with null intermediate objects.
            if secure_get(data, ['data', 'cursor', 'is_end'], True):
                print("已到达最后一页评论")
                break
        page += 1
        time.sleep(1.2)  # throttle requests

    if all_comments:
        output_dir = "bilibili_comments"
        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)
            print(f"已创建输出目录: {output_dir}")
        # The file name is built from the BV id and a timestamp only.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = os.path.join(output_dir, f"{bvid}_{timestamp}_comments.csv")
        if store_comments(all_comments, filename):
            print(f"共获取 {len(all_comments)} 条评论")
            print(f"结果文件: {os.path.abspath(filename)}")
        else:
            print("评论获取成功但保存失败")
    else:
        print("未获取到任何评论")

    # Pause so the output stays visible when run in a console window.
    input("\n按 Enter 键退出...")
# Run the interactive tool only when this file is executed as a script.
if __name__ == "__main__":
    main_operation()
相关文章
暂无评论...