daren_project/gmail/quickstart.py

249 lines
9.0 KiB
Python
Raw Normal View History

2025-03-29 12:26:50 +08:00
from apiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools
import base64
from bs4 import BeautifulSoup
import dateutil.parser as parser
from datetime import datetime
import os
import json
# Proxy configuration: send both HTTP and HTTPS traffic through the local
# proxy listening on 127.0.0.1:7890.
os.environ.update({
    'HTTP_PROXY': 'http://127.0.0.1:7890',
    'HTTPS_PROXY': 'http://127.0.0.1:7890',
})

# Gmail API authentication.
# Credentials are cached in storage.json; when they are missing or invalid the
# OAuth flow is run from client_secret.json and the result is written back to
# the same store.
SCOPES = ['https://mail.google.com/']
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
    creds = tools.run_flow(flow, store)

# Module-wide Gmail service object used by every function below.
GMAIL = discovery.build(
    'gmail',
    'v1',
    http=creds.authorize(Http()),
)
2025-04-07 15:07:20 +08:00
def download_attachment(message_id, attachment_id, filename):
    """Download one attachment and store it in the ``attachments`` directory.

    Args:
        message_id: ID of the Gmail message that owns the attachment.
        attachment_id: ID of the attachment body to fetch.
        filename: File name announced by the email. It is untrusted input, so
            only its final path component is used when writing to disk.

    Returns:
        The path of the written file, or ``None`` if anything went wrong
        (the error is printed, not raised).
    """
    try:
        attachment = GMAIL.users().messages().attachments().get(
            userId='me',
            messageId=message_id,
            id=attachment_id
        ).execute()

        data = attachment['data']
        # base64url data may come without '=' padding; restore it so the
        # decoder does not reject an otherwise valid payload.
        file_data = base64.urlsafe_b64decode(data + '=' * (-len(data) % 4))

        # Create the attachment directory (no error if it already exists).
        os.makedirs('attachments', exist_ok=True)

        # The file name is controlled by the sender: drop any directory part
        # (including Windows-style separators) so values such as
        # "../../x" or "/etc/x" cannot escape the attachments directory.
        safe_name = os.path.basename(str(filename).replace('\\', '/'))
        if safe_name in ('', '.', '..'):
            safe_name = f"attachment_{message_id}"

        # Save the attachment.
        filepath = os.path.join('attachments', safe_name)
        with open(filepath, 'wb') as f:
            f.write(file_data)
        return filepath
    except Exception as e:
        # Best-effort: report the failure and let the caller continue.
        print(f"Error downloading attachment: {str(e)}")
        return None
2025-03-29 12:26:50 +08:00
def get_email_content(message):
    """Extract headers, plain-text body and attachment metadata from a message.

    Args:
        message: A Gmail message dict containing ``id`` and ``payload``
            (with ``headers`` and either ``parts`` or ``body``).

    Returns:
        A dict with the keys ``id``, ``subject``, ``from``, ``date``, ``body``
        and ``attachments``. ``date`` is formatted as ``%Y-%m-%d %H:%M:%S``
        when the Date header can be parsed, otherwise the raw header value is
        kept. ``attachments`` is a list of dicts with ``filename``,
        ``mimeType``, ``size`` and, when available, ``attachmentId``.
        Returns ``None`` (after printing the error) if the message cannot be
        processed.
    """
    def _decode_text(data):
        # Restore '=' padding that base64url data may omit, and replace
        # undecodable bytes instead of failing, so one odd body does not
        # cause the whole message to be dropped.
        if isinstance(data, bytes):
            data = data.decode('ascii')
        padded = data + '=' * (-len(data) % 4)
        return base64.urlsafe_b64decode(padded).decode('utf-8', errors='replace')

    try:
        message_id = message['id']
        payload = message['payload']
        headers = payload['headers']

        # Basic information about the email.
        email_data = {
            'id': message_id,
            'subject': '',
            'from': '',
            'date': '',
            'body': '',
            'attachments': []
        }

        # Header names are case-insensitive, so compare them lowercased.
        for header in headers:
            name = header.get('name', '').lower()
            value = header.get('value', '')
            if name == 'subject':
                email_data['subject'] = value
            elif name == 'from':
                email_data['from'] = value
            elif name == 'date':
                try:
                    date = parser.parse(value)
                    email_data['date'] = date.strftime('%Y-%m-%d %H:%M:%S')
                except (ValueError, OverflowError):
                    # Keep the raw header rather than losing the message.
                    email_data['date'] = value

        def _walk_parts(parts):
            """Recursively collect attachments and the first text/plain body."""
            for part in parts:
                part_body = part.get('body', {})
                mime_type = part.get('mimeType', '')

                # A non-empty filename marks the part as an attachment.
                if part.get('filename'):
                    attachment = {
                        'filename': part['filename'],
                        'mimeType': mime_type,
                        'size': part_body.get('size', 0)
                    }
                    # The attachment ID is what allows a later download.
                    if 'attachmentId' in part_body:
                        attachment['attachmentId'] = part_body['attachmentId']
                    email_data['attachments'].append(attachment)

                # Only the first non-empty text/plain part becomes the body.
                if mime_type == 'text/plain' and not email_data['body']:
                    data = part_body.get('data', '')
                    if data:
                        email_data['body'] = _decode_text(data)

                # Descend into nested multipart containers.
                if 'parts' in part:
                    _walk_parts(part['parts'])

        if 'parts' in payload:
            _walk_parts(payload['parts'])
        elif 'body' in payload:
            # Single-part message: the payload body is the content itself.
            data = payload['body'].get('data', '')
            if data:
                email_data['body'] = _decode_text(data)

        return email_data
    except Exception as e:
        # Best-effort: report the failure and signal it with None.
        print(f"Error processing email: {str(e)}")
        return None
def get_conversations(email1, email2):
    """Fetch every message exchanged between two addresses, oldest first.

    Args:
        email1: First email address.
        email2: Second email address.

    Returns:
        A list of dicts as produced by ``get_email_content``, in chronological
        order. Messages that ``get_email_content`` could not process are
        skipped. Returns ``[]`` (after printing the error) if listing or
        fetching fails.
    """
    try:
        # Search query: sender and recipient must both be one of the two.
        query = f"from:({email1} OR {email2}) to:({email1} OR {email2})"

        # Collect the message references from every result page.
        message_refs = []
        page_token = None
        while True:
            list_args = {'userId': 'me', 'q': query}
            if page_token:
                list_args['pageToken'] = page_token
            response = GMAIL.users().messages().list(**list_args).execute()
            # A page may lack the 'messages' key; treat it as empty instead of
            # raising and losing the pages already collected.
            message_refs.extend(response.get('messages', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                break

        # Fetch the full content of each message.
        dated = []
        for ref in message_refs:
            message = GMAIL.users().messages().get(userId='me', id=ref['id']).execute()
            email_data = get_email_content(message)
            if not email_data:
                continue
            # internalDate is an epoch-milliseconds value, so it orders
            # messages correctly even when their Date headers use different
            # UTC offsets (the formatted 'date' string has no offset).
            try:
                received_ms = int(message.get('internalDate', 0))
            except (TypeError, ValueError):
                received_ms = 0
            dated.append((received_ms, email_data))

        # Sort chronologically; the formatted date is the tie-breaker and the
        # sole key when internalDate is unavailable.
        dated.sort(key=lambda pair: (pair[0], pair[1]['date']))
        return [email_data for _, email_data in dated]
    except Exception as e:
        # Best-effort: report the failure and return no conversations.
        print(f"Error getting conversations: {str(e)}")
        return []
def save_conversations(conversations, output_file):
    """Save the conversation log as text and as JSON (overwrite mode).

    The text log is written to ``output_file``. The JSON copy is written next
    to it, using the same path with its extension replaced by ``.json``.

    Args:
        conversations: List of message dicts with the keys ``date``, ``from``,
            ``subject``, ``body`` and optionally ``attachments``.
        output_file: Path of the text log; existing content is overwritten.

    Returns:
        None. Errors are printed rather than raised.
    """
    try:
        separator = "=" * 50
        # 'w' mode: each run replaces the previous log.
        with open(output_file, 'w', encoding='utf-8') as f:
            # Timestamp banner.
            f.write(separator + "\n")
            f.write(f"记录时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(separator + "\n\n")

            # One section per message.
            for msg in conversations:
                f.write(f"时间: {msg['date']}\n")
                f.write(f"发件人: {msg['from']}\n")
                f.write(f"主题: {msg['subject']}\n")
                f.write("内容:\n")
                f.write(f"{msg['body']}\n")

                # Attachment summary; tolerate records without the key.
                attachments = msg.get('attachments') or []
                if attachments:
                    f.write("\n附件:\n")
                    for att in attachments:
                        f.write(f" - {att['filename']} ({att['mimeType']}, {att['size']} 字节)\n")

                f.write("-" * 50 + "\n")

        print(f"对话记录已保存到: {output_file}")

        # JSON copy. splitext only strips a real file extension, so a dot in a
        # directory name (e.g. "logs.v2/chat") does not truncate the path.
        json_file = os.path.splitext(output_file)[0] + '.json'
        if json_file == output_file:
            # Never overwrite the text log that was just written.
            json_file = output_file + '.json'
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(conversations, f, ensure_ascii=False, indent=2)
        print(f"JSON 格式对话记录已保存到: {json_file}")

    except Exception as e:
        # Best-effort: report the failure without interrupting the caller.
        print(f"Error saving conversations: {str(e)}")
def main(email1="crushwds@gmail.com", email2="ardonisierni@gmail.com",
         output_file="email_conversations.txt"):
    """Export the conversation between two addresses and offer to download attachments.

    Args:
        email1: First email address of the conversation.
        email2: Second email address of the conversation.
        output_file: Text log path. The same file is overwritten on every run.

    Returns:
        None. Progress and statistics are printed to standard output.
    """
    print(f"正在获取 {email1} 和 {email2} 之间的对话...")

    # Fetch the conversation records.
    conversations = get_conversations(email1, email2)

    if not conversations:
        print("未找到对话记录")
        return

    print(f"找到 {len(conversations)} 条对话记录")

    # Count attachments across all messages.
    total_attachments = sum(len(msg['attachments']) for msg in conversations)

    # Save the conversation records.
    save_conversations(conversations, output_file)

    # Print conversation statistics.
    print("\n对话统计:")
    print(f"总消息数: {len(conversations)}")
    print(f"总附件数: {total_attachments}")

    # Messages per sender, in first-seen order.
    senders = {}
    for msg in conversations:
        sender = msg['from']
        senders[sender] = senders.get(sender, 0) + 1
    for sender, count in senders.items():
        print(f"{sender}: {count} 条消息")

    # Ask the user whether the attachments should be downloaded.
    if total_attachments <= 0:
        return
    download_choice = input(f"\n发现 {total_attachments} 个附件,是否下载? (y/n): ")
    # Ignore surrounding whitespace so " y" still counts as yes.
    if download_choice.strip().lower() != 'y':
        return

    print("\n开始下载附件...")
    downloaded = 0
    for msg in conversations:
        for att in msg['attachments']:
            # Only attachments that carry an ID can be fetched.
            if 'attachmentId' not in att:
                continue
            filepath = download_attachment(msg['id'], att['attachmentId'], att['filename'])
            if filepath:
                downloaded += 1
                print(f"已下载: {att['filename']} -> {filepath}")

    print(f"\n完成! 成功下载了 {downloaded}/{total_attachments} 个附件到 'attachments' 目录")


if __name__ == "__main__":
    main()