使用OAuth2方式实现Python获取Outlook邮箱收件箱数据

1.在Azure 门户注册应用程序

  微软文档地址

  重定向的地址配置(微软地址):  https://login.microsoftonline.com/common/oauth2/nativeclient

注册应用地址

2.程序代码

#安装包以及需要的驱动
pip3 install playwright

playwright install
import base64
import json
import logging
from io import BytesIO

from playwright.sync_api import sync_playwright
import time
import urllib.parse
import requests

from pia.utils.cache import get_redis_value, set_redis_expire
from pia.utils.constants import OUTLOOK_CLIENT_ID, OUTLOOK_TENANT_ID, OUTLOOK_CLIENT_SECRET, OUTLOOK_REDIRECT_URI, \
    COS_OUTLOOK_DIR, OUTLOOK_TOP
from pia.utils.cos_upload import upload_stream_to_cos, check_exists
from pia.utils.reids_key import OUTLOOK_TOKEN

log = logging.getLogger(__name__)

#账号登录 官方文档 https://learn.microsoft.com/en-us/azure/active-directory/develop/v2-oauth2-auth-code-flow
def get_authorization_code(_user_name, _pass_word):
    with sync_playwright() as play_wright:
        browser = play_wright.chromium.launch(headless=True)
        context = browser.new_context(locale="zh-CN", accept_downloads=True)
        try:
            # Open new page
            page = context.new_page()
            url = f"https://login.microsoftonline.com/{OUTLOOK_TENANT_ID}/oauth2/v2.0/authorize?client_id={OUTLOOK_CLIENT_ID}&response_type=code&redirect_uri={OUTLOOK_REDIRECT_URI}&response_mode=query&scope=https%3A%2F%2Fgraph.microsoft.com%2Fmail.read&state=12345"
            page.goto(url)

            page.click("[placeholder=\"电子邮件、电话或\\ Skype\"]")

            page.fill("[placeholder=\"电子邮件、电话或\\ Skype\"]", _user_name)

            with page.expect_navigation():
                page.click("text=下一步")

            page.click("[placeholder=\"密码\"]")

            page.fill("[placeholder=\"密码\"]", _pass_word)

            # 点击登录并等待导航
            with page.expect_navigation(timeout=6000):
                page.click("text=登录", timeout=2000)
                # 如果有确认信息,点击是
                try:
                    page.click("text=是", timeout=2000)
                except:
                    pass
                # 上一步没生效在执行一次
                try:
                    page.click("text=是", timeout=2000)
                except:
                    pass
                # 如果有确认信息,点击是
                try:
                    # 点击接受按钮
                    page.click('input[type="submit"][name="idSIButton9"][value="接受"]', timeout=2000)
                except:
                    pass

            time.sleep(3)
            # 解析 URL 获取授权码
            query = dict(urllib.parse.parse_qsl(urllib.parse.urlsplit(page.url).query))
            authorization_code = query.get('code')

            if not authorization_code:
                # todo 待开发
                print(f"*****************{_user_name}: 密码错误\n\n")
                return ""
            return authorization_code
        except Exception as e:
            print(f"outlook获取code失败:::{traceback.format_exc()}")  # 输出异常信息
            # 记录错误,继续执行后续操作
            return ""
        finally:
            context.close()
            browser.close()


def get_token(authorization_code):
    param = {'client_id': OUTLOOK_CLIENT_ID,
             'code': authorization_code,
             'redirect_uri': OUTLOOK_REDIRECT_URI,
             'grant_type': 'authorization_code',
             'client_secret': OUTLOOK_CLIENT_SECRET
             }
    token_headers = {"Content-Type": "application/x-www-form-urlencoded"}
    token_url = f'https://login.microsoftonline.com/{OUTLOOK_TENANT_ID}/oauth2/v2.0/token'

    res = requests.post(url=token_url, headers=token_headers, data=param)
    access_token = json.loads(res.text).get('access_token')
    return f'Bearer {access_token}'

# api官方文档 https://learn.microsoft.com/en-us/graph/api/mailfolder-list-messages?view=graph-rest-1.0
def get_inbox(authorization, str_start, str_end):
    condition = '?$filter = '
    if str_start:
        condition += f'ReceivedDateTime ge {str_start} and '
    condition += f'receivedDateTime lt {str_end}'
    # 获取收件箱里面的 邮件
    endpoint = f"https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages{condition}&$top={OUTLOOK_TOP}"
    http_headers = {'Authorization': authorization,
                    'Accept': 'application/json',
                    'Content-Type': 'application/json'}
    data = requests.get(endpoint, headers=http_headers, stream=False).json()
    return data


def get_email_attachments(authorization, email_id):
    # 获取收件箱里面的邮件附件
    endpoint = f"https://graph.microsoft.com/v1.0/me/messages/{email_id}/attachments"
    http_headers = {'Authorization': authorization,
                    'Accept': 'application/json',
                    'Content-Type': 'application/json'}
    data = requests.get(endpoint, headers=http_headers, stream=False).json()
    return data

#程序入口
def deal_user_email(_user_name, _pass_word, str_start, str_end) -> list:
    result = []
    redis_key = f'{OUTLOOK_TOKEN}:{_user_name}'
    # 缓存
    authorization = get_redis_value(redis_key)
    if authorization:
        pass
    else:
        authorization_code = get_authorization_code(_user_name, _pass_word)
        authorization = get_token(authorization_code)
        if authorization:
            set_redis_expire(redis_key, authorization, 60 * 60)

    if authorization:
        email = get_inbox(authorization, str_start, str_end)
        if email:
            email_values = email.get("value")
            if email_values:
                for value in email_values:
                    # 邮件 id
                    email_id = value.get("id")
                    # 是否存在附件 True/False
                    has_attachments = value.get("hasAttachments")
                    value.update({"attachments": {}})
                    if has_attachments and email_id:
                        attachment_dict = upload_attachment_to_cos(authorization, email_id, _user_name)
                        value.update({"attachments": attachment_dict})
                    result.append(value)

    else:
        log.error(f"outlook user_name: {_user_name} Authorization Failed")

    return result


'''
附件上传到cos
'''


def upload_attachment_to_cos(authorization, email_id, _user_name):
    attachment_dict = {}
    attachments = get_email_attachments(authorization, email_id)
    if attachments:
        attachment_values = attachments.get("value")
        if attachment_values:
            for _value in attachment_values:
                # 附件 name
                attachment_name = _value.get("name")
                # 附件 内容
                attachment_content = _value.get("contentBytes")
                # Step 1: 解码 Base64 字符串
                decoded_data = base64.b64decode(attachment_content)
                # Step 2: 创建一个 BytesIO 对象作为文件流
                file_stream = BytesIO(decoded_data)
                object_name = f'{COS_OUTLOOK_DIR}/{_user_name}/{email_id}/{attachment_name}'
                is_exists, url = check_exists(object_name)
                if not is_exists:
                    url = upload_stream_to_cos(file_stream, object_name)
                attachment_dict.update({attachment_name: url})

    return attachment_dict
import traceback
from datetime import datetime, timedelta

from django.core.management import BaseCommand
import logging
import uuid

from django.db import transaction

from pia.models import PiaOutLookTask, PiaOutLookData
from pia.utils.aes import decrypted
from pia.utils.outlook import deal_user_email

logging = logging.getLogger('task')

#任务的方式拉取
class Command(BaseCommand):

    def add_arguments(self, parser):
        parser.add_argument('trace_id', type=str)

    def handle(self, *args, **options):
        trace_id = options["trace_id"]
        if not trace_id:
            trace_id = str(uuid.uuid4())
        self.trace_id = trace_id

        self.writeLog('outlook email start')
        outlook_task = PiaOutLookTask.objects.values("id", "user_name", "pwd", "execution_time")
        for x in outlook_task:

            _id = x.get("id")
            user_name = x.get("user_name")
            self.writeLog(f'############## outlook email user_name:{user_name} start ##############')
            pwd = x.get("pwd")
            execution_time = x.get("execution_time")
            # 获取当前时间
            current_time = datetime.now()
            # 格式化为 YYYY-MM-DD
            end_time = current_time.strftime('%Y-%m-%dT%H:%M:%S')
            try:
                if user_name and pwd:
                    _pwd = decrypted(pwd)
                    result = deal_user_email(user_name, _pwd, f'{execution_time}Z', F'{end_time}Z')
                    with transaction.atomic():
                        PiaOutLookTask.objects.filter(id=_id).update(status=0, execution_time=end_time)
                        outlook_data = PiaOutLookData.objects.filter(outlook_task_id=_id, start_time=execution_time,
                                                                     end_time=end_time)
                        if outlook_data:
                            outlook_data.update(content=result)
                        else:
                            PiaOutLookData.objects.create(outlook_task_id=_id, start_time=execution_time,
                                                          end_time=end_time, content=result)
                self.writeLog(f'############## outlook email user_name:{user_name} end ##############')
            except Exception:
                PiaOutLookTask.objects.filter(id=_id).update(status=1, execution_time=end_time)
                self.writeLog(
                    f'##############  outlook email user_name:{user_name} execution failed::::{traceback.format_exc()}')
        self.writeLog('outlook email end')

    def writeLog(self, msg: str):
        logging.info(f'[{self.trace_id}] {msg}')

作者:5年,目标200w

物联沃分享整理
物联沃-IOTWORD物联网 » 使用OAuth2方式实现Python获取Outlook邮箱收件箱数据

发表回复