Python-基于mediapipe,pyautogui,cv2和numpy的电脑手势截屏工具

前言:在我们的日常生活中,手机已经成为我们每天工作,学习,生活的一个不可或缺的部分。众所周知:为了我们的使用方便,手机里面的很多功能非常人性化,既便捷又高效,其中就有手机的截屏方式,它们花样繁多,如三指截屏,手势截屏等。那么怎么在电脑里面也实现这个功能呢?(虽然我们知道电脑也有快捷的截屏方式-Win+Shift+S。但是很明显,这依然不够快捷,因为这至少需要用户的两次手动操作)。那么废话不多说,我们直接开始今天的Python学习之路-利用PyCharm手搓一个基于mediapipe,pyautogui,cv2和numpy的电脑手势截屏工具。

 编程思路:本次编程我们需要pyautogui来获取屏幕的尺寸,这对于确定按钮的位置和大小非常有用;接着利用mediapipe初始化一个手部模型,用来检测视频流中的手部关键点;numpy提供了计算机在采集了用户手部姿态所得到的数据的处理,并与mediapipe所建立的数学模型进行比较等。cv2是本次编程的重头戏,它为调用计算机摄像头进行信息采集,以及用户手部模型的实时可视化展现等提供了可能。

第一步:导入库

本次编程所需调用的库:

1,第三方库:cv2(opencv-python),numpy。(注意:cv2和numpy并不是Python标准库,需要通过pip安装)

2,第三方库:mediapipe,pyautogui。

#导入必要库
import cv2
import mediapipe as mp
import pyautogui
import numpy as np

第二步:程序初始化

我们需要初始化判断模型并给出屏幕尺寸等相关变量参数。

"""程序初始化"""
#初始化MediaPipe手部模型
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.7)
mp_draw = mp.solutions.drawing_utils
#绘制屏幕尺寸并控制初始状态变量
screen_w, screen_h = pyautogui.size()
waiting_for_click = False
blink_counter = 0
btn_rect = (500, 400, 200, 60)

第三步:构建内部逻辑

接下来我们需要编写程序内部判断模型等的内在逻辑,并将鼠标,按键事件与计算机采集信息所得的对比结果联系起来。

"""内部逻辑函数实现区"""
def mouse_click(event, x, y, flags, param):
    """OpenCV mouse callback for the preview window.

    Clears the module-level ``waiting_for_click`` flag when a left-button
    press lands inside the CONTINUE button (``btn_rect``), re-enabling
    gesture detection.
    """
    global waiting_for_click
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    bx, by, bw, bh = btn_rect
    # Only a click strictly inside the button rectangle counts.
    if bx < x < bx + bw and by < y < by + bh:
        waiting_for_click = False
def draw_button(frame):
    """Draw the green CONTINUE button onto *frame* (modified in place)."""
    x, y, w, h = btn_rect
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    # Filled background, then a darker border, then the white label.
    cv2.rectangle(frame, top_left, bottom_right, (50, 200, 50), -1)
    cv2.rectangle(frame, top_left, bottom_right, (30, 180, 30), 2)
    cv2.putText(frame, "CONTINUE", (x + 10, y + 40),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
def is_thumbs_up(hand_landmarks):
    """Return True when the landmarks look like a thumbs-up gesture.

    Two normalized-coordinate heuristics must both hold:
    - the thumb tip (landmark 4) and the thumb IP joint (landmark 3)
      sit at nearly the same height (|dy| < 0.05), and
    - every other fingertip (landmarks 8, 12, 16, 20) stays within 0.1
      of the wrist height (landmark 0), i.e. the four fingers are folded.
    """
    lm = hand_landmarks.landmark
    thumb_level = abs(lm[4].y - lm[3].y) < 0.05
    wrist_y = lm[0].y
    folded = all(abs(lm[i].y - wrist_y) <= 0.1 for i in (8, 12, 16, 20))
    return thumb_level and folded

第四步:搭建屏幕及内容显示设置

这里我们需要对截屏的各个步骤进行拆分,并对每个步骤的执行结果进行相应的处理(如屏闪,实时显示,操作完成提醒等)。

"""屏幕显示区"""
cap = cv2.VideoCapture(0)
cv2.namedWindow('Gesture Screenshot')
cv2.setMouseCallback('Gesture Screenshot', mouse_click)
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        continue

    frame = cv2.flip(frame, 1)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # 闪烁效果
    if blink_counter > 0:
        overlay = frame.copy()
        cv2.rectangle(overlay, (0, 0), (screen_w, screen_h), (255, 255, 255), -1)
        cv2.addWeighted(overlay, 0.3, frame, 0.7, 0, frame)
        blink_counter -= 1

    if not waiting_for_click:
        # 正常检测模式
        results = hands.process(rgb_frame)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                if is_thumbs_up(hand_landmarks):
                    cv2.putText(frame, "THUMBS UP DETECTED!", (50, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                    # 执行截屏并进入等待状态
                    screenshot = pyautogui.screenshot()
                    screenshot = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
                    cv2.imwrite(f'screenshot_{cv2.getTickCount()}.png', screenshot)
                    blink_counter = 3
                    waiting_for_click = True
    else:
        # 等待点击模式
        cv2.putText(frame, "SCREENSHOT SAVED!", (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
        draw_button(frame)
        cv2.putText(frame, "Please click 'CONTINUE' to enable next capture",
                    (10, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)

    cv2.imshow('Gesture Screenshot', frame)

    if cv2.waitKey(5) & 0xFF == 27:
        break

第五步:释放计算机内部资源

# Release the camera and destroy all OpenCV windows.
cap.release()
cv2.destroyAllWindows()

第六步:完整代码

#导入必要库
import cv2
import mediapipe as mp
import pyautogui
import numpy as np


"""程序初始化"""
#初始化MediaPipe手部模型
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.7)
mp_draw = mp.solutions.drawing_utils
#绘制屏幕尺寸并控制初始状态变量
screen_w, screen_h = pyautogui.size()
waiting_for_click = False
blink_counter = 0
btn_rect = (500, 400, 200, 60)



"""内部逻辑函数实现区"""
def mouse_click(event, x, y, flags, param):
    """OpenCV mouse callback for the preview window.

    Clears the module-level ``waiting_for_click`` flag when a left-button
    press lands inside the CONTINUE button (``btn_rect``), re-enabling
    gesture detection.
    """
    global waiting_for_click
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    bx, by, bw, bh = btn_rect
    # Only a click strictly inside the button rectangle counts.
    if bx < x < bx + bw and by < y < by + bh:
        waiting_for_click = False
def draw_button(frame):
    """Draw the green CONTINUE button onto *frame* (modified in place)."""
    x, y, w, h = btn_rect
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    # Filled background, then a darker border, then the white label.
    cv2.rectangle(frame, top_left, bottom_right, (50, 200, 50), -1)
    cv2.rectangle(frame, top_left, bottom_right, (30, 180, 30), 2)
    cv2.putText(frame, "CONTINUE", (x + 10, y + 40),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
def is_thumbs_up(hand_landmarks):
    """Return True when the landmarks look like a thumbs-up gesture.

    Two normalized-coordinate heuristics must both hold:
    - the thumb tip (landmark 4) and the thumb IP joint (landmark 3)
      sit at nearly the same height (|dy| < 0.05), and
    - every other fingertip (landmarks 8, 12, 16, 20) stays within 0.1
      of the wrist height (landmark 0), i.e. the four fingers are folded.
    """
    lm = hand_landmarks.landmark
    thumb_level = abs(lm[4].y - lm[3].y) < 0.05
    wrist_y = lm[0].y
    folded = all(abs(lm[i].y - wrist_y) <= 0.1 for i in (8, 12, 16, 20))
    return thumb_level and folded



"""屏幕显示区"""
cap = cv2.VideoCapture(0)
cv2.namedWindow('Gesture Screenshot')
cv2.setMouseCallback('Gesture Screenshot', mouse_click)
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        continue

    frame = cv2.flip(frame, 1)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # 闪烁效果
    if blink_counter > 0:
        overlay = frame.copy()
        cv2.rectangle(overlay, (0, 0), (screen_w, screen_h), (255, 255, 255), -1)
        cv2.addWeighted(overlay, 0.3, frame, 0.7, 0, frame)
        blink_counter -= 1

    if not waiting_for_click:
        # 正常检测模式
        results = hands.process(rgb_frame)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                if is_thumbs_up(hand_landmarks):
                    cv2.putText(frame, "THUMBS UP DETECTED!", (50, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                    # 执行截屏并进入等待状态
                    screenshot = pyautogui.screenshot()
                    screenshot = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
                    cv2.imwrite(f'screenshot_{cv2.getTickCount()}.png', screenshot)
                    blink_counter = 3
                    waiting_for_click = True
    else:
        # 等待点击模式
        cv2.putText(frame, "SCREENSHOT SAVED!", (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
        draw_button(frame)
        cv2.putText(frame, "Please click 'CONTINUE' to enable next capture",
                    (10, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)

    cv2.imshow('Gesture Screenshot', frame)

    if cv2.waitKey(5) & 0xFF == 27:
        break


# Release the camera and destroy all OpenCV windows.
cap.release()
cv2.destroyAllWindows()

第七步:操作指南

运行程序后等待一段模型预建立时间,计算机屏幕上会弹出一个新窗口,窗口中会显示用户手部连接点(关节)和连接线(骨骼)。用户可将手调整为点赞手势(即大拇指伸直,其余四根手指弯曲在一起),接着调整手与计算机中显示屏的距离(点赞手势对,距离合适就行,与大拇指的具体朝向无关),当计算机摄像头闪烁一下,显示屏中出现以下三条信息:1,"SCREENSHOT SAVED!"(截屏已保存) ;2,"Please click 'CONTINUE' to enable next capture"(请点击"CONTINUE"继续截屏) 3,"CONTINUE"按钮 时表明截屏已完成,你可以选择继续等待或点击"CONTINUE"按钮继续进行截屏操作。(请忽略红色警示)

第八步:运行效果展示

(我是闪云-微星,感谢你的关注/点赞)

作者:闪云-微星

物联沃分享整理
物联沃-IOTWORD物联网 » Python-基于mediapipe,pyautogui,cv2和numpy的电脑手势截屏工具

发表回复