Python-基于mediapipe,pyautogui,cv2和numpy的电脑手势截屏工具
前言:在我们的日常生活中,手机已经成为我们每天工作,学习,生活的一个不可或缺的部分。众所周知:为了我们的使用方便,手机里面的很多功能非常人性化,既便捷又高效,其中就有手机的截屏方式,它们花样繁多,如三指截屏,手势截屏等。那么怎么在电脑里面也实现这个功能呢?(虽然我们知道电脑也有快捷的截屏方式-Win+Shift+S。但是很明显,这依然不够快捷,因为这至少需要用户的两次手动操作)。那么废话不多说,我们直接开始今天的Python学习之路-利用PyCharm手搓一个基于mediapipe,pyautogui,cv2和numpy的电脑手势截屏工具。
编程思路:本次编程我们需要pyautogui来获取屏幕的尺寸,这对于确定按钮的位置和大小非常有用;接着利用mediapipe初始化一个手部模型,用来检测视频流中的手部关键点;numpy提供了计算机在采集了用户手部姿态所得到的数据的处理,并与mediapipe所建立的数学模型进行比较等。cv2是本次编程的重头戏,它为调用计算机摄像头进行信息采集,以及用户手部模型的实时可视化展现等提供了可能。
第一步:导入库
本次编程所需调用的库:
cv2(opencv-python),numpy,mediapipe,pyautogui。(注意:这四个库都不是Python标准库,均为第三方库,需要先通过pip安装,例如:pip install opencv-python numpy mediapipe pyautogui。)
#导入必要库
import cv2
import mediapipe as mp
import pyautogui
import numpy as np
第二步:程序初始化
我们需要初始化判断模型并给出屏幕尺寸等相关变量参数。
"""程序初始化"""
#初始化MediaPipe手部模型
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.7)
mp_draw = mp.solutions.drawing_utils
#绘制屏幕尺寸并控制初始状态变量
screen_w, screen_h = pyautogui.size()
waiting_for_click = False
blink_counter = 0
btn_rect = (500, 400, 200, 60)
第三步:构建内部逻辑
接下来我们需要编写程序内部判断模型等的内在逻辑,并将鼠标,按键事件与计算机采集信息所得的对比结果联系起来。
"""内部逻辑函数实现区"""
def mouse_click(event, x, y, flags, param):
    """OpenCV mouse callback: clear the wait flag when CONTINUE is clicked."""
    global waiting_for_click
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    bx, by, bw, bh = btn_rect
    # Resume gesture detection only when the click lands inside the button
    if bx < x < bx + bw and by < y < by + bh:
        waiting_for_click = False
def draw_button(frame):
    """Draw the green CONTINUE button onto *frame* (modifies it in place)."""
    x, y, w, h = btn_rect
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    # Filled background, then a darker 2-px border on top of it
    cv2.rectangle(frame, top_left, bottom_right, (50, 200, 50), -1)
    cv2.rectangle(frame, top_left, bottom_right, (30, 180, 30), 2)
    # White button label
    cv2.putText(frame, "CONTINUE", (x + 10, y + 40),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
def is_thumbs_up(hand_landmarks):
    """Return True when the hand pose matches this tool's "thumbs up" rule.

    Heuristic on image-normalized y coordinates only:
    - the thumb tip (landmark 4) and its neighbour joint (landmark 3)
      sit at nearly the same height (|dy| < 0.05);
    - every other fingertip (8/12/16/20) stays within 0.1 of the wrist
      (landmark 0) height, i.e. the four fingers are curled in.
    """
    lm = hand_landmarks.landmark
    thumb_steady = abs(lm[4].y - lm[3].y) < 0.05
    wrist_y = lm[0].y
    folded = all(abs(lm[tip].y - wrist_y) <= 0.1 for tip in (8, 12, 16, 20))
    return thumb_steady and folded
第四步:搭建屏幕及内容显示设置
这里我们需要对截屏的各个步骤进行拆分,并对每个步骤的执行结果进行相应的处理(如屏闪,实时显示,操作完成提醒等)。
"""屏幕显示区"""
cap = cv2.VideoCapture(0)
cv2.namedWindow('Gesture Screenshot')
cv2.setMouseCallback('Gesture Screenshot', mouse_click)
while cap.isOpened():
success, frame = cap.read()
if not success:
continue
frame = cv2.flip(frame, 1)
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# 闪烁效果
if blink_counter > 0:
overlay = frame.copy()
cv2.rectangle(overlay, (0, 0), (screen_w, screen_h), (255, 255, 255), -1)
cv2.addWeighted(overlay, 0.3, frame, 0.7, 0, frame)
blink_counter -= 1
if not waiting_for_click:
# 正常检测模式
results = hands.process(rgb_frame)
if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks:
mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
if is_thumbs_up(hand_landmarks):
cv2.putText(frame, "THUMBS UP DETECTED!", (50, 50),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# 执行截屏并进入等待状态
screenshot = pyautogui.screenshot()
screenshot = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
cv2.imwrite(f'screenshot_{cv2.getTickCount()}.png', screenshot)
blink_counter = 3
waiting_for_click = True
else:
# 等待点击模式
cv2.putText(frame, "SCREENSHOT SAVED!", (10, 40),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
draw_button(frame)
cv2.putText(frame, "Please click 'CONTINUE' to enable next capture",
(10, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
cv2.imshow('Gesture Screenshot', frame)
if cv2.waitKey(5) & 0xFF == 27:
break
第五步:释放计算机内部资源
# Release the camera and close all OpenCV windows.
cap.release()
cv2.destroyAllWindows()
第六步:完整代码
#导入必要库
import cv2
import mediapipe as mp
import pyautogui
import numpy as np
"""程序初始化"""
#初始化MediaPipe手部模型
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.7)
mp_draw = mp.solutions.drawing_utils
#绘制屏幕尺寸并控制初始状态变量
screen_w, screen_h = pyautogui.size()
waiting_for_click = False
blink_counter = 0
btn_rect = (500, 400, 200, 60)
"""内部逻辑函数实现区"""
def mouse_click(event, x, y, flags, param):
    """OpenCV mouse callback: clear the wait flag when CONTINUE is clicked."""
    global waiting_for_click
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    bx, by, bw, bh = btn_rect
    # Resume gesture detection only when the click lands inside the button
    if bx < x < bx + bw and by < y < by + bh:
        waiting_for_click = False
def draw_button(frame):
    """Draw the green CONTINUE button onto *frame* (modifies it in place)."""
    x, y, w, h = btn_rect
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    # Filled background, then a darker 2-px border on top of it
    cv2.rectangle(frame, top_left, bottom_right, (50, 200, 50), -1)
    cv2.rectangle(frame, top_left, bottom_right, (30, 180, 30), 2)
    # White button label
    cv2.putText(frame, "CONTINUE", (x + 10, y + 40),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
def is_thumbs_up(hand_landmarks):
    """Return True when the hand pose matches this tool's "thumbs up" rule.

    Heuristic on image-normalized y coordinates only:
    - the thumb tip (landmark 4) and its neighbour joint (landmark 3)
      sit at nearly the same height (|dy| < 0.05);
    - every other fingertip (8/12/16/20) stays within 0.1 of the wrist
      (landmark 0) height, i.e. the four fingers are curled in.
    """
    lm = hand_landmarks.landmark
    thumb_steady = abs(lm[4].y - lm[3].y) < 0.05
    wrist_y = lm[0].y
    folded = all(abs(lm[tip].y - wrist_y) <= 0.1 for tip in (8, 12, 16, 20))
    return thumb_steady and folded
"""屏幕显示区"""
cap = cv2.VideoCapture(0)
cv2.namedWindow('Gesture Screenshot')
cv2.setMouseCallback('Gesture Screenshot', mouse_click)
while cap.isOpened():
success, frame = cap.read()
if not success:
continue
frame = cv2.flip(frame, 1)
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# 闪烁效果
if blink_counter > 0:
overlay = frame.copy()
cv2.rectangle(overlay, (0, 0), (screen_w, screen_h), (255, 255, 255), -1)
cv2.addWeighted(overlay, 0.3, frame, 0.7, 0, frame)
blink_counter -= 1
if not waiting_for_click:
# 正常检测模式
results = hands.process(rgb_frame)
if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks:
mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
if is_thumbs_up(hand_landmarks):
cv2.putText(frame, "THUMBS UP DETECTED!", (50, 50),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# 执行截屏并进入等待状态
screenshot = pyautogui.screenshot()
screenshot = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
cv2.imwrite(f'screenshot_{cv2.getTickCount()}.png', screenshot)
blink_counter = 3
waiting_for_click = True
else:
# 等待点击模式
cv2.putText(frame, "SCREENSHOT SAVED!", (10, 40),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
draw_button(frame)
cv2.putText(frame, "Please click 'CONTINUE' to enable next capture",
(10, 75), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
cv2.imshow('Gesture Screenshot', frame)
if cv2.waitKey(5) & 0xFF == 27:
break
# Release the camera and close all OpenCV windows.
cap.release()
cv2.destroyAllWindows()
第七步:操作指南
运行程序后等待一段模型预建立时间,计算机屏幕上会弹出一个新窗口,窗口中会显示用户手部连接点(关节)和连接线(骨骼)。用户可将手调整为点赞手势(即大拇指伸直,其余四根手指弯曲在一起),接着调整手与计算机中显示屏的距离(点赞手势对,距离合适就行,与大拇指的具体朝向无关),当计算机摄像头闪烁一下,显示屏中出现以下三条信息:1,"SCREENSHOT SAVED!"(截屏已保存) ;2,"Please click 'CONTINUE' to enable next capture"(请点击"CONTINUE"继续截屏) 3,"CONTINUE"按钮 时表明截屏已完成,你可以选择继续等待或点击"CONTINUE"按钮继续进行截屏操作。(请忽略红色警示)
第八步:运行效果展示
(我是闪云-微星,感谢你的关注/点赞)
作者:闪云-微星