Reference
To build an AI home trainer, I used MediaPipe, an AI framework from Google.
Before diving into development, I studied MediaPipe through this video, which uses OpenCV and MediaPipe's hand pose model to detect hands and calculate finger angles.
The channel has plenty of other OpenCV videos, so I recommend it if you want to study or apply OpenCV.
Code
!pip install mediapipe opencv-python
import mediapipe as mp
import cv2
import numpy as np
import uuid
import os
mp_drawing = mp.solutions.drawing_utils  # drawing helpers for rendering landmarks
mp_hands = mp.solutions.hands  # the MediaPipe Hands solution
Real-time processing code
cap = cv2.VideoCapture(0)
with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop if the frame grab failed
            break
        # BGR 2 RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Flip on horizontal
        image = cv2.flip(image, 1)
        # Set flag
        image.flags.writeable = False
        # Detections
        results = hands.process(image)
        # Set flag to true
        image.flags.writeable = True
        # RGB 2 BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Detections
        print(results)
        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )
        # Show the webcam feed in a popup window with the hand keypoints drawn on top
        cv2.imshow('Hand Tracking', image)
        # Press q to close the window
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
mp_drawing.DrawingSpec
mediapipe.python.solutions.drawing_utils.DrawingSpec
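DrawingSpec is a small style container passed to draw_landmarks: the first spec styles the landmark dots, the second the connecting lines. A minimal sketch of the two specs used above (the color tuples act as BGR here, since drawing happens on the BGR frame):
landmark_spec = mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4)    # dot style
connection_spec = mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2)  # line style
# mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS, landmark_spec, connection_spec)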
os.makedirs('Output Images', exist_ok=True)  # folder for saved frames (exist_ok avoids an error on re-run)
cap = cv2.VideoCapture(0)
# Set the options for MediaPipe's Hands (hand-tracking) module
with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop if the frame grab failed
            break
        # BGR 2 RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Flip on horizontal
        image = cv2.flip(image, 1)
        # Set flag
        image.flags.writeable = False
        # Detections: recognize hands with MediaPipe's Hands module
        results = hands.process(image)
        # Set flag to true
        image.flags.writeable = True
        # RGB 2 BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Detections
        print(results)
        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )
        # Save our image
        #cv2.imwrite(os.path.join('Output Images', '{}.jpg'.format(uuid.uuid1())), image)
        cv2.imshow('Hand Tracking', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
Detect Left and Right Hands
results.multi_hand_landmarks[0].landmark[mp_hands.HandLandmark.WRIST]  # landmark 0 of the first hand is the wrist (mp_hands.HandLandmark.WRIST == 0); this is how to extract the landmark of a specific joint
Out[14]:
x: 0.27364352345466614
y: 0.9096192717552185
z: 7.019327199486725e-07
In [15]:
results.multi_handedness  # which side each hand on screen is; gives index and label info
Out[15]:
[classification {
index: 0
score: 0.9747926592826843
label: "Left"
}]
In [16]:
mp_hands.HandLandmark.WRIST  # 0; substitute any joint name for WRIST to get that landmark (keypoint) number
Out[16]:
<HandLandmark.WRIST: 0>
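HandLandmark is an IntEnum, so all 21 keypoints and their indices can be listed (a quick sketch, not from the video):
for landmark in mp_hands.HandLandmark:
    print(landmark.value, landmark.name)
# 0 WRIST
# 1 THUMB_CMC
# ...
# 20 PINKY_TIP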
In [18]:
results.multi_hand_landmarks[0]  # MediaPipe tracking result: the x, y, z coordinates of every landmark of hand 0
Out[18]:
landmark {
x: 0.27364352345466614
y: 0.9096192717552185
z: 7.019327199486725e-07
}
landmark {
x: 0.38018321990966797
y: 0.8540207743644714
z: -0.038173794746398926
}
landmark {
x: 0.45915377140045166
y: 0.732987105846405
z: -0.042039256542921066
}
landmark {
x: 0.5025613903999329
y: 0.6182169914245605
z: -0.04342743754386902
}
landmark {
x: 0.541510820388794
y: 0.5378124117851257
z: -0.0420575886964798
}
landmark {
x: 0.41051363945007324
y: 0.5414581298828125
z: 0.006366197019815445
}
landmark {
x: 0.4504733979701996
y: 0.40215539932250977
z: -0.0037480045575648546
}
landmark {
x: 0.47080016136169434
y: 0.3195309042930603
z: -0.018755869939923286
}
landmark {
x: 0.4854212999343872
y: 0.24869224429130554
z: -0.030455784872174263
}
landmark {
x: 0.35118648409843445
y: 0.5141069293022156
z: 0.009581044316291809
}
landmark {
x: 0.3829210698604584
y: 0.35697340965270996
z: 0.006407495122402906
}
landmark {
x: 0.40112513303756714
y: 0.2565094530582428
z: -0.00741950748488307
}
landmark {
x: 0.41316142678260803
y: 0.17668086290359497
z: -0.01903487555682659
}
landmark {
x: 0.28920114040374756
y: 0.5163957476615906
z: 0.004774598404765129
}
landmark {
x: 0.30650538206100464
y: 0.36824721097946167
z: -0.004213959909975529
}
landmark {
x: 0.31972819566726685
y: 0.27921971678733826
z: -0.021862700581550598
}
landmark {
x: 0.33055397868156433
y: 0.20589753985404968
z: -0.035758987069129944
}
landmark {
x: 0.2221979796886444
y: 0.5407060980796814
z: -0.004935352597385645
}
landmark {
x: 0.20761287212371826
y: 0.4362854063510895
z: -0.0205098707228899
}
landmark {
x: 0.1967395693063736
y: 0.3641899526119232
z: -0.03204301372170448
}
landmark {
x: 0.1896628886461258
y: 0.2958180010318756
z: -0.03953040391206741
}
In [21]:
results.multi_handedness[0].classification[0]
Out[21]:
index: 0
score: 0.9747926592826843
label: "Left"
In [22]:
results.multi_handedness[0].classification[0].index
Out[22]:
0
In [19]:
results.multi_handedness[0].classification[0].index == num
Out[19]:
True
In [20]:
round(results.multi_handedness[0].classification[0].score, 2)  # round to two decimal places
Out[20]:
0.97
In [23]:
def get_label(index, hand, results):
    output = None  # value to return
    for idx, classification in enumerate(results.multi_handedness):
        if classification.classification[0].index == index:
            # Process results
            label = classification.classification[0].label  # which hand (Left/Right)
            score = classification.classification[0].score  # its confidence
            text = '{} {}'.format(label, round(score, 2))  # combine into one display string
            # Extract coordinates: scale the wrist landmark (x, y) by the webcam window size
            coords = tuple(np.multiply(
                np.array((hand.landmark[mp_hands.HandLandmark.WRIST].x, hand.landmark[mp_hands.HandLandmark.WRIST].y)),
                [640, 480]).astype(int))
            output = text, coords
    return output
In [24]:
get_label(num, hand, results)  # (which hand it is + its score, and that wrist's coordinates)
Out[24]:
('Left 0.97', (175, 436))
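Note that get_label hardcodes a 640x480 window. If your webcam runs at a different resolution, the actual frame size can be read from the capture instead (a small sketch, not from the video):
frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))   # actual capture width
frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # actual capture height
# ...then scale the landmark by [frame_w, frame_h] instead of [640, 480]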
cap = cv2.VideoCapture(0)
with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop if the frame grab failed
            break
        # BGR 2 RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Flip on horizontal
        image = cv2.flip(image, 1)
        # Set flag
        image.flags.writeable = False
        # Detections
        results = hands.process(image)
        # Set flag to true
        image.flags.writeable = True
        # RGB 2 BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Detections
        print(results)
        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )
                # Render left or right detection (which hand it is, plus the wrist coordinates)
                if get_label(num, hand, results):  # check this hand actually matched a handedness result
                    text, coord = get_label(num, hand, results)
                    # Render the values on the image (show the text next to each wrist)
                    cv2.putText(image, text, coord, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        # Save our image
        #cv2.imwrite(os.path.join('Output Images', '{}.jpg'.format(uuid.uuid1())), image)
        cv2.imshow('Hand Tracking', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
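A small aside: the loop above calls get_label twice per detected hand; calling it once and reusing the result is slightly cleaner (a sketch of the inner block only):
label = get_label(num, hand, results)  # call once, reuse the result
if label:  # only render when this hand matched a handedness result
    text, coord = label
    cv2.putText(image, text, coord, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)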
Angle Calculation
!pip install matplotlib
from matplotlib import pyplot as plt
Joint combinations to render (joint_list)

joint_list = [[8,7,6], [12,11,10], [16,15,14], [20,19,18]]  # angles for the 2nd-5th fingers: each triple is [tip, DIP, PIP]
In [30]:
joint_list[3]
Out[30]:
[20, 19, 18]
hand = results.multi_hand_landmarks[0]
joint = joint_list[0] #[8,7,6]
In [35]:
hand.landmark  # all the landmarks of a single hand
Out[35]:
[x: 0.7496120929718018
y: 0.9975559711456299
z: 5.051584821558208e-07
, x: 0.8249234557151794
y: 0.9698334336280823
z: -0.029117818921804428
, x: 0.8834525346755981
y: 0.8944849371910095
z: -0.04313282668590546
, x: 0.922241747379303
y: 0.8271883726119995
z: -0.057437218725681305
, x: 0.9601700305938721
y: 0.7736033797264099
z: -0.07138820737600327
, x: 0.8532766103744507
y: 0.7288336157798767
z: -0.015552874654531479
, x: 0.8800777792930603
y: 0.6179497241973877
z: -0.04041074961423874
, x: 0.9005030393600464
y: 0.5505247712135315
z: -0.062258727848529816
, x: 0.9206345081329346
y: 0.4913448691368103
z: -0.07846494764089584
, x: 0.8103947639465332
y: 0.7051475048065186
z: -0.022324832156300545
, x: 0.8249076008796692
y: 0.5707515478134155
z: -0.044776104390621185
, x: 0.839984118938446
y: 0.48568087816238403
z: -0.06687182188034058
, x: 0.8547935485839844
y: 0.4157031774520874
z: -0.08333905786275864
, x: 0.767013669013977
y: 0.7085256576538086
z: -0.034401360899209976
, x: 0.7778387069702148
y: 0.5817782282829285
z: -0.05775671452283859
, x: 0.7894881367683411
y: 0.4965099096298218
z: -0.07700387388467789
, x: 0.8023354411125183
y: 0.4208310842514038
z: -0.09064148366451263
, x: 0.7226788997650146
y: 0.7328854203224182
z: -0.049741435796022415
, x: 0.720589280128479
y: 0.6354767084121704
z: -0.0740007534623146
, x: 0.7204594612121582
y: 0.5663331747055054
z: -0.08717162162065506
, x: 0.7229540348052979
y: 0.49893712997436523
z: -0.09584037959575653
]
In [37]:
hand.landmark[joint[0]]  # coordinates of landmark 8 (the index fingertip)
Out[37]:
x: 0.9206345081329346
y: 0.4913448691368103
z: -0.07846494764089584
# Store the (x, y) of a chosen landmark as a single array
#a = np.array([hand.landmark[joint[0]].x, hand.landmark[joint[0]].y])
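The same idea extends to the whole hand: all 21 landmarks can be packed into one numpy array for easier slicing (a quick sketch, not from the video):
coords = np.array([[lm.x, lm.y, lm.z] for lm in hand.landmark])
print(coords.shape)  # (21, 3)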
We'll compute the angles while rendering in real time; a conversion step is needed, using standard trigonometry (see the worked sketch after this list):
- Compute radians (np.arctan2: computes the absolute angle, in radians, between two points)
- Convert radians to degrees: np.abs(radians * 180.0 / np.pi)
- Render: overlay each angle next to the corresponding finger on screen
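A worked sketch of the formula with hypothetical points (a right angle at the middle joint b should come out as 90 degrees):
a = np.array([1.0, 0.0])  # fingertip-side point (hypothetical)
b = np.array([0.0, 0.0])  # middle joint: the vertex of the angle
c = np.array([0.0, 1.0])  # knuckle-side point (hypothetical)
radians = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
angle = np.abs(radians * 180.0 / np.pi)
if angle > 180.0:  # fold reflex angles back into the 0-180 range
    angle = 360.0 - angle
print(angle)  # 90.0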
In [38]:
def draw_finger_angles(image, results, joint_list):  # render finger angles on the image (real time)
    # Loop through hands
    for hand in results.multi_hand_landmarks:  # for each detected hand
        # Loop through joint sets
        for joint in joint_list:  # extract the (x, y) of the three landmarks forming each angle
            a = np.array([hand.landmark[joint[0]].x, hand.landmark[joint[0]].y])  # First coord
            b = np.array([hand.landmark[joint[1]].x, hand.landmark[joint[1]].y])  # Second coord
            c = np.array([hand.landmark[joint[2]].x, hand.landmark[joint[2]].y])  # Third coord
            radians = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
            angle = np.abs(radians * 180.0 / np.pi)
            if angle > 180.0:  # we only want angles in the 0-180 range
                angle = 360 - angle
            # Draw the angle with OpenCV at b's on-screen position (image, text, position, font settings)
            cv2.putText(image, str(round(angle, 2)), tuple(np.multiply(b, [640, 480]).astype(int)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA)
    return image
In [39]:
test_image = draw_finger_angles(image, results, joint_list)
In [40]:
test_image
Out[40]:
array([[[166, 167, 171],
[166, 167, 171],
[166, 167, 170],
...,
[197, 185, 181],
[199, 187, 183],
[202, 190, 186]],
[[165, 166, 170],
[165, 166, 170],
[166, 166, 169],
...,
[201, 189, 185],
[203, 191, 187],
[206, 194, 190]],
[[166, 167, 171],
[166, 166, 170],
[166, 166, 169],
...,
[202, 190, 186],
[202, 190, 186],
[203, 191, 187]],
...,
[[181, 174, 177],
[181, 174, 177],
[182, 175, 178],
...,
[178, 168, 172],
[174, 168, 172],
[174, 170, 173]],
[[179, 172, 175],
[178, 171, 174],
[179, 172, 175],
...,
[178, 166, 172],
[175, 168, 173],
[173, 169, 174]],
[[180, 173, 176],
[179, 172, 175],
[179, 172, 175],
...,
[178, 166, 172],
[174, 167, 172],
[173, 168, 173]]], dtype=uint8)
plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
plt.show()
Real-time finger angle calculation
cap = cv2.VideoCapture(0)
with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop if the frame grab failed
            break
        # BGR 2 RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Flip on horizontal
        image = cv2.flip(image, 1)
        # Set flag
        image.flags.writeable = False
        # Detections
        results = hands.process(image)
        # Set flag to true
        image.flags.writeable = True
        # RGB 2 BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Detections
        print(results)
        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )
                # Render left or right detection
                if get_label(num, hand, results):
                    text, coord = get_label(num, hand, results)
                    cv2.putText(image, text, coord, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            # Draw angles to image from joint list
            draw_finger_angles(image, results, joint_list)
        # Save our image
        #cv2.imwrite(os.path.join('Output Images', '{}.jpg'.format(uuid.uuid1())), image)
        cv2.imshow('Hand Tracking', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()