Reference
To build an AI home trainer, I used MediaPipe, an AI framework from Google.
Before diving into development, I studied MediaPipe through this video, which uses OpenCV and MediaPipe's hand pose model to detect hands and calculate finger angles.
The channel has plenty of other OpenCV videos, so I recommend it if you want to study or apply OpenCV.
Code
!pip install mediapipe opencv-python
import mediapipe as mp
import cv2
import numpy as np
import uuid
import os
mp_drawing = mp.solutions.drawing_utils  # drawing helpers for rendering landmarks
mp_hands = mp.solutions.hands  # the MediaPipe Hands solution
Real-time processing code
cap = cv2.VideoCapture(0)
with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop if the frame grab failed
            break
        # BGR 2 RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Flip on horizontal
        image = cv2.flip(image, 1)
        # Set flag
        image.flags.writeable = False
        # Detections
        results = hands.process(image)
        # Set flag to true
        image.flags.writeable = True
        # RGB 2 BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Detections
        print(results)
        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )
        # Show the webcam feed in a popup window with the hand keypoints drawn on top
        cv2.imshow('Hand Tracking', image)
        # Press q to close the window
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
mp_drawing.DrawingSpec
mediapipe.python.solutions.drawing_utils.DrawingSpec
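DrawingSpec is a small style container passed to draw_landmarks: the first spec styles the landmark dots, the second the connecting lines. A minimal sketch of the two specs used above (the color tuples act as BGR here, since drawing happens on the BGR frame):
landmark_spec = mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4)    # dot style
connection_spec = mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2)  # line style
# mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS, landmark_spec, connection_spec)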
os.makedirs('Output Images', exist_ok=True)  # folder for saved frames (exist_ok avoids an error on re-run)
cap = cv2.VideoCapture(0)
# Set the options for MediaPipe's Hands (hand-tracking) module
with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop if the frame grab failed
            break
        # BGR 2 RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Flip on horizontal
        image = cv2.flip(image, 1)
        # Set flag
        image.flags.writeable = False
        # Detections: recognize hands with MediaPipe's Hands module
        results = hands.process(image)
        # Set flag to true
        image.flags.writeable = True
        # RGB 2 BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Detections
        print(results)
        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )
        # Save our image
        #cv2.imwrite(os.path.join('Output Images', '{}.jpg'.format(uuid.uuid1())), image)
        cv2.imshow('Hand Tracking', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
Detect Left and Right Hands
results.multi_hand_landmarks[0].landmark[mp_hands.HandLandmark.WRIST]  # landmark 0 of the first hand is the wrist (mp_hands.HandLandmark.WRIST == 0); this is how to extract the landmark of a specific joint
Out[14]:
x: 0.27364352345466614
y: 0.9096192717552185
z: 7.019327199486725e-07
In [15]:
results.multi_handedness  # which side each hand on screen is; gives index and label info
Out[15]:
[classification {
index: 0
score: 0.9747926592826843
label: "Left"
}]
In [16]:
mp_hands.HandLandmark.WRIST  # 0; substitute any joint name for WRIST to get that landmark (keypoint) number
Out[16]:
<HandLandmark.WRIST: 0>
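HandLandmark is an IntEnum, so all 21 keypoints and their indices can be listed (a quick sketch, not from the video):
for landmark in mp_hands.HandLandmark:
    print(landmark.value, landmark.name)
# 0 WRIST
# 1 THUMB_CMC
# ...
# 20 PINKY_TIP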
In [18]:
results.multi_hand_landmarks[0]  # MediaPipe tracking result: the x, y, z coordinates of every landmark of hand 0
Out[18]:
landmark {
x: 0.27364352345466614
y: 0.9096192717552185
z: 7.019327199486725e-07
}
landmark {
x: 0.38018321990966797
y: 0.8540207743644714
z: -0.038173794746398926
}
landmark {
x: 0.45915377140045166
y: 0.732987105846405
z: -0.042039256542921066
}
landmark {
x: 0.5025613903999329
y: 0.6182169914245605
z: -0.04342743754386902
}
landmark {
x: 0.541510820388794
y: 0.5378124117851257
z: -0.0420575886964798
}
landmark {
x: 0.41051363945007324
y: 0.5414581298828125
z: 0.006366197019815445
}
landmark {
x: 0.4504733979701996
y: 0.40215539932250977
z: -0.0037480045575648546
}
landmark {
x: 0.47080016136169434
y: 0.3195309042930603
z: -0.018755869939923286
}
landmark {
x: 0.4854212999343872
y: 0.24869224429130554
z: -0.030455784872174263
}
landmark {
x: 0.35118648409843445
y: 0.5141069293022156
z: 0.009581044316291809
}
landmark {
x: 0.3829210698604584
y: 0.35697340965270996
z: 0.006407495122402906
}
landmark {
x: 0.40112513303756714
y: 0.2565094530582428
z: -0.00741950748488307
}
landmark {
x: 0.41316142678260803
y: 0.17668086290359497
z: -0.01903487555682659
}
landmark {
x: 0.28920114040374756
y: 0.5163957476615906
z: 0.004774598404765129
}
landmark {
x: 0.30650538206100464
y: 0.36824721097946167
z: -0.004213959909975529
}
landmark {
x: 0.31972819566726685
y: 0.27921971678733826
z: -0.021862700581550598
}
landmark {
x: 0.33055397868156433
y: 0.20589753985404968
z: -0.035758987069129944
}
landmark {
x: 0.2221979796886444
y: 0.5407060980796814
z: -0.004935352597385645
}
landmark {
x: 0.20761287212371826
y: 0.4362854063510895
z: -0.0205098707228899
}
landmark {
x: 0.1967395693063736
y: 0.3641899526119232
z: -0.03204301372170448
}
landmark {
x: 0.1896628886461258
y: 0.2958180010318756
z: -0.03953040391206741
}
In [21]:
results.multi_handedness[0].classification[0]
Out[21]:
index: 0
score: 0.9747926592826843
label: "Left"
In [22]:
results.multi_handedness[0].classification[0].index
Out[22]:
0
In [19]:
results.multi_handedness[0].classification[0].index == num
Out[19]:
True
In [20]:
round(results.multi_handedness[0].classification[0].score, 2)  # round to two decimal places
Out[20]:
0.97
In [23]:
def get_label(index, hand, results):
    output = None  # value to return
    for idx, classification in enumerate(results.multi_handedness):
        if classification.classification[0].index == index:
            # Process results
            label = classification.classification[0].label  # which hand (Left/Right)
            score = classification.classification[0].score  # its confidence
            text = '{} {}'.format(label, round(score, 2))  # combine into one display string
            # Extract coordinates: scale the wrist landmark (x, y) by the webcam window size
            coords = tuple(np.multiply(
                np.array((hand.landmark[mp_hands.HandLandmark.WRIST].x, hand.landmark[mp_hands.HandLandmark.WRIST].y)),
                [640, 480]).astype(int))
            output = text, coords
    return output
In [24]:
get_label(num, hand, results)  # (which hand it is + its score, and that wrist's coordinates)
Out[24]:
('Left 0.97', (175, 436))
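Note that get_label hardcodes a 640x480 window. If your webcam runs at a different resolution, the actual frame size can be read from the capture instead (a small sketch, not from the video):
frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))   # actual capture width
frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # actual capture height
# ...then scale the landmark by [frame_w, frame_h] instead of [640, 480]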
cap = cv2.VideoCapture(0)
with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop if the frame grab failed
            break
        # BGR 2 RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Flip on horizontal
        image = cv2.flip(image, 1)
        # Set flag
        image.flags.writeable = False
        # Detections
        results = hands.process(image)
        # Set flag to true
        image.flags.writeable = True
        # RGB 2 BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Detections
        print(results)
        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )
                # Render left or right detection (which hand it is, plus the wrist coordinates)
                if get_label(num, hand, results):  # check this hand actually matched a handedness result
                    text, coord = get_label(num, hand, results)
                    # Render the values on the image (show the text next to each wrist)
                    cv2.putText(image, text, coord, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        # Save our image
        #cv2.imwrite(os.path.join('Output Images', '{}.jpg'.format(uuid.uuid1())), image)
        cv2.imshow('Hand Tracking', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
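A small aside: the loop above calls get_label twice per detected hand; calling it once and reusing the result is slightly cleaner (a sketch of the inner block only):
label = get_label(num, hand, results)  # call once, reuse the result
if label:  # only render when this hand matched a handedness result
    text, coord = label
    cv2.putText(image, text, coord, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)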
Angle Calculation
!pip install matplotlib
from matplotlib import pyplot as plt
Joint combinations to render (joint_list)

joint_list = [[8,7,6], [12,11,10], [16,15,14], [20,19,18]]  # angles for the 2nd-5th fingers: each triple is [tip, DIP, PIP]
In [30]:
joint_list[3]
Out[30]:
[20, 19, 18]
hand = results.multi_hand_landmarks[0]
joint = joint_list[0] #[8,7,6]
In [35]:
hand.landmark  # all the landmarks of a single hand
Out[35]:
[x: 0.7496120929718018
y: 0.9975559711456299
z: 5.051584821558208e-07
, x: 0.8249234557151794
y: 0.9698334336280823
z: -0.029117818921804428
, x: 0.8834525346755981
y: 0.8944849371910095
z: -0.04313282668590546
, x: 0.922241747379303
y: 0.8271883726119995
z: -0.057437218725681305
, x: 0.9601700305938721
y: 0.7736033797264099
z: -0.07138820737600327
, x: 0.8532766103744507
y: 0.7288336157798767
z: -0.015552874654531479
, x: 0.8800777792930603
y: 0.6179497241973877
z: -0.04041074961423874
, x: 0.9005030393600464
y: 0.5505247712135315
z: -0.062258727848529816
, x: 0.9206345081329346
y: 0.4913448691368103
z: -0.07846494764089584
, x: 0.8103947639465332
y: 0.7051475048065186
z: -0.022324832156300545
, x: 0.8249076008796692
y: 0.5707515478134155
z: -0.044776104390621185
, x: 0.839984118938446
y: 0.48568087816238403
z: -0.06687182188034058
, x: 0.8547935485839844
y: 0.4157031774520874
z: -0.08333905786275864
, x: 0.767013669013977
y: 0.7085256576538086
z: -0.034401360899209976
, x: 0.7778387069702148
y: 0.5817782282829285
z: -0.05775671452283859
, x: 0.7894881367683411
y: 0.4965099096298218
z: -0.07700387388467789
, x: 0.8023354411125183
y: 0.4208310842514038
z: -0.09064148366451263
, x: 0.7226788997650146
y: 0.7328854203224182
z: -0.049741435796022415
, x: 0.720589280128479
y: 0.6354767084121704
z: -0.0740007534623146
, x: 0.7204594612121582
y: 0.5663331747055054
z: -0.08717162162065506
, x: 0.7229540348052979
y: 0.49893712997436523
z: -0.09584037959575653
]
In [37]:
hand.landmark[joint[0]]  # coordinates of landmark 8 (the index fingertip)
Out[37]:
x: 0.9206345081329346
y: 0.4913448691368103
z: -0.07846494764089584
# Store the (x, y) of a chosen landmark as a single array
#a = np.array([hand.landmark[joint[0]].x, hand.landmark[joint[0]].y])
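The same idea extends to the whole hand: all 21 landmarks can be packed into one numpy array for easier slicing (a quick sketch, not from the video):
coords = np.array([[lm.x, lm.y, lm.z] for lm in hand.landmark])
print(coords.shape)  # (21, 3)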
We'll compute the angles while rendering in real time; a conversion step is needed, using standard trigonometry (see the worked sketch after this list):
- Compute radians (np.arctan2: computes the absolute angle, in radians, between two points)
- Convert radians to degrees: np.abs(radians * 180.0 / np.pi)
- Render: overlay each angle next to the corresponding finger on screen
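A worked sketch of the formula with hypothetical points (a right angle at the middle joint b should come out as 90 degrees):
a = np.array([1.0, 0.0])  # fingertip-side point (hypothetical)
b = np.array([0.0, 0.0])  # middle joint: the vertex of the angle
c = np.array([0.0, 1.0])  # knuckle-side point (hypothetical)
radians = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
angle = np.abs(radians * 180.0 / np.pi)
if angle > 180.0:  # fold reflex angles back into the 0-180 range
    angle = 360.0 - angle
print(angle)  # 90.0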
In [38]:
def draw_finger_angles(image, results, joint_list):  # render finger angles on the image (real time)
    # Loop through hands
    for hand in results.multi_hand_landmarks:  # for each detected hand
        # Loop through joint sets
        for joint in joint_list:  # extract the (x, y) of the three landmarks forming each angle
            a = np.array([hand.landmark[joint[0]].x, hand.landmark[joint[0]].y])  # First coord
            b = np.array([hand.landmark[joint[1]].x, hand.landmark[joint[1]].y])  # Second coord
            c = np.array([hand.landmark[joint[2]].x, hand.landmark[joint[2]].y])  # Third coord
            radians = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
            angle = np.abs(radians * 180.0 / np.pi)
            if angle > 180.0:  # we only want angles in the 0-180 range
                angle = 360 - angle
            # Draw the angle with OpenCV at b's on-screen position (image, text, position, font settings)
            cv2.putText(image, str(round(angle, 2)), tuple(np.multiply(b, [640, 480]).astype(int)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA)
    return image
In [39]:
test_image = draw_finger_angles(image, results, joint_list)
In [40]:
test_image
Out[40]:
array([[[166, 167, 171],
[166, 167, 171],
[166, 167, 170],
...,
[197, 185, 181],
[199, 187, 183],
[202, 190, 186]],
[[165, 166, 170],
[165, 166, 170],
[166, 166, 169],
...,
[201, 189, 185],
[203, 191, 187],
[206, 194, 190]],
[[166, 167, 171],
[166, 166, 170],
[166, 166, 169],
...,
[202, 190, 186],
[202, 190, 186],
[203, 191, 187]],
...,
[[181, 174, 177],
[181, 174, 177],
[182, 175, 178],
...,
[178, 168, 172],
[174, 168, 172],
[174, 170, 173]],
[[179, 172, 175],
[178, 171, 174],
[179, 172, 175],
...,
[178, 166, 172],
[175, 168, 173],
[173, 169, 174]],
[[180, 173, 176],
[179, 172, 175],
[179, 172, 175],
...,
[178, 166, 172],
[174, 167, 172],
[173, 168, 173]]], dtype=uint8)
plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
plt.show()
Real-time finger angle calculation
cap = cv2.VideoCapture(0)
with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:  # stop if the frame grab failed
            break
        # BGR 2 RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Flip on horizontal
        image = cv2.flip(image, 1)
        # Set flag
        image.flags.writeable = False
        # Detections
        results = hands.process(image)
        # Set flag to true
        image.flags.writeable = True
        # RGB 2 BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # Detections
        print(results)
        # Rendering results
        if results.multi_hand_landmarks:
            for num, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                                          )
                # Render left or right detection
                if get_label(num, hand, results):
                    text, coord = get_label(num, hand, results)
                    cv2.putText(image, text, coord, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            # Draw angles to image from joint list
            draw_finger_angles(image, results, joint_list)
        # Save our image
        #cv2.imwrite(os.path.join('Output Images', '{}.jpg'.format(uuid.uuid1())), image)
        cv2.imshow('Hand Tracking', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()