728x90
반응형

 

Darknet 프레임 워크를 이용하여 Python3 환경에서

Video 영상을 입력으로 Object Detection을 하는 방법이다. 

 

이 때, Darknet 소스코드에 있는 darknet/python/darknet.py 파일을 이용하여 바로 video 영상을 입력 받을 수 없다. 

소스코드를 보면 알겠지만, 단지 이미지 패스를 통해 detect 함수에서 이미지를 불러오고 있다. 

 

이 때 nparray to image 과정이 필요하다.

바로 video 영상을 입력 받을 수 있도록 하는 방법을 소개한다. 

 

 

1. 소스코드 다운

 

$ git clone https://github.com/pjreddie/darknet.git

https://github.com/pjreddie/darknet.git

 

pjreddie/darknet

Convolutional Neural Networks. Contribute to pjreddie/darknet development by creating an account on GitHub.

github.com

 

 

2. 소스코드 수정

 

  • darknet/src/image.c
  • darknet/src/image.h
  • darknet/Makefile
  • darknet/python/darknet.py

 

 

 

2.1 image.c 

 

적절한 곳에  ifdef 문 붙여넣기 

#ifdef NUMPY
/* Build a darknet image from a raw uint8 ndarray buffer.
 *
 * src     - pointer to the first byte of an h x w x c uint8 array
 * shape   - {h, w, c}
 * strides - numpy byte strides per axis (uint8 data, so byte == element)
 *
 * Pixel values are rescaled from [0, 255] to [0, 1]; rgbgr_image() then
 * swaps the first and last channels (presumably BGR -> RGB for OpenCV
 * frames -- confirm against your capture source).
 */
image ndarray_to_image(unsigned char* src, long* shape, long* strides)
{
    int h = shape[0];
    int w = shape[1];
    int c = shape[2];
    int stride_y = strides[0];
    int stride_x = strides[1];
    int stride_c = strides[2];
    image im = make_image(w, h, c);
    int y, x, k;

    /* darknet stores images channel-planar: plane k, then row y, then col x */
    for (y = 0; y < h; ++y) {
        for (k = 0; k < c; ++k) {
            for (x = 0; x < w; ++x) {
                int dst = k*w*h + y*w + x;
                int off = stride_y*y + stride_x*x + stride_c*k;
                im.data[dst] = src[off]/255.;
            }
        }
    }

    rgbgr_image(im);

    return im;
}
#endif

 

 

 

2.2 image.h

 

이 소스 코드 또한 적절한 곳에 붙여넣기 

#ifdef NUMPY
/* Defined in image.c when built with NUMPY=1: converts a raw uint8 ndarray
 * buffer (shape {h, w, c}, byte strides) into a darknet image. */
image ndarray_to_image(unsigned char* src, long* shape, long* strides);
#endif

 

 

 

2.3 Makefile

 

아래 코드 또한 적절한 곳에 붙여넣기

# When NUMPY=1, define -DNUMPY so the ndarray_to_image bridge added to
# image.c/image.h is compiled in. The include paths below point at the
# Python 2.7 numpy headers -- adjust them for your Python installation.
ifeq ($(NUMPY), 1) 
COMMON+= -DNUMPY -I/usr/include/python2.7/ -I/usr/lib/python2.7/dist-packages/numpy/core/include/numpy/
CFLAGS+= -DNUMPY
endif

 

그리고 아래와 같이, Makefile 윗부분 수정하기

# Build switches: GPU/CUDNN for CUDA inference, NUMPY for the ndarray
# bridge above. OPENCV may stay 0 if frames are read from Python via cv2.
GPU=1
CUDNN=1
OPENCV=1
OPENMP=0
NUMPY=1
DEBUG=0

GPU, CUDNN, OPENCV, NUMPY(image.c 및 image.h 에 수정한 내역) 을 사용한다는 의미이다.

OPENCV 는 파이썬 외부 모듈인 cv2 (import cv2) 로 영상을 읽는 경우 0으로 설정해도 된다. 

 

 

 

 

 

3. 컴파일

 

~~~~/darknet/ 위치에서 make  하여 컴파일하면

~/darknet$ make

libdarknet.so 라는 파일이 생성된다. 

 

 

 

 

4. darknet.py 

from ctypes import *
import math
import random
import cv2
import numpy as np

def c_array(ctype, values):
    """Copy *values* into a newly allocated ctypes array of *ctype*."""
    buf = (ctype * len(values))()
    for idx, val in enumerate(values):
        buf[idx] = val
    return buf

class BOX(Structure):
    """ctypes mirror of darknet's `box` struct (center x/y plus size)."""
    _fields_ = [
        ("x", c_float),  # box center x
        ("y", c_float),  # box center y
        ("w", c_float),  # box width
        ("h", c_float),  # box height
    ]

class DETECTION(Structure):
    """ctypes mirror of darknet's `detection` struct."""
    _fields_ = [
        ("bbox", BOX),               # predicted box (center x/y, w, h)
        ("classes", c_int),          # number of classes scored
        ("prob", POINTER(c_float)),  # per-class probabilities
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]


class IMAGE(Structure):
    """ctypes mirror of darknet's `image` struct (float pixel buffer)."""
    _fields_ = [
        ("w", c_int),                # width in pixels
        ("h", c_int),                # height in pixels
        ("c", c_int),                # channel count
        ("data", POINTER(c_float)),  # channel-planar pixel data
    ]

class METADATA(Structure):
    """ctypes mirror of darknet's `metadata` struct (class names table)."""
    _fields_ = [
        ("classes", c_int),            # number of class names
        ("names", POINTER(c_char_p)),  # array of class name strings
    ]

    

# --- libdarknet function bindings ----------------------------------------
# NOTE(review): this absolute path is machine-specific -- point it at the
# libdarknet.so produced by your own `make`.
lib = CDLL("/host_temp/darknet/libdarknet.so", RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

# network_predict(net, float* input) -> float* raw predictions
predict = lib.network_predict
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

# Select the CUDA device used for inference.
set_gpu = lib.cuda_set_device
set_gpu.argtypes = [c_int]

make_image = lib.make_image
make_image.argtypes = [c_int, c_int, c_int]
make_image.restype = IMAGE

# Extract the detection array produced by the last forward pass.
get_network_boxes = lib.get_network_boxes
get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)]
get_network_boxes.restype = POINTER(DETECTION)

make_network_boxes = lib.make_network_boxes
make_network_boxes.argtypes = [c_void_p]
make_network_boxes.restype = POINTER(DETECTION)

# Free the detection array returned by get_network_boxes.
free_detections = lib.free_detections
free_detections.argtypes = [POINTER(DETECTION), c_int]

free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

network_predict = lib.network_predict
network_predict.argtypes = [c_void_p, POINTER(c_float)]

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

# load_network(cfg_path, weights_path, clear) -> opaque net handle
load_net = lib.load_network
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

# Non-maximum-suppression variants operating on a DETECTION array in place.
do_nms_obj = lib.do_nms_obj
do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

do_nms_sort = lib.do_nms_sort
do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]

free_image = lib.free_image
free_image.argtypes = [IMAGE]

letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

# get_metadata(data_file) -> METADATA (class count + name table)
load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

rgbgr_image = lib.rgbgr_image
rgbgr_image.argtypes = [IMAGE]

# Run a forward pass directly on a darknet IMAGE.
predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)

# added: C-side ndarray_to_image (compiled in only when NUMPY=1 in Makefile)
ndarray_image = lib.ndarray_to_image
ndarray_image.argtypes = [POINTER(c_ubyte), POINTER(c_long), POINTER(c_long)]
ndarray_image.restype = IMAGE

# add
def nparray_to_image(img):
    """Convert a numpy ndarray (H x W x C image) into a darknet IMAGE.

    Fixes over the original:
    - the array is normalized to a C-contiguous uint8 buffer, so the C side
      always receives the `unsigned char*` layout it expects;
    - shape and strides are passed via ``shape_as``/``strides_as`` with
      ``c_long``, matching ndarray_image's declared argtypes (the integer
      type of the bare ``.ctypes.shape`` attribute is platform dependent).
    """
    # Keep a reference to the (possibly copied) array for the duration of
    # the foreign call so its buffer stays alive.
    arr = np.ascontiguousarray(img, dtype=np.uint8)
    data = arr.ctypes.data_as(POINTER(c_ubyte))
    shape = arr.ctypes.shape_as(c_long)
    strides = arr.ctypes.strides_as(c_long)

    return ndarray_image(data, shape, strides)


def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
    """Run the network on an ndarray frame and return its detections.

    Returns a list of (class_name, probability, (cx, cy, w, h)) tuples,
    sorted by descending probability. Box coordinates are darknet's
    center-based format.
    """
    # Convert the ndarray frame to a darknet IMAGE (no temp file needed).
    im = nparray_to_image(image)

    pnum = pointer(c_int(0))
    predict_image(net, im)
    dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum)
    total = pnum[0]
    if nms:
        do_nms_obj(dets, total, meta.classes, nms)

    results = []
    for d in range(total):
        det = dets[d]
        for cls in range(meta.classes):
            p = det.prob[cls]
            if p > 0:
                box = det.bbox
                results.append((meta.names[cls], p, (box.x, box.y, box.w, box.h)))

    results.sort(key=lambda item: item[1], reverse=True)

    # Release C-side allocations before returning.
    free_image(im)
    free_detections(dets, total)

    return results

# add
def convert_box_value(r):
    """Convert darknet (center_x, center_y, w, h) boxes to (x, y, w, h).

    *r* is the list of (name, prob, (cx, cy, w, h)) tuples returned by
    detect(); (x, y) is the corner obtained by shifting the center by half
    the box size.
    """
    return [(cx - w / 2, cy - h / 2, w, h) for _, _, (cx, cy, w, h) in r]

# add
def draw(image, boxes):
    """Draw each (x, y, w, h) box on *image* in place with center markers.

    Corner coordinates are rounded to the nearest pixel and clamped to the
    frame bounds. NOTE(review): the original named its x coordinates "top"
    and its y coordinates "left"; renamed to x1/y1/x2/y2 for clarity.
    """
    for x, y, w, h in boxes:
        x1 = max(0, np.floor(x + 0.5).astype(int))
        y1 = max(0, np.floor(y + 0.5).astype(int))
        x2 = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
        y2 = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

        # Bounding box (blue, BGR order assumed -- typical for cv2 frames).
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)

        # Vertical and horizontal center lines plus a center dot.
        cv2.line(image, (x1 + int(w / 2), y1), (x1 + int(w / 2), y1 + int(h)), (0, 255, 0), 3)
        cv2.line(image, (x1, y1 + int(h / 2)), (x1 + int(w), y1 + int(h / 2)), (0, 255, 0), 3)
        cv2.circle(image, (x1 + int(w / 2), y1 + int(h / 2)), 2, (0, 0, 255), 5)
       

# add
# Entry point: run detection over a video file frame by frame.
if __name__ == "__main__":

    net = load_net(b"./yolov3.cfg", b"./yolov3.weights", 0)
    meta = load_meta(b"./person.data")

    cap = cv2.VideoCapture('./video.mp4')

    try:
        while cap.isOpened():

            ret, frame = cap.read()

            # End of stream (or read failure) -> stop the loop.
            if not ret:
                break

            r = detect(net, meta, frame)

            boxes = convert_box_value(r)

            draw(frame, boxes)

            cv2.imshow('frame', frame)

            # Press 'q' to quit early.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Fix: the original never released the capture device or closed the
        # display windows, leaking both on exit.
        cap.release()
        cv2.destroyAllWindows()





 

 

darknet 에서 내뱉는 bbox 형식은 center x, center y, width, height 라서

convert_box_value 함수를 통해 이를 x, y, width, height 로 바꾸어주었다.

 

 

 

그리고 python2에서는 아래와 같이 사용하고,

    net = load_net("./yolov3.cfg", "./yolov3.weights", 0)
    meta = load_meta("./person.data")

 

python3 에서는 아래와 같이 쌍따옴표 앞에 b 를 붙여 사용한다.

    net = load_net(b"./yolov3.cfg", b"./yolov3.weights", 0)
    meta = load_meta(b"./person.data")

 

load_meta, load_net 에 들어가는 cfg, weights, .data 파일은 넣고 싶은 파일을 넣길 바란다. 

 

 

 

 

 

 

참고자료

https://github.com/pjreddie/darknet/issues/289

불러오는 중입니다...

 

728x90
반응형