前言
回顾上文,我完成了基本的GUI界面设计,以及窗口图像绘制的工具代码。当然这个绘制窗口的代码有点问题,那就是绘制的窗口会闪烁。我看了很多解决方案,也都试了,包括双缓存方案,但都存在一定的问题,窗口闪烁依然存在。当然也有可能是游戏本身导致的,具体原因是什么,我实在是没有时间去排查了。
Yolov5 Detect
ok,现在我们可以开始准备整合我们的算法了。当然现在值得一提的是,现在我还没有游戏人物的数据集,因此,我这里做的还是coco,后面再换成游戏的。只需要重新训练即可,然后写一下过滤的标签,就可以了。
class YoloDectect():
    """YOLOv5-Lite detector running an ONNX model through ONNX Runtime.

    The class resizes (letterboxes) an input frame to the network input size,
    runs the model, decodes the raw grid/anchor encoded predictions and applies
    confidence filtering plus non-maximum suppression.
    """

    def __init__(self,
                 model_pb_path=r'F:projectsPythonProjectYOLOv5-Lite-masterweightsv5lite-s.onnx',
                 label_path='coco.names',
                 confThreshold=0.6,
                 nmsThreshold=0.3,
                 objThreshold=0.6):
        """Load the model and the class names and prepare decoding constants.

        :param model_pb_path: path of the ONNX model file.
            NOTE(review): the default looks like a Windows path whose
            separators were lost; callers should pass an explicit path.
        :param label_path: text file holding one class name per line.
        :param confThreshold: minimum class score for a detection to be kept.
        :param nmsThreshold: overlap threshold handed to NMS.
        :param objThreshold: minimum objectness score for a detection.
        """
        so = ort.SessionOptions()
        # NOTE(review): presumably raises the log threshold to silence
        # ONNX Runtime warnings -- confirm against the runtime's severity scale.
        so.log_severity_level = 3
        self.net = ort.InferenceSession(model_pb_path, so)
        # Use a context manager so the label file handle is always released.
        with open(label_path, 'r') as label_file:
            self.classes = [line.strip() for line in label_file]
        self.num_classes = len(self.classes)
        # Anchor sizes obtained by clustering the dataset, one row per
        # detection level, stored as flattened (w, h) pairs.
        anchors = [[10, 13, 16, 30, 33, 23],
                   [30, 61, 62, 45, 59, 119],
                   [116, 90, 156, 198, 373, 326]]
        self.nl = len(anchors)             # number of detection levels
        self.na = len(anchors[0]) // 2     # anchors per level
        self.no = self.num_classes + 5     # values per prediction row
        self.grid = [np.zeros(1)] * self.nl
        self.stride = np.array([8., 16., 32.])
        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold
        net_input = self.net.get_inputs()[0]
        # Stored as (height, width), taken from dimensions 2 and 3 of the input.
        self.input_shape = (net_input.shape[2], net_input.shape[3])

    def resize_image(self, srcimg, keep_ratio=True):
        """Resize an image to the network input size.

        :param srcimg: source image as an array indexed (row, column, ...).
        :param keep_ratio: when True and the image is not square, keep the
            aspect ratio and pad the shorter side with zero-valued borders.
        :return: ``(img, newh, neww, top, left)`` where ``newh``/``neww`` are
            the size of the resized content and ``top``/``left`` the padding
            added before it.
        """
        top, left, newh, neww = 0, 0, self.input_shape[0], self.input_shape[1]
        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
            hw_scale = srcimg.shape[0] / srcimg.shape[1]
            if hw_scale > 1:
                # Taller than wide: fill the height, pad left and right.
                newh, neww = self.input_shape[0], int(self.input_shape[1] / hw_scale)
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                left = int((self.input_shape[1] - neww) * 0.5)
                img = cv2.copyMakeBorder(img, 0, 0, left, self.input_shape[1] - neww - left,
                                         cv2.BORDER_CONSTANT, value=0)  # add border
            else:
                # Wider than tall: fill the width, pad top and bottom.
                newh, neww = int(self.input_shape[0] * hw_scale), self.input_shape[1]
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                top = int((self.input_shape[0] - newh) * 0.5)
                img = cv2.copyMakeBorder(img, top, self.input_shape[0] - newh - top, 0, 0,
                                         cv2.BORDER_CONSTANT, value=0)
        else:
            # cv2.resize expects (width, height) while input_shape is
            # (height, width); the order only matters for non-square networks.
            img = cv2.resize(srcimg, (self.input_shape[1], self.input_shape[0]),
                             interpolation=cv2.INTER_AREA)
        return img, newh, neww, top, left

    def _make_grid(self, nx=20, ny=20):
        """Return the cell coordinates of an ``nx`` x ``ny`` grid.

        :param nx: number of cells along x (columns).
        :param ny: number of cells along y (rows).
        :return: float32 array of shape ``(nx * ny, 2)`` holding ``(x, y)``
            pairs in row-major order (x varies fastest).
        """
        # meshgrid(x, y) yields arrays of shape (ny, nx); the previous argument
        # order swapped the axes and was only correct for square grids.
        xv, yv = np.meshgrid(np.arange(nx), np.arange(ny))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

    def postprocess(self, frame, outs, pad_hw):
        """Filter decoded predictions, run NMS and map boxes to the frame.

        :param frame: original image; only its height and width are used.
        :param outs: decoded predictions, one row per candidate laid out as
            ``[cx, cy, w, h, objectness, class scores...]``.
        :param pad_hw: ``(newh, neww, padh, padw)`` as produced by
            :meth:`resize_image`.
        :return: list of ``{'box': [left, top, width, height], 'cls': name,
            'conf': score, 'id': class index}`` dictionaries.
        """
        newh, neww, padh, padw = pad_hw
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        ratioh, ratiow = frameHeight / newh, frameWidth / neww
        classIds = []
        confidences = []
        boxes = []
        # Drop rows whose objectness is too low before the per-row loop.
        outs = outs[outs[:, 4] > self.objThreshold]
        for detection in outs:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > self.confThreshold:
                # Remove the letterbox padding, then scale back to the frame.
                center_x = int((detection[0] - padw) * ratiow)
                center_y = int((detection[1] - padh) * ratioh)
                width = int(detection[2] * ratiow)
                height = int(detection[3] * ratioh)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
        if not boxes:
            return []
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
        result = []
        # Depending on the OpenCV version the indices come back as a flat
        # sequence or as an N x 1 array; flatten so both index the lists.
        for i in np.asarray(indices).reshape(-1):
            i = int(i)
            result.append({'box': boxes[i],
                           'cls': self.classes[classIds[i]],
                           'conf': confidences[i],
                           'id': classIds[i]})
        return result

    def detect(self, srcimg):
        """Run detection on a BGR image.

        :param srcimg: image in BGR channel order (it is converted to RGB
            before being fed to the network).
        :return: the list of detections produced by :meth:`postprocess`.
        """
        img, newh, neww, top, left = self.resize_image(srcimg)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        # HWC -> CHW and add the batch dimension.
        blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
        input_name = self.net.get_inputs()[0].name
        outs = self.net.run(None, {input_name: blob})[0].squeeze(axis=0)
        row_ind = 0
        for i in range(self.nl):
            h = int(self.input_shape[0] / self.stride[i])
            w = int(self.input_shape[1] / self.stride[i])
            length = int(self.na * h * w)
            # Grids are stored flattened as (h * w, 2); rebuild only when the
            # cached one does not match this level.
            if self.grid[i].shape != (h * w, 2):
                self.grid[i] = self._make_grid(w, h)
            rows = slice(row_ind, row_ind + length)
            # Decode box centres relative to their grid cell.
            outs[rows, 0:2] = (outs[rows, 0:2] * 2. - 0.5 + np.tile(
                self.grid[i], (self.na, 1))) * int(self.stride[i])
            # Decode box sizes relative to the level's anchors.
            outs[rows, 2:4] = (outs[rows, 2:4] * 2) ** 2 * np.repeat(
                self.anchor_grid[i], h * w, axis=0)
            row_ind += length
        results = self.postprocess(srcimg, outs, (newh, neww, top, left))
        return results
算法的整合部分其实,就是先前我玩的 YOLOV5-Lite的部署代码。只是我把输出数据改了。
鼠标控制
之后还是我们的鼠标控制功能。昨天我们虽然做了,但是那是很简陋的,没有把实际的功能进行整合。在这里我们主要有两个块。
监听
这里我要单独说,是因为这里遇到了bug。我这里使用的是pynput进行监听,里面有一个Listener,但是当我整合GUI的时候,发生了线程方面的问题。先前我的解决方案是打算直接用win32造轮子,但是代码都写完了,很难改了。最后没办法,找了个折中的方案。
# Stops muzzle positioning. Many features are bound to the left mouse button,
# so this has to be switched off cleanly whenever other actions are needed.
def __func_stop_gan(self, key):
    """Keyboard callback that switches off the left-button feature on F12.

    :param key: the key object delivered by the keyboard listener.
    """
    f12_pressed = key == keyboard.Key.f12
    if not f12_pressed:
        return
    # Only react while the keyboard switch itself is enabled.
    if self.listener_btn_mu:
        self.listener_left_mu = False
def __start_listener(self):
    """Keep the left-button mouse listener alive while the flag is set.

    Runs until ``self.star_listener_flag`` becomes false. The listener is
    created on demand and started exactly once; the previous version called
    ``start()`` on every pass, which raises ``RuntimeError`` the second time
    because a thread can only be started once, and it spun without pausing.
    """
    import time  # local import: the file's import block is not visible here

    while self.star_listener_flag:
        listener = self.listener
        if not listener:
            self.listener = Listener(on_click=self.__func_stay_gan)
        elif listener.ident is None:
            # Created but never started (ident stays None until start()).
            listener.start()
        elif not listener.is_alive():
            # A finished listener thread cannot be restarted; drop it so the
            # next pass builds a fresh one.
            self.listener = None
        else:
            # Listener is running: idle instead of burning a CPU core.
            time.sleep(0.1)
def __start_listener_right(self):
    """Keep the tracking mouse listener alive while the flag is set.

    Runs until ``self.star_listener_right_flag`` becomes false. The listener
    is created on demand and started exactly once; the previous version called
    ``start()`` on every pass, which raises ``RuntimeError`` the second time
    because a thread can only be started once, and it spun without pausing.
    """
    import time  # local import: the file's import block is not visible here

    while self.star_listener_right_flag:
        listener = self.listener_right
        if not listener:
            self.listener_right = Listener(on_click=self.__func_stay_track)
        elif listener.ident is None:
            # Created but never started (ident stays None until start()).
            listener.start()
        elif not listener.is_alive():
            # A finished listener thread cannot be restarted; drop it so the
            # next pass builds a fresh one.
            self.listener_right = None
        else:
            # Listener is running: idle instead of burning a CPU core.
            time.sleep(0.1)
def __start_listener_btn(self):
    """Keep the keyboard listener alive while the flag is set.

    Runs until ``self.star_listener_btn_flag`` becomes false. The listener is
    created on demand and started exactly once; the previous version called
    ``start()`` on every pass, which raises ``RuntimeError`` the second time
    because a thread can only be started once, and it spun without pausing.
    """
    import time  # local import: the file's import block is not visible here

    while self.star_listener_btn_flag:
        listener = self.listener_btn
        if not listener:
            self.listener_btn = keyboard.Listener(on_press=self.__func_stop_gan)
        elif listener.ident is None:
            # Created but never started (ident stays None until start()).
            listener.start()
        elif not listener.is_alive():
            # A finished listener thread cannot be restarted; drop it so the
            # next pass builds a fresh one.
            self.listener_btn = None
        else:
            # Listener is running: idle instead of burning a CPU core.
            time.sleep(0.1)
def func_run_mouse(self):
    """Enable the mouse features and launch the three listener loops.

    Does nothing when ``self.listener_started`` is already set.

    :return: None
    """
    if not self.listener_started:
        # Feature switches first, then the flags that keep the loops running.
        for flag_name in ('listener_btn_mu',
                          'listener_left_mu',
                          'listener_right_mu',
                          'star_listener_flag',
                          'star_listener_right_flag',
                          'star_listener_btn_flag'):
            setattr(self, flag_name, True)
        # tracking_open and stop_stay_location_flag are deliberately left
        # untouched here: per the original note they may only be enabled
        # after a mouse button has been pressed.
        self.listener_started = True
        workers = (self.__start_listener,
                   self.__start_listener_right,
                   self.__start_listener_btn)
        for worker in workers:
            self.draw_helper_pool.execute(worker)
再开启一个线程,然后写进死循环,通过结束变量控制结束。然后一直轮询start(),来手动确保监听一直在进行。
目标跟踪
之后是目标跟踪。
这里的话,我们项目启动的时候,将启动大概两个线程:一个是识别的,一个是跟踪的。识别线程不断更新最新的目标位置,跟踪线程就不断移动到那里去。由于我的目的是保证尽可能到最新的位置上去,因此完全不用担心读写锁的问题,没有必要等我移动到那个位置之后,再识别,然后再移动。直接读就好了,一来是实现简单,二来是这样看起来有一点连贯性,同时算法的算力消耗低。而且本身在这里我也是设置了两个帧率,一个是算法识别的帧率,一个是窗口绘制的帧率。
def func_track_ing(self):
    """Start the asynchronous tracking loop.

    The loop repeatedly picks the detection closest to the current mouse
    position and moves the mouse onto its centre. Set
    ``self.tracking_open = False`` to end the loop. A later refinement could
    filter for head or body; for now the nearest accepted target is followed.
    """
    import time  # local import: the file's import block is not visible here

    def tracking():
        while self.tracking_open:
            # Per the original note: the mouse position is reported at 100%
            # scaling while the items come from a raw screenshot (125%).
            x, y = pydirectinput.position()
            nearest = None
            nearest_dist = float('inf')
            # Bind the list once so a concurrent replacement of self.items
            # cannot change it in the middle of the scan.
            for it in self.items:
                if not ScreenUtils.fitiler(it):
                    continue
                box = it['box']
                # Box centre from [left, top, width, height].
                centerx = (box[0] + box[0] + box[2]) // 2
                centery = (box[1] + box[1] + box[3]) // 2
                c_dist = ((centerx - x) ** 2 + (centery - y) ** 2) ** 0.5
                if c_dist < nearest_dist:
                    nearest_dist = c_dist
                    nearest = (centerx, centery)
            if nearest is not None:
                self.move_mouse(nearest[0], nearest[1], 0.5, 1)
            else:
                # Nothing to follow: yield briefly instead of spinning at
                # full speed and starving the detection thread.
                time.sleep(0.001)

    self.draw_helper_pool.execute(tracking)
控制器
之后就是我们的控制器了,这个控制器主要是做整合。
class MyController:
    """Global controller that wires detection, drawing and mouse control."""

    def __init__(self):
        self.pools = ThreadPoolManager(max_workers=3)
        self.net = YoloDectect()
        self.drawer = RectangleDrawer()
        self.go = True            # keeps the loop in runing() alive
        self.move_mouse = MoveMouse()
        self.drawer_flag = True   # whether rectangles should be drawn
        self.alg_open = True      # whether detection should run

    def check_open_gan_gui(self):
        """GUI hook: enable the muzzle-positioning (left button) feature.

        :return: None
        """
        self.move_mouse.listener_left_mu = True

    def check_stop_gan_gui(self):
        """GUI hook: disable the muzzle-positioning (left button) feature."""
        self.move_mouse.listener_left_mu = False

    def check_open_draw_gui(self):
        """GUI hook: enable rectangle drawing."""
        self.drawer_flag = True

    def check_stop_draw_gui(self):
        """GUI hook: disable rectangle drawing."""
        self.drawer_flag = False

    def check_open_alg_gui(self):
        """GUI hook: enable the detection algorithm."""
        self.alg_open = True

    def check_stop_alg_gui(self):
        """GUI hook: disable the detection algorithm."""
        self.alg_open = False

    def check_open_track_gui(self):
        """GUI hook: enable target tracking (right button feature)."""
        self.move_mouse.listener_right_mu = True

    def check_stop_track_gui(self):
        """GUI hook: disable target tracking (right button feature)."""
        self.move_mouse.listener_right_mu = False

    def start(self):
        """Start every feature: mouse control, detection and drawing."""
        # 1. Start the mouse control feature.
        self.move_mouse.func_run_mouse()
        # 2. Start detection and drawing.
        self.drawer_flag = True
        self.alg_open = True
        self.go = True
        # NOTE(review): func_run_mouse() has just set listener_started to
        # True; resetting it here lets a second start() spawn the listener
        # loops again. Kept as in the original -- confirm this is intended.
        self.move_mouse.listener_started = False
        self.pools.execute(self.runing, 10, 70)

    def runing(self, fps, draw_fps):
        """Detection loop: grab the screen, detect and publish the results.

        :param fps: detection (scan) rate; it does not need to be high.
        :param draw_fps: rate handed to the drawer; this one can be higher.
        :return: None
        """
        w, h = ScreenUtils.get_real_resolution()
        self.monitor = {"top": 0, "left": 0, "width": w, "height": h}
        self.mss_obj = mss.mss()
        self.drawer.drawRectanglesBySelf(fps=draw_fps)
        need_p_time = 1 / fps
        try:
            while self.go:
                start_time = time.perf_counter()
                if self.alg_open:
                    # Grab the screen.
                    screenshot = self.mss_obj.grab(self.monitor)
                    # mss delivers BGRA pixels, so drop the alpha channel and
                    # keep the BGR order that detect() expects. COLOR_RGB2BGR
                    # would swap the red and blue channels here.
                    screenshot_cv = cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGRA2BGR)
                    results = self.net.detect(screenshot_cv)
                    self.drawer.setItems(results)
                    self.move_mouse.setItems(results)
                else:
                    # Detection is off: clear whatever was published last.
                    self.drawer.setItems([])
                    self.move_mouse.setItems([])
                # Mirror the drawing switch onto the drawer.
                self.drawer.draw_recgs = bool(self.drawer_flag)
                # Sleep off the rest of the frame budget, if any is left.
                dt = need_p_time - (time.perf_counter() - start_time)
                if dt > 0:
                    time.sleep(dt)
        finally:
            # Release the screen-capture handle once the loop ends.
            self.mss_obj.close()

    def start_pause(self):
        """Suspend every feature by clearing its flag; nothing is stopped."""
        self.move_mouse.listener_right_mu = False
        self.move_mouse.listener_left_mu = False
        self.move_mouse.listener_btn_mu = False
        self.drawer_flag = False
        self.alg_open = False

    def stop_pause(self):
        """Resume every feature suspended by start_pause()."""
        self.move_mouse.listener_right_mu = True
        self.move_mouse.listener_left_mu = True
        self.move_mouse.listener_btn_mu = True
        self.drawer_flag = True
        self.alg_open = True

    def stop_all(self):
        """Stop every algorithm: mouse control, detection and drawing."""
        self.move_mouse.func_stop_mouse()
        self.go = False
        self.drawer_flag = False
        self.alg_open = False
        # NOTE(review): leaving listener_started set makes a later
        # func_run_mouse() return immediately. Kept as in the original --
        # confirm against func_stop_mouse(), which is not visible here.
        self.move_mouse.listener_started = True
到这里,我们的控制器就基本做好了。
里面主要包括了如下功能
算法整合
这个部分的代码主要是这里:
def runing(self, fps, draw_fps):
    """Detection loop: grab the screen, detect and publish the results.

    :param fps: detection (scan) rate; it does not need to be high.
    :param draw_fps: rate handed to the drawer; this one can be higher.
    :return: None
    """
    w, h = ScreenUtils.get_real_resolution()
    self.monitor = {"top": 0, "left": 0, "width": w, "height": h}
    self.mss_obj = mss.mss()
    self.drawer.drawRectanglesBySelf(fps=draw_fps)
    need_p_time = 1 / fps
    try:
        while self.go:
            start_time = time.perf_counter()
            if self.alg_open:
                # Grab the screen.
                screenshot = self.mss_obj.grab(self.monitor)
                # mss delivers BGRA pixels, so drop the alpha channel and
                # keep the BGR order that detect() expects. COLOR_RGB2BGR
                # would swap the red and blue channels here.
                screenshot_cv = cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGRA2BGR)
                results = self.net.detect(screenshot_cv)
                self.drawer.setItems(results)
                self.move_mouse.setItems(results)
            else:
                # Detection is off: clear whatever was published last.
                self.drawer.setItems([])
                self.move_mouse.setItems([])
            # Mirror the drawing switch onto the drawer.
            self.drawer.draw_recgs = bool(self.drawer_flag)
            # Sleep off the rest of the frame budget, if any is left.
            dt = need_p_time - (time.perf_counter() - start_time)
            if dt > 0:
                time.sleep(dt)
    finally:
        # Release the screen-capture handle once the loop ends.
        self.mss_obj.close()
然后我们有很多标志位去控制。
总结
ok,那么这篇文章就先到这里,我们后面完成全部整合