

我有一个简化的示例,想在其中使用多进程(multiprocessing)。设想这样一个场景:有一串数字(我称之为“帧”)逐个传入,我想把每一帧分配给当前空闲的任意一个进程。所以我创建了 4 个进程,每个进程都运行一个 while 循环,检查队列中是否有元素,如果有就对其应用函数。



# step 1, 4 processes
import multiprocessing as mp
import os
import time

class MpListOperations:
    """Square a stream of numbers ("frames") with polling worker processes.

    Frames are pushed onto ``frames_queue``; each worker polls that queue and
    puts the square of every frame it takes onto ``results_queue``. A shared
    boolean ``flag`` tells the workers when to stop polling.

    NOTE: this is the polling design that the surrounding discussion is
    about. The statements that were missing from the listing are restored,
    but the design itself is kept, including the ``qsize()``-based polling
    that can make ``join()`` hang (see ``process_list``).
    """

    def __init__(self):
        # Every mp.Manager() call starts a separate manager server process.
        self.results_queue = mp.Manager().Queue()
        self.frames_queue = mp.Manager().Queue()
        # Shared run flag: workers keep polling while it is true.
        self.flag = mp.Manager().Value(typecode='b', value=True)
        self.list_nums = list(range(0, 5000))

    def process_list(self):
        """Worker loop: square frames until ``self.flag`` becomes false."""
        print(f"Process id {os.getpid()} started")
        while self.flag.value:
            # NOTE: qsize() is only approximate and this check-then-get is
            # not atomic. Another worker may take the last frame between the
            # two calls, leaving this worker blocked in get() forever, which
            # in turn makes a join() without timeout hang.
            if self.frames_queue.qsize():
                self.results_queue.put(self.frames_queue.get() ** 2)

    def create_processes(self, no_of_processes = mp.cpu_count()):
        """Create (but do not start) ``no_of_processes`` worker processes.

        The default is evaluated once, when the class body is executed.
        """
        print("Creating Processes")
        self.processes = [mp.Process(target=self.process_list) for _ in range(no_of_processes)]

    def start_processes(self):
        """Start every process created by ``create_processes``."""
        print("starting processes")
        for process in self.processes:
            process.start()

    def join_process(self):
        """Wait until the frame queue looks empty, then stop and join workers."""
        print("Joining Processes")
        while True:
            if not self.frames_queue.qsize():
                # Tell the workers to leave their polling loops before
                # waiting for them.
                self.flag.value = False
                print("JOININNG HERE")
                for process in self.processes:
                    # join() always returns None; the name is kept because
                    # the discussion below refers to this statement.
                    exit_code = process.join()
                print("BREAKING DONE")
                break

    def stream_frames(self):
        """Feed every number in ``self.list_nums`` to the frame queue."""
        print("Streaming Frames")
        for frame in self.list_nums:
            self.frames_queue.put(frame)

if __name__=="__main__":
    # Driver for the polling design: create and start the workers, feed the
    # frames, then wait for the workers and report the elapsed time.
    # NOTE: with a plain join() this can hang; that is the problem discussed
    # in the text that follows.
    start = time.time()
    mp_ops = MpListOperations()
    mp_ops.create_processes()
    mp_ops.start_processes()
    mp_ops.stream_frames()
    mp_ops.join_process()
    print(time.time() - start)

现在,如果我给 join 加上一个超时参数(即使是 0,即 exit_code = process.join(0)),程序就能正常结束。我想了解:如果这段代码本身是正确的,超时值应该设为多少?为什么加了超时就能工作,而不加超时就不行?用这种方式实现多进程处理的正确方法是什么?

如果您查看托管队列(managed queue)的文档,就会看到 qsize 方法只返回近似大小。因此,我不会用它来判断帧队列中的所有项是否都已被取出。您大概是希望让各个进程一直运行,直到所有帧都处理完毕。我所知道的最简单的方法,是在所有实际帧之后再向帧队列放入 N 个哨兵(sentinel)项,其中 N 是从该队列取数据的进程数。哨兵项是一个不会被误认为实际帧的特殊值,它向进程表明队列中不会再有更多的项(相当于文件结束标记)。在本例中我们可以使用 None 作为哨兵项。每个进程不断对队列执行 get 操作,直到取到哨兵项后终止。因此不再需要 self.flag 属性。


import multiprocessing as mp
import os
import time

class MpListOperations:
    """Square a stream of numbers ("frames") with sentinel-terminated workers.

    Frames are pushed onto ``frames_queue``. Each worker blocks on ``get()``
    and puts the square of every frame onto ``results_queue`` until it
    receives ``None``, the sentinel that marks the end of the stream. One
    sentinel is queued per worker, so no shared stop flag and no ``qsize()``
    polling are needed.
    """

    def __init__(self):
        # Only create one manager process:
        manager = mp.Manager()
        self.results_queue = manager.Queue()
        self.frames_queue = manager.Queue()
        # No need to convert range to a list:
        self.list_nums = range(0, 5000)

    def process_list(self):
        """Worker loop: square frames until the ``None`` sentinel arrives."""
        print(f"Process id {os.getpid()} started")
        while True:
            frame = self.frames_queue.get()
            if frame is None: # Sentinel?
                # Yes, we are done:
                break
            self.results_queue.put(frame ** 2)

    def create_processes(self, no_of_processes = mp.cpu_count()):
        """Create (but do not start) ``no_of_processes`` worker processes.

        The count is remembered so that ``stream_frames`` can queue exactly
        one sentinel per worker. The default is evaluated once, when the
        class body is executed.
        """
        print("Creating Processes")
        self.no_of_processes = no_of_processes
        self.processes = [mp.Process(target=self.process_list) for _ in range(no_of_processes)]

    def start_processes(self):
        """Start every process created by ``create_processes``.

        The process target is a bound method, so start methods that pickle
        the target also pickle ``self``. The process list is detached while
        starting so that it is not pickled along with it.
        """
        print("Starting Processes")
        processes = self.processes
        # Don't try to pickle list of processes:
        self.processes = None
        try:
            for process in processes:
                process.start()
        finally:
            # Restore attribute, even if a start() call raised:
            self.processes = processes

    def join_processes(self):
        """Block until every worker has seen its sentinel and exited."""
        print("Joining Processes")
        for process in self.processes:
            # join returns None:
            process.join()

    def stream_frames(self):
        """Queue every frame, followed by one sentinel per worker.

        Requires ``create_processes`` to have been called first, because it
        reads ``self.no_of_processes``.
        """
        print("Streaming Frames")
        for frame in self.list_nums:
            self.frames_queue.put(frame)
        # Put sentinels:
        for _ in range(self.no_of_processes):
            self.frames_queue.put(None)

if __name__== "__main__":
    # Driver for the sentinel design: create and start the workers, feed the
    # frames plus sentinels, wait for the workers to exit, then report the
    # elapsed time. The call order matches the console output shown below.
    start = time.time()
    mp_ops = MpListOperations()
    mp_ops.create_processes()
    mp_ops.start_processes()
    mp_ops.stream_frames()
    mp_ops.join_processes()
    print(time.time() - start)


Creating Processes
Starting Processes
Process id 28 started
Process id 29 started
Streaming Frames
Process id 33 started
Process id 31 started
Process id 38 started
Process id 44 started
Process id 42 started
Process id 45 started
Joining Processes

Windows 的注意事项

我修改了 start_processes 方法,在启动各进程期间临时把属性 self.processes 设置为 None:

    def start_processes(self):
        """Start every worker while ``self.processes`` is temporarily ``None``.

        The process list is detached from the instance for the duration of
        the loop so that it is not pickled together with ``self`` when a
        process is started, and is put back afterwards.
        """
        print("Starting Processes")
        processes = self.processes
        # Don't try to pickle list of processes:
        self.processes = None
        try:
            for process in processes:
                process.start()
        finally:
            # Restore attribute, even if a start() call raised:
            self.processes = processes

否则,在 Windows 下,当尝试序列化/反序列化包含两个或更多 multiprocessing.Process 实例的进程列表时,会遇到 pickle 错误:“TypeError: cannot pickle 'weakref' object”。这可以用下面的代码来演示:我们先尝试 pickle 只含 1 个进程的列表,然后再尝试含 2 个进程的列表:

import multiprocessing as mp
import os

class Foo:
    """Minimal reproduction of the pickling error discussed in the text.

    The constructor creates ``number_of_processes`` processes whose target is
    the bound method ``self.worker``, then starts and joins them. According
    to the accompanying output, this works for a single process on Windows
    and fails with "TypeError: cannot pickle 'weakref' object" for two.
    """

    def __init__(self, number_of_processes):
        self.processes = [mp.Process(target=self.worker) for _ in range(number_of_processes)]
        # The whole demonstration runs from the constructor, as the
        # traceback below shows (__init__ -> start_processes).
        self.start_processes()
        self.join_processes()

    def start_processes(self):
        """Start all processes WITHOUT detaching ``self.processes`` first.

        This is deliberate: leaving the list attached to the instance is
        what reproduces the error.
        """
        for process in self.processes:
            process.start()

    def join_processes(self):
        """Wait for every process to finish."""
        for process in self.processes:
            process.join()

    def worker(self):
        """Process body: report the start and end of the child process."""
        print(f"Process id {os.getpid()} started")
        print(f"Process id {os.getpid()} ended")

# Demonstration driver: build Foo with one process, then with two.
# In the output shown below, the first call prints its start/end lines and
# the traceback is raised from the second call, foo = Foo(2).
if __name__== "__main__":
    foo = Foo(1)
    foo = Foo(2)


Process id 7540 started
Process id 7540 ended
Traceback (most recent call last):
  File "C:\Booboo\test\test.py", line 26, in <module>
    foo = Foo(2)
  File "C:\Booboo\test\test.py", line 7, in __init__
  File "C:\Booboo\test\test.py", line 13, in start_processes
  File "C:\Program Files\Python38\lib\multiprocessing\process.py", line 121, in start
    self._popen = self._Popen(self)
  File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 224, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "C:\Program Files\Python38\lib\multiprocessing\context.py", line 327, in _Popen
    return Popen(process_obj)
  File "C:\Program Files\Python38\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
    reduction.dump(process_obj, to_child)
  File "C:\Program Files\Python38\lib\multiprocessing\reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
Process id 18152 started
Process id 18152 ended
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "C:\Program Files\Python38\lib\multiprocessing\spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
EOFError: Ran out of input

