python 多线程
1.GIL 和 同步锁的区别
GIL是面向python解释器的,同步锁是面向运行代码的
GIL保证同一时刻只有一个线程在执行Python字节码。线程遇到I/O阻塞或sleep时会主动释放GIL;持续运行超过切换间隔(Python 3 默认是5ms,可通过 sys.getswitchinterval() 查看)时也会被要求让出GIL。此时解释器保存当前线程的上下文,暂时switch到其他线程运行,后续再通过内部调度切换回来。
同步锁保证被锁住的代码块在各线程之间串行执行:持有锁的线程中途仍可能被switch出去,但其他线程在锁被释放之前无法进入该代码块
多线程操作共享数据出问题的简单例子:线程运行到sleep时会切换到其他线程,此时每个线程读到的temp都是100,还没有-1;sleep结束后,各线程都用自己的temp-1给num赋值,所以输出结果为99,而预期应该是0
import logging
import threading
import time
from random import random
# Configure the root logger to emit INFO-level records.
logging.basicConfig(level=logging.INFO)
# Shared counter that every Worker reads and writes without any locking.
num = 100
# Worker threads started by main(), kept so they can be joined afterwards.
worker_list = []
class Worker(threading.Thread):
    """Thread that decrements the shared ``num`` counter without any locking."""

    @staticmethod
    def solution():
        """Read ``num``, sleep for a random time, then store the value minus one."""
        global num
        snapshot = num
        # The sleep sits between the read and the write-back, so other threads
        # can read the same value of ``num`` in the meantime.
        time.sleep(random() * 0.5)
        num = snapshot - 1

    def run(self):
        """Thread entry point: perform a single decrement."""
        self.solution()
def main():
    """Start 100 ``Worker`` threads, then wait until every one has finished."""
    for _ in range(100):
        thread = Worker()
        thread.start()
        worker_list.append(thread)

    # Join only after all threads have been started so they overlap in time.
    for thread in worker_list:
        thread.join()
# Run the demo and report the final counter only when executed as a script.
if __name__ == "__main__":
    main()
    logging.info(num)  # output: INFO:root:99
多线程访问线程不安全单例模式实例,实例化的是不同的对象
import logging
import threading
import time
from random import random
# Configure the root logger to emit INFO-level records.
logging.basicConfig(level=logging.INFO)
# Worker threads started by main(), kept so they can be joined afterwards.
worker_list = []
class Singleton:
    """Singleton implemented in ``__new__`` without any locking.

    The check of ``_instance`` and the assignment to it are separated by a
    sleep and are not synchronized between threads.
    """

    _instance = None

    def __new__(cls):
        if cls._instance is not None:
            return cls._instance
        # Delay between the check above and the assignment below.
        time.sleep(0.1)
        cls._instance = object.__new__(cls)
        return cls._instance

    def __init__(self):
        pass
class Worker(threading.Thread):
    """Thread that instantiates ``Singleton`` and logs the object's ``id``."""

    def run(self):
        """Create (or fetch) the singleton and log its identity."""
        # Keep a local reference so the object stays alive while its id is logged.
        instance = Singleton()
        logging.info(id(instance))
def main():
    """Start 100 ``Worker`` threads, then wait until every one has finished."""
    for _ in range(100):
        thread = Worker()
        thread.start()
        worker_list.append(thread)

    # Join only after all threads have been started so they overlap in time.
    for thread in worker_list:
        thread.join()
# Launch the threads only when this file is executed as a script.
if __name__ == "__main__":
    main()
"""
output:
INFO:root:47983984
INFO:root:47984144
INFO:root:47984240
INFO:root:47984368
INFO:root:47984496
INFO:root:47984624
INFO:root:48083088
INFO:root:47984528
...
"""
增加同步锁,对_instance 的访问和赋值变为同步操作:其他线程必须等拿到锁的线程执行完并释放锁之后,才能进入这段代码
class Singleton:
    """Singleton whose creation is serialized by a class-level lock.

    Every instantiation takes ``_lock`` before looking at ``_instance``, so
    the check and the assignment cannot interleave between threads.
    """

    _instance = None
    _lock = threading.Lock()

    def __init__(self):
        pass

    def __new__(cls):
        """Return the single shared instance, creating it on first use."""
        # ``with`` releases the lock even if construction raises; a bare
        # acquire()/release() pair would leave the lock held forever and
        # block every later instantiation.
        with cls._lock:
            if cls._instance is None:
                # Artificial delay kept from the demo: it widens the window
                # in which an unsynchronized version would race.
                time.sleep(0.1)
                cls._instance = object.__new__(cls)
        return cls._instance
"""
output:
INFO:root:60046704
INFO:root:60046704
INFO:root:60046704
INFO:root:60046704
INFO:root:60046704
...
"""
优化下争锁逻辑:原先每次实例化时都会争锁,拿到锁后再判断_instance的值决定是否创建,影响性能;改为只有_instance 为 None 时,线程才会进行同步操作,否则直接return
class Singleton:
    """Singleton that takes the lock only while no instance exists yet.

    ``_instance`` is checked once without the lock and once more after the
    lock has been taken, so calls made after creation return immediately.
    """

    _instance = None
    _lock = threading.Lock()

    def __init__(self):
        pass

    def __new__(cls):
        """Return the single shared instance, creating it on first use."""
        # Fast path: once the instance exists no lock is taken at all.
        if cls._instance is None:
            # ``with`` guarantees the lock is released even if construction
            # raises; a bare acquire()/release() pair would leave it held.
            with cls._lock:
                # Check again: another thread may have created the instance
                # while this one was waiting for the lock.
                if cls._instance is None:
                    # Artificial delay kept from the demo.
                    time.sleep(0.1)
                    cls._instance = object.__new__(cls)
        return cls._instance
上述线程安全单例模式仍然存在一个问题:如果__init__中有初始化操作(比如读取文件),那么每次实例化时虽然拿到的始终是同一个对象,但都会再读取一次文件。可以通过一个标识来确认是否需要执行初始化操作,
但是这个标识_did_init在初始化时同样存在线程不安全的问题。
# Path of the file read during the one-time initialisation.
FILE = './temp.txt'


class Singleton:
    """Singleton with lock-guarded creation and an unguarded init flag.

    ``__new__`` creates the instance under ``_lock``.  ``__init__`` skips the
    file read once ``_did_init`` is set, but that check is NOT synchronized:
    threads that enter ``__init__`` before the flag is set each run the
    initialisation.  This version exists to demonstrate that problem.
    """

    _instance = None
    _did_init = False
    _lock = threading.Lock()

    def __init__(self):
        # Deliberately unsynchronized check-then-set (see class docstring).
        if not self._did_init:
            # Logged so that repeated initialisation is visible in the output.
            logging.info("do init")
            self.config = None
            self._read_file()
            self._did_init = True

    def _read_file(self):
        """Load the text content of ``FILE`` into ``self.config``."""
        with open(FILE, "r") as f:
            self.config = f.read()

    def __new__(cls):
        """Return the single shared instance, creating it on first use."""
        if cls._instance is None:
            # ``with`` releases the lock even if construction raises.
            with cls._lock:
                # Check again: another thread may have created the instance
                # while this one was waiting for the lock.
                if cls._instance is None:
                    # Artificial delay kept from the demo.
                    time.sleep(0.1)
                    cls._instance = object.__new__(cls)
        return cls._instance
"""
output:
INFO:root:do init
INFO:root:do init
"""
同样需要加锁。这里要注意:由于锁中包含了文件读取操作,多个线程同时实例化Singleton对象时,除拿到锁的线程外,其他线程都要等它读取完文件并释放锁之后才能继续执行。所以如果文件过大,将会影响所有线程的实例化时间,这也是同步锁的缺点
# Path of the file read during the one-time initialisation.
FILE = './temp.txt'


class Singleton:
    """Singleton whose creation and one-time initialisation are both locked.

    ``__new__`` and ``__init__`` each check their flag, take their own lock
    and check again, so the instance is created once and ``FILE`` is read
    once.  The file is read while ``_init_lock`` is held, so threads that
    reach ``__init__`` before the flag is set wait for the read to finish.
    """

    _instance = None
    _did_init = False
    _new_lock = threading.Lock()
    _init_lock = threading.Lock()

    def __init__(self):
        if not self._did_init:
            # ``with`` releases the lock even if reading the file raises.
            with self._init_lock:
                # Check again: another thread may have finished the
                # initialisation while this one was waiting for the lock.
                if not self._did_init:
                    logging.info("do init")
                    self.config = None
                    self._read_file()
                    self._did_init = True

    def _read_file(self):
        """Load the text content of ``FILE`` into ``self.config``."""
        with open(FILE, "r") as f:
            self.config = f.read()

    def __new__(cls):
        """Return the single shared instance, creating it on first use."""
        if cls._instance is None:
            # Must be ``_new_lock``: this class defines no ``_lock``
            # attribute, so ``cls._lock`` raises AttributeError.
            with cls._new_lock:
                if cls._instance is None:
                    # Artificial delay kept from the demo.
                    time.sleep(0.1)
                    cls._instance = object.__new__(cls)
        return cls._instance
个人理解,应该把读取文件的初始化动作放在类变量中,当模块第一次加载后,就已经初始化该类变量,后续的类实例化,不存在耗时
import logging
import threading
import time
from os.path import dirname, abspath, join
# Configure the root logger to emit INFO-level records.
logging.basicConfig(level=logging.INFO)
# Worker threads started by main(), kept so they can be joined afterwards.
worker_list = []
# Absolute path of "3.zip" in the directory that contains this source file.
FILE = join(dirname(abspath(__file__)), "3.zip")
def _read_file():
    """Return the complete contents of ``FILE`` as bytes."""
    with open(FILE, "rb") as handle:
        return handle.read()
class Singleton:
    """Singleton whose file content is loaded once, when the class is defined.

    ``content`` is read by ``_read_file()`` while the class body executes, so
    ``__init__`` only copies a reference and instantiation does no file I/O.
    Creation and initialisation each check their flag, take their own lock
    and check again.
    """

    _instance = None
    _did_init = False
    _new_lock = threading.Lock()
    _init_lock = threading.Lock()

    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    content = _read_file()
    end = time.perf_counter()
    logging.info("init content " + str(end - start))

    def __init__(self):
        if not self._did_init:
            # ``with`` releases the lock even if the body raises.
            with self._init_lock:
                # Check again: another thread may have finished the
                # initialisation while this one was waiting for the lock.
                if not self._did_init:
                    logging.info("do init")
                    self.config = self.content
                    self._did_init = True

    def __new__(cls, *args, **kwargs):
        """Return the single shared instance, creating it on first use."""
        if cls._instance is None:
            with cls._new_lock:
                if cls._instance is None:
                    cls._instance = object.__new__(cls)
        return cls._instance
class Worker(threading.Thread):
    """Thread that instantiates ``Singleton`` and logs how long that took."""

    def run(self):
        """Time one ``Singleton()`` call and log the elapsed seconds."""
        # time.clock() was removed in Python 3.8; perf_counter() replaces it.
        start = time.perf_counter()
        Singleton()
        end = time.perf_counter()
        logging.info("time " + str(end - start))
def main():
    """Start 100 ``Worker`` threads, then wait until every one has finished."""
    for _ in range(100):
        thread = Worker()
        thread.start()
        worker_list.append(thread)

    # Join only after all threads have been started so they overlap in time.
    for thread in worker_list:
        thread.join()
# Run the threaded demo, then time one more instantiation on the main thread.
if __name__ == "__main__":
    main()
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    s = time.perf_counter()
    a = Singleton()
    e = time.perf_counter()
    logging.info("main " + str(e - s))
"""
output:
INFO:root:init content 2.8052793
INFO:root:do init
INFO:root:time 0.0002140000000001585
INFO:root:time 1.8999999999991246e-06
INFO:root:time 1.7999999997186933e-06
INFO:root:time 1.6999999998823512e-06
INFO:root:time 2.7999999998584713e-06
....
INFO:root:time 1.600000000046009e-06
INFO:root:main 6.200000000067263e-06
"""