From 2b9ced373f9a015169fd4b0203140d0c1851a0e8 Mon Sep 17 00:00:00 2001 From: Christian Date: Tue, 27 Aug 2019 17:42:10 +0100 Subject: [PATCH] Database sync now throttled such that only 20 threads do work at once - no longer tries to do ALL threads at the same time as that was a little extreme and offered no benefit. --- sync_database.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sync_database.py b/sync_database.py index e02c8c3..cc58365 100644 --- a/sync_database.py +++ b/sync_database.py @@ -14,7 +14,7 @@ def sync_database(): import lxml import multiprocessing import sys - + import time sys.setrecursionlimit(100000) #Get old links by parsing properly, and download + save to file! @@ -37,7 +37,6 @@ def sync_database(): def hasstylenotclass(tag): return tag.has_attr('style') and not tag.has_attr('class') - for col in soup.tbody.find_all("tr"): #print(row) #for col in row("tr"): @@ -88,14 +87,13 @@ def sync_database(): q = multiprocessing.Queue() pool = multiprocessing.Pool(len(eventlinks),process,(q,)) for i,link in enumerate(eventlinks): + while q.qsize() > 20: + time.sleep(1) q.put([link,eventnames[i]]) - pool.close() pool.join() - print('rm files') for file in os.listdir(): if 'ToRead' in file: os.remove(file) -# if i == 0: -# break \ No newline at end of file +#sync_database() \ No newline at end of file -- GitLab