Changed the record index from a running counter to the pk taken from the manifest
This commit is contained in:
parent
71f946b8b0
commit
2a7ec01dcd
@ -11,6 +11,15 @@ def insert(r):
|
|||||||
"content": r["content"],
|
"content": r["content"],
|
||||||
"checksum": r["check"],
|
"checksum": r["check"],
|
||||||
"index": r["index"]})
|
"index": r["index"]})
|
||||||
|
def update_pk(r):
|
||||||
|
lib.db.update_one(
|
||||||
|
{
|
||||||
|
"checksum": r["check"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$set": {"index": r["index"]}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
def exists(r):
|
def exists(r):
|
||||||
global existing, duplicates
|
global existing, duplicates
|
||||||
@ -35,38 +44,14 @@ def parse():
|
|||||||
record = {"title": document["fields"]["title"],
|
record = {"title": document["fields"]["title"],
|
||||||
"content": document["fields"]["content"],
|
"content": document["fields"]["content"],
|
||||||
"check": document["fields"]["checksum"],
|
"check": document["fields"]["checksum"],
|
||||||
"index": inum}
|
"index": document["pk"]}
|
||||||
|
|
||||||
if lib.sys.getsizeof(record["content"]) < 16777216:
|
if lib.sys.getsizeof(record["content"]) < 16777216:
|
||||||
if record["content"] != "" and record["title"] != "":
|
if record["content"] != "" and record["title"] != "":
|
||||||
if not exists(record):
|
if not exists(record):
|
||||||
insert(record)
|
insert(record)
|
||||||
inserted = inserted + 1
|
inserted = inserted + 1
|
||||||
inum = inum + 1
|
update_pk(record)
|
||||||
else: big = big + 1
|
else: big = big + 1
|
||||||
except KeyError:
|
except KeyError:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
def parse():
|
|
||||||
global inserted, big, inum
|
|
||||||
print("Parsing manifest json...")
|
|
||||||
# for every document in the export
|
|
||||||
for document in manifest:
|
|
||||||
#if the title and content tags aren't blank
|
|
||||||
try:
|
|
||||||
record = {"title": document["fields"]["title"],
|
|
||||||
"content": document["fields"]["content"],
|
|
||||||
"check": document["fields"]["checksum"],
|
|
||||||
"index": inum}
|
|
||||||
|
|
||||||
if lib.sys.getsizeof(record["content"]) < 16777216:
|
|
||||||
if record["content"] != "" and record["title"] != "":
|
|
||||||
if not exists(record):
|
|
||||||
insert(record)
|
|
||||||
inserted = inserted + 1
|
|
||||||
index_num = index_num + 1
|
|
||||||
else: big = big + 1
|
|
||||||
except KeyError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user