Compare commits

..

2 Commits

Author SHA1 Message Date
Daniel
2a7ec01dcd changed index num to pk from manifest 2024-10-01 08:12:04 -05:00
Daniel
71f946b8b0 changed indexing 2024-10-01 07:47:31 -05:00

View File

@ -4,8 +4,6 @@ import lib
raw_manifest = open("/mnt/user/media/paperless/media/backup/manifest.json")
manifest = lib.json.load(raw_manifest)
inum = 1
existing, inserted, big, duplicates = 0, 0, 0, 0
def insert(r):
@ -13,6 +11,15 @@ def insert(r):
"content": r["content"],
"checksum": r["check"],
"index": r["index"]})
def update_pk(r):
    """Overwrite the stored "index" of the entry whose checksum matches ``r``.

    ``r`` is a record dict; only its "check" and "index" keys are read.
    The entry is selected by "checksum" == ``r["check"]`` and its "index"
    field is set to ``r["index"]``.
    """
    # NOTE(review): lib.db looks like a MongoDB collection (update_one / $set)
    # -- confirm against lib.
    apply_update = lib.db.update_one
    selector = {"checksum": r["check"]}
    change = {"$set": {"index": r["index"]}}
    apply_update(selector, change)
def exists(r):
global existing, duplicates
@ -28,7 +35,7 @@ def exists(r):
else: return False
def parse():
global inserted, big, inum
global inserted, big
print("Parsing manifest json...")
# for every document in the export
for document in manifest:
@ -37,38 +44,14 @@ def parse():
record = {"title": document["fields"]["title"],
"content": document["fields"]["content"],
"check": document["fields"]["checksum"],
"index": inum}
"index": document["pk"]}
if lib.sys.getsizeof(record["content"]) < 16777216:
if record["content"] != "" and record["title"] != "":
if not exists(record):
insert(record)
inserted = inserted + 1
inum = inum + 1
update_pk(record)
else: big = big + 1
except KeyError:
continue
def parse():
    """Walk the loaded manifest and insert every new, non-blank document.

    For each manifest entry a record is built from its "fields" (title,
    content, checksum) and tagged with the running counter ``inum``.
    Records that are not already known to ``exists`` are passed to
    ``insert``. Updates the module-level counters ``inserted``, ``big``
    and ``inum``.
    """
    global inserted, big, inum
    print("Parsing manifest json...")
    # for every document in the export
    for document in manifest:
        # entries lacking the expected keys raise KeyError and are skipped
        try:
            record = {"title": document["fields"]["title"],
                      "content": document["fields"]["content"],
                      "check": document["fields"]["checksum"],
                      "index": inum}
            # NOTE(review): 16777216 is 16 MiB, presumably the database's
            # per-document size limit -- confirm. sys.getsizeof reports the
            # in-memory object size, not the encoded byte length.
            if lib.sys.getsizeof(record["content"]) < 16777216:
                # only store documents whose title and content are non-blank
                if record["content"] != "" and record["title"] != "":
                    if not exists(record):
                        insert(record)
                        inserted = inserted + 1
                        # Advance the counter that is actually declared
                        # global and used for "index"; the previous name
                        # `index_num` was never defined and raised
                        # UnboundLocalError on the first insert.
                        # NOTE(review): nesting level reconstructed from a
                        # view without indentation -- confirm.
                        inum = inum + 1
            else:
                big = big + 1
        except KeyError:
            continue