Compare commits

...

2 Commits

Author SHA1 Message Date
Daniel
2a7ec01dcd changed index num to pk from manifest 2024-10-01 08:12:04 -05:00
Daniel
71f946b8b0 changed indexing 2024-10-01 07:47:31 -05:00

View File

@ -4,8 +4,6 @@ import lib
raw_manifest = open("/mnt/user/media/paperless/media/backup/manifest.json") raw_manifest = open("/mnt/user/media/paperless/media/backup/manifest.json")
manifest = lib.json.load(raw_manifest) manifest = lib.json.load(raw_manifest)
inum = 1
existing, inserted, big, duplicates = 0, 0, 0, 0 existing, inserted, big, duplicates = 0, 0, 0, 0
def insert(r): def insert(r):
@ -13,6 +11,15 @@ def insert(r):
"content": r["content"], "content": r["content"],
"checksum": r["check"], "checksum": r["check"],
"index": r["index"]}) "index": r["index"]})
def update_pk(r):
    """Write a record's index onto the stored document with the same checksum.

    r is a record dict. Its "check" value is matched against the stored
    "checksum" field, and its "index" value is written to the matching
    document's "index" field. The result of the update call is discarded.
    """
    # NOTE(review): lib.db looks like a MongoDB-style collection
    # (update_one with a "$set" operator) - confirm against lib.
    apply_update = lib.db.update_one
    selector = {"checksum": r["check"]}
    change = {"$set": {"index": r["index"]}}
    apply_update(selector, change)
def exists(r): def exists(r):
global existing, duplicates global existing, duplicates
@ -28,7 +35,7 @@ def exists(r):
else: return False else: return False
def parse(): def parse():
global inserted, big, inum global inserted, big
print("Parsing manifest json...") print("Parsing manifest json...")
# for every document in the export # for every document in the export
for document in manifest: for document in manifest:
@ -37,38 +44,14 @@ def parse():
record = {"title": document["fields"]["title"], record = {"title": document["fields"]["title"],
"content": document["fields"]["content"], "content": document["fields"]["content"],
"check": document["fields"]["checksum"], "check": document["fields"]["checksum"],
"index": inum} "index": document["pk"]}
if lib.sys.getsizeof(record["content"]) < 16777216: if lib.sys.getsizeof(record["content"]) < 16777216:
if record["content"] != "" and record["title"] != "": if record["content"] != "" and record["title"] != "":
if not exists(record): if not exists(record):
insert(record) insert(record)
inserted = inserted + 1 inserted = inserted + 1
inum = inum + 1 update_pk(record)
else: big = big + 1 else: big = big + 1
except KeyError: except KeyError:
continue continue
def parse():
    """Insert every new, non-empty manifest document into the store.

    Iterates the module-level ``manifest``. For each document a record is
    built from ``document["fields"]`` ("title", "content", "checksum") and
    the running counter ``inum`` as its "index". A record is inserted only
    when its content is under the size limit, both title and content are
    non-empty, and ``exists(record)`` is false.

    Updates the module-level counters: ``inserted`` and ``inum`` advance by
    one per inserted record; ``big`` advances by one per oversized record.
    Documents missing any of the expected keys are skipped.
    """
    global inserted, big, inum
    # NOTE(review): 16777216 is 16 MiB; presumably the store's per-document
    # size cap - confirm. sys.getsizeof reports the in-memory size of the
    # Python string object, not its encoded size.
    size_limit = 16777216
    print("Parsing manifest json...")
    # for every document in the export
    for document in manifest:
        try:
            record = {"title": document["fields"]["title"],
                      "content": document["fields"]["content"],
                      "check": document["fields"]["checksum"],
                      "index": inum}
            if lib.sys.getsizeof(record["content"]) < size_limit:
                # only when the title and content tags aren't blank
                if record["content"] != "" and record["title"] != "":
                    if not exists(record):
                        insert(record)
                        inserted = inserted + 1
                        # Advance the declared global counter. The previous
                        # code incremented an undefined name (index_num),
                        # which raised UnboundLocalError on the first insert
                        # and never moved inum.
                        inum = inum + 1
            else:
                big = big + 1
        except KeyError:
            # Entries without fields/title/content/checksum are skipped.
            continue