Compare commits

..

No commits in common. "2a7ec01dcd495135f533d05b51b559c2914485f5" and "816510f4def43596c9c07ccd09c3721698813ff6" have entirely different histories.

View File

@ -4,6 +4,8 @@ import lib
raw_manifest = open("/mnt/user/media/paperless/media/backup/manifest.json") raw_manifest = open("/mnt/user/media/paperless/media/backup/manifest.json")
manifest = lib.json.load(raw_manifest) manifest = lib.json.load(raw_manifest)
inum = 1
existing, inserted, big, duplicates = 0, 0, 0, 0 existing, inserted, big, duplicates = 0, 0, 0, 0
def insert(r): def insert(r):
@ -11,15 +13,6 @@ def insert(r):
"content": r["content"], "content": r["content"],
"checksum": r["check"], "checksum": r["check"],
"index": r["index"]}) "index": r["index"]})
def update_pk(r):
    """Overwrite the stored ``index`` of the entry whose checksum matches ``r``.

    ``r`` is a record dict; only its ``"check"`` and ``"index"`` keys are read.
    The work is delegated to ``lib.db.update_one`` (presumably a MongoDB
    collection handle -- confirm against ``lib``).
    """
    apply_update = lib.db.update_one
    # Select by checksum, then replace only the index field.
    selector = {"checksum": r["check"]}
    change = {"$set": {"index": r["index"]}}
    apply_update(selector, change)
def exists(r): def exists(r):
global existing, duplicates global existing, duplicates
@ -35,7 +28,7 @@ def exists(r):
else: return False else: return False
def parse(): def parse():
global inserted, big global inserted, big, inum
print("Parsing manifest json...") print("Parsing manifest json...")
# for every document in the export # for every document in the export
for document in manifest: for document in manifest:
@ -44,14 +37,38 @@ def parse():
record = {"title": document["fields"]["title"], record = {"title": document["fields"]["title"],
"content": document["fields"]["content"], "content": document["fields"]["content"],
"check": document["fields"]["checksum"], "check": document["fields"]["checksum"],
"index": document["pk"]} "index": inum}
if lib.sys.getsizeof(record["content"]) < 16777216: if lib.sys.getsizeof(record["content"]) < 16777216:
if record["content"] != "" and record["title"] != "": if record["content"] != "" and record["title"] != "":
if not exists(record): if not exists(record):
insert(record) insert(record)
inserted = inserted + 1 inserted = inserted + 1
update_pk(record) inum = inum + 1
else: big = big + 1 else: big = big + 1
except KeyError: except KeyError:
continue continue
def parse():
    """Walk the loaded manifest and insert each new, non-empty document.

    For every entry in the module-level ``manifest`` a record is built from
    the entry's ``fields`` (``title``, ``content``, ``checksum``) and tagged
    with the running counter ``inum`` as its ``index``.  Records whose
    content passes the size check, have a non-blank title and content, and
    are not reported by ``exists()`` are passed to ``insert()``.

    Updates the module-level counters ``inserted``, ``big`` and ``inum``.
    Entries that lack one of the expected keys are skipped.
    """
    global inserted, big, inum
    print("Parsing manifest json...")
    # for every document in the export
    for document in manifest:
        try:
            record = {"title": document["fields"]["title"],
                      "content": document["fields"]["content"],
                      "check": document["fields"]["checksum"],
                      "index": inum}
            # 16777216 bytes = 16 MiB; presumably mirrors the database's
            # per-document size limit -- confirm.  getsizeof() reports the
            # in-memory size of the str object, not its encoded size.
            if lib.sys.getsizeof(record["content"]) < 16777216:
                # only keep documents whose title and content aren't blank
                if record["content"] != "" and record["title"] != "":
                    if not exists(record):
                        insert(record)
                        inserted = inserted + 1
                        # Advance the same counter that was declared global
                        # and used for "index" above (previously this line
                        # referenced an undefined name and raised
                        # UnboundLocalError on the first insert).
                        # NOTE(review): indentation was lost in the source;
                        # the increment is assumed to happen once per
                        # inserted record -- confirm.
                        inum = inum + 1
            else:
                big = big + 1
        except KeyError:
            # Entry is missing an expected key; skip it.
            continue