bookstack #1

Merged
dcronin05 merged 8 commits from bookstack into master 2024-10-05 03:15:11 -07:00
3 changed files with 38 additions and 47 deletions
Showing only changes of commit 333246e653 - Show all commits

View File

@ -1,18 +1,20 @@
import lib import lib
from lib import json_cursor, json_file
# import paperless database export existing, inserted, duplicates = 0, 0, 0
# manifest_path = "/mnt/user/media/paperless/media/backup/manifest.json"
manifest_path = "/mnt/tower/media/paperless/media/backup/manifest.json"
raw_manifest = open(manifest_path)
manifest = lib.json.load(raw_manifest)
existing, inserted, big, duplicates = 0, 0, 0, 0
def insert(r): def insert(r):
lib.db.insert_one({"title": r["title"], global inserted
"content": r["content"], pk = r['pk']
"checksum": r["check"], check = r['check']
"index": r["index"]}) content = r['content']
title = r['title']
lib.db.insert_one({'title': title,"content": content,
"checksum": check,
"pk": pk})
inserted = inserted + 1
def update_pk(r): def update_pk(r):
lib.db.update_one( lib.db.update_one(
{ {
@ -23,7 +25,7 @@ def update_pk(r):
} }
) )
def exists(r): def rec_exists(r):
global existing, duplicates global existing, duplicates
record = lib.db.find_one({"checksum": r["check"]}) record = lib.db.find_one({"checksum": r["check"]})
dupe = lib.db.find_one({"content": r["content"], "title": r["title"]}) dupe = lib.db.find_one({"content": r["content"], "title": r["title"]})
@ -37,23 +39,19 @@ def exists(r):
else: return False else: return False
def parse(): def parse():
global inserted, big f = json_file()
print("Parsing manifest json...") print("Parsing manifest json...")
r = {}
# for every document in the export # for every document in the export
for document in manifest: for doc in json_cursor(f):
#if the title and content tags aren't blank fields = doc['fields']
try: if 'title' in fields and 'content' in fields:
record = {"title": document["fields"]["title"], r['pk'] = doc['pk']
"content": document["fields"]["content"], r['title'] = fields['title']
"check": document["fields"]["checksum"], r['content'] = fields['content']
"index": document["pk"]} r['check'] = fields['checksum']
if rec_exists(r):
if lib.sys.getsizeof(record["content"]) < 16777216: continue
if record["content"] != "" and record["title"] != "": else:
if not exists(record): insert(r)
insert(record) f.close()
inserted = inserted + 1
update_pk(record)
else: big = big + 1
except KeyError:
continue

View File

@ -5,24 +5,18 @@ import sys
import bookstack import bookstack
def json_file():
return open("/mnt/tower/media/paperless/media/backup/test.json")
# import paperless database export # import paperless database export
# manifest_path = "/mnt/user/media/paperless/media/manifest.json" # manifest_path = "/mnt/user/media/paperless/media/manifest.json"
f = open("/mnt/tower/media/paperless/media/backup/manifest.json")
manifest = ijson.items(f, 'item')
paperless_docs = {}
for doc in manifest:
fields = doc['fields']
if 'title' in fields and 'content' in fields:
pk = doc['pk']
title = fields['title']
content = fields['content']
check = fields['checksum']
paperless_docs[pk] = {'checksum': check, 'title': title, 'content': content}
def json_cursor(f):
items = ijson.items(f, 'item')
docs = (doc for doc in items \
if doc['model'] == "documents.document" \
if sys.getsizeof(doc['fields']['content']) < 16777216)
return docs
db = pymongo.MongoClient("10.0.0.59", 27017).paperless.content db = pymongo.MongoClient("10.0.0.59", 27017).paperless.content

View File

@ -5,8 +5,8 @@ def start():
extract.parse() extract.parse()
print("Existing:", extract.existing) print("Existing:", extract.existing)
print("Inserted:", extract.inserted) print("Inserted:", extract.inserted)
print("To big:", extract.big)
print("Dupes:", extract.duplicates) print("Dupes:", extract.duplicates)
extract.existing, extract.inserted, extract.duplicates = 0, 0, 0
menu() menu()
@ -16,8 +16,7 @@ def prompt():
def menu(): def menu():
value = prompt() value = prompt()
if value == "reload": if value == "reload":
extract.parse() start()
value = prompt()
while value != "quit" and value != "reload": while value != "quit" and value != "reload":
results = search.query(value) results = search.query(value)
if type(results) == dict: if type(results) == dict: