Compare commits

..

2 Commits

Author SHA1 Message Date
26791a48ba working on tag and chapter creation in bookstack 2024-10-05 07:44:09 -05:00
d233df3d94 working on tag generation 2024-10-05 06:52:57 -05:00
2 changed files with 59 additions and 22 deletions

View File

@ -3,28 +3,32 @@ from lib import json_cursor, json_file
# Module-level counters; functions in this module rebind them via `global`.
existing = inserted = duplicates = 0
def insert_tag(r):
    """Store one tag record via ``lib.db.insert_one``.

    ``r`` must provide ``pk`` and ``name``. The stored record carries a
    ``type`` field set to ``"tag"`` alongside those two values.
    """
    lib.db.insert_one({
        'type': "tag",
        'pk': r['pk'],
        'name': r['name'],
    })
def insert(r):
    """Store one document record and bump the ``inserted`` counter.

    ``r`` must provide ``pk``, ``check``, ``content`` and ``title``. The
    stored record is marked with ``type`` set to ``"document"`` and keeps
    the ``check`` value under the ``checksum`` key.
    """
    global inserted
    # Read the fields in one go, in the same order as before.
    pk, check, content, title = r['pk'], r['check'], r['content'], r['title']
    record = {
        'type': "document",
        'title': title,
        "content": content,
        "checksum": check,
        "pk": pk,
    }
    lib.db.insert_one(record)
    inserted += 1
def update_pk(r):
    """Set the ``index`` field on the record whose checksum matches ``r``.

    ``r`` must provide ``check`` (matched against the stored ``checksum``)
    and ``index`` (the value written). Only one matching record is updated,
    since the call used is ``update_one``.
    """
    selector = {"checksum": r["check"]}
    change = {"$set": {"index": r["index"]}}
    lib.db.update_one(selector, change)
def rec_exists(r): def rec_exists(r):
global existing, duplicates global existing, duplicates
record = lib.db.find_one({"checksum": r["check"]}) record = lib.db.find_one({"checksum": r["check"]})
@ -39,20 +43,47 @@ def rec_exists(r):
else: return False else: return False
def create_page(r):
    """Create a BookStack page in book 3 for one document record.

    Args:
        r: mapping that provides ``tags`` (presumably the tag primary keys
            of the document -- confirm against the caller), ``pk``,
            ``title`` and ``content``.

    Each entry of ``tags`` is attached to the page as a
    ``{"name": "pk", "value": str(tag)}`` item. For every tag that has a
    stored tag record, a chapter named after that tag is created first and
    the page is filed under the most recently created one. The outcome is
    printed; nothing is returned.
    """
    tags = r['tags']
    pk = r['pk']
    name = r['title']
    markdown = r['content']

    # Start from "no chapter" explicitly rather than probing for an unbound
    # local with try/except NameError once the loop has finished.
    chapter_id = None
    t_list = []
    for tag in tags:
        t_list.append({"name": "pk", "value": str(tag)})
        # Tag records are stored with 'type' set to the literal "tag" and
        # 'pk' set to the tag's own key, so the lookup filters on exactly
        # that; using the tag value as the type, or the document pk as the
        # key, can never match one of those records.
        mongo_tag = lib.db.find_one({"type": "tag", "pk": tag})
        if mongo_tag:
            # NOTE(review): the result is passed on as 'chapter_id' below;
            # confirm post_chapters_create returns an id, not a response body.
            # NOTE(review): a chapter is created on every call for a tagged
            # document -- confirm whether existing chapters should be reused.
            chapter_id = lib.api.post_chapters_create({
                'book_id': 3,
                'name': mongo_tag['name'],
                # NOTE(review): this is the document pk, not the tag pk.
                'tags': [{"name": "pk", "value": pk}],
            })

    response = lib.api.post_pages_create({
        'chapter_id': chapter_id if chapter_id else "",
        # NOTE(review): an empty tag list is sent as the string "[]";
        # kept as-is because the encoding used by lib.api is not visible here.
        'tags': t_list if t_list else "[]",
        'book_id': 3,
        'page_id': pk,
        'name': name,
        'markdown': markdown,
    })

    if 'error' in response:
        print("\n\ndidn't work: ")
        print(response['error']['message'])
        print(t_list)
    else:
        print("\n\nWorked: \n", t_list, "\n", response['name'])
def tag_exists(t):
    """Return True when a tag record with the pk of ``t`` is already stored.

    Args:
        t: mapping that provides the tag's ``pk``.

    Returns:
        bool: True if ``lib.db.find_one`` yields a matching tag record.
    """
    # Document and tag records are written through the same lib.db handle
    # and both carry a 'pk' field, so the lookup is restricted to records
    # of type "tag"; otherwise a document that happens to share the pk
    # would make a missing tag look present and it would never be inserted.
    return lib.db.find_one({"type": "tag", "pk": t['pk']}) is not None
def parse(): def parse():
f = json_file() f = json_file()
@ -61,7 +92,18 @@ def parse():
# for every document in the export # for every document in the export
for doc in json_cursor(f): for doc in json_cursor(f):
fields = doc['fields'] fields = doc['fields']
if 'title' in fields and 'content' in fields: if doc['model'] == 'documents.tag':
tag = {'name': fields['name'], 'pk': doc['pk']}
if tag_exists(tag):
pass
else:
insert_tag(tag)
if 'title' in fields and 'content' in fields \
and doc['model'] == 'documents.document' \
and lib.sys.getsizeof(fields['content']) < 16777216:
r['tags'] = doc['fields']['tags']
r['pk'] = doc['pk'] r['pk'] = doc['pk']
r['title'] = fields['title'] r['title'] = fields['title']
r['content'] = fields['content'] r['content'] = fields['content']
@ -69,6 +111,7 @@ def parse():
create_page(r) create_page(r)
if rec_exists(r): if rec_exists(r):
continue continue
else: else:

View File

@ -6,17 +6,11 @@ import bookstack
def json_file():
    """Open the backup ``manifest.json`` and return the file object.

    The file is opened with the default mode and is not closed here; the
    open handle is handed back to the caller.
    """
    manifest_path = "/mnt/tower/media/paperless/media/backup/manifest.json"
    return open(manifest_path)
# import paperless database export
# manifest_path = "/mnt/user/media/paperless/media/manifest.json"
def json_cursor(f):
    """Return a lazy iterator over the entries ``ijson`` finds in ``f``.

    ``f`` is passed to ``ijson.items`` with the prefix ``'item'``. No
    filtering happens here: every entry is yielded as-is.
    """
    return (entry for entry in ijson.items(f, 'item'))
# Shared handle to the `content` collection of the `paperless` database.
db = pymongo.MongoClient("10.0.0.59", 27017)["paperless"]["content"]