Compare commits

..

9 Commits

Author SHA1 Message Date
8097fcef60 Merge pull request 'bookstack' (#1) from bookstack into master
Reviewed-on: #1
2024-10-05 05:15:10 -05:00
6b619c747f ignore venvs 2024-10-05 05:02:13 -05:00
333246e653 got mongodb insertion working again 2024-10-05 04:55:33 -05:00
1afd622196 got ijson working 2024-10-05 02:03:26 -05:00
c3cd29430f change pymongo host to remote ip 2024-10-05 00:13:07 -05:00
748609919b add constant for manifest path 2024-10-04 23:13:58 -05:00
Daniel
2a7ec01dcd changed index num to pk from manifest 2024-10-01 08:12:04 -05:00
Daniel
71f946b8b0 changed indexing 2024-10-01 07:47:31 -05:00
Daniel
816510f4de remove mysql experiments 2024-10-01 07:25:04 -05:00
7 changed files with 137 additions and 40 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
.idea
__pycache__
*.code-workspace
.venv*

9
src/bstack.py Normal file
View File

@ -0,0 +1,9 @@
from lib import *
def retrieve(id_num):
    """Read one page through the BookStack API, print it and return it.

    Args:
        id_num: Value sent as the ``id`` field of the ``get_pages_read`` call.

    Returns:
        Whatever ``api.get_pages_read`` returns. Earlier revisions only
        printed the result; returning it as well lets callers use the page
        without scraping stdout.
    """
    result = api.get_pages_read({"id": id_num})
    print(result)
    return result
def insert(id_num, name, text):
    """Placeholder for inserting a page; currently does nothing.

    Args:
        id_num: Unused for now.
        name: Unused for now.
        text: Unused for now.

    Returns:
        None.
    """

63
src/bstack_api_calls.json Normal file
View File

@ -0,0 +1,63 @@
[
"post_books_create",
"get_attachments_read",
"get_chapters_list",
"get_content_permissions_read",
"get_chapters_export_markdown",
"post_roles_create",
"put_recycle_bin_restore",
"put_image_gallery_update",
"put_books_update",
"post_shelves_create",
"get_books_read",
"get_attachments_list",
"put_content_permissions_update",
"get_recycle_bin_list",
"delete_users_delete",
"get_users_list",
"get_docs_json",
"get_books_export_html",
"put_chapters_update",
"get_chapters_export_pdf",
"get_pages_export_markdown",
"delete_pages_delete",
"post_attachments_create",
"put_attachments_update",
"get_roles_read",
"get_chapters_export_plain_text",
"get_users_read",
"delete_chapters_delete",
"put_pages_update",
"post_chapters_create",
"get_chapters_read",
"get_pages_list",
"get_pages_export_plain_text",
"get_roles_list",
"get_pages_export_html",
"get_image_gallery_read",
"delete_attachments_delete",
"post_users_create",
"get_audit_log_list",
"get_pages_export_pdf",
"delete_books_delete",
"get_shelves_read",
"delete_roles_delete",
"get_pages_read",
"put_shelves_update",
"get_books_export_plain_text",
"delete_shelves_delete",
"get_books_export_markdown",
"delete_image_gallery_delete",
"get_search_all",
"get_books_list",
"post_image_gallery_create",
"get_books_export_pdf",
"post_pages_create",
"get_shelves_list",
"get_docs_display",
"put_users_update",
"put_roles_update",
"delete_recycle_bin_destroy",
"get_chapters_export_html",
"get_image_gallery_list"
]

View File

@ -1,20 +1,31 @@
import lib
from lib import json_cursor, json_file
# import paperless database export
raw_manifest = open("/mnt/user/media/paperless/media/backup/manifest.json")
manifest = lib.json.load(raw_manifest)
index_num = 1
existing, inserted, big, duplicates = 0, 0, 0, 0
existing, inserted, duplicates = 0, 0, 0
def insert(r):
    """Insert one parsed manifest record into the MongoDB collection.

    Args:
        r: Mapping with the keys ``pk``, ``title``, ``content`` and ``check``
            (the checksum), as assembled by ``parse``.

    Side effects:
        Writes one document through ``lib.db.insert_one`` and increments the
        module-level ``inserted`` counter.
    """
    global inserted

    # The stored document is keyed by the manifest primary key ("pk"); the
    # older running "index" counter is no longer written.
    lib.db.insert_one({
        'title': r['title'],
        "content": r['content'],
        "checksum": r['check'],
        "pk": r['pk'],
    })
    inserted += 1
def update_pk(r):
    """Set the ``pk`` field on the stored document that matches r's checksum.

    Args:
        r: Mapping with at least ``check`` (checksum used as the filter) and
            ``pk`` (the value to store).

    The previous body read ``r["index"]`` and wrote an ``index`` field, but
    records built by ``parse`` carry ``pk`` and ``insert`` stores ``pk``, so
    that lookup raised KeyError.
    NOTE(review): confirm no caller still passes records with ``index``.
    """
    lib.db.update_one(
        {"checksum": r["check"]},
        {"$set": {"pk": r["pk"]}},
    )
def rec_exists(r):
global existing, duplicates
record = lib.db.find_one({"checksum": r["check"]})
dupe = lib.db.find_one({"content": r["content"], "title": r["title"]})
@ -28,25 +39,19 @@ def exists(r):
else: return False
def parse():
    """Stream the export and insert every document that is not stored yet.

    Documents come from ``json_cursor(json_file())``. A document is
    considered only when its ``fields`` carry both ``title`` and ``content``;
    it is inserted unless ``rec_exists`` reports it as already present.

    The file is opened in a ``with`` block so the handle is closed even if
    parsing or a database call raises part-way through.
    """
    with json_file() as f:
        print("Parsing manifest json...")
        for doc in json_cursor(f):
            fields = doc['fields']
            if 'title' not in fields or 'content' not in fields:
                continue

            # A fresh dict per document, so one record never carries values
            # left over from the previous iteration.
            record = {
                'pk': doc['pk'],
                'title': fields['title'],
                'content': fields['content'],
                'check': fields['checksum'],
            }
            if not rec_exists(record):
                insert(record)

View File

@ -1,2 +0,0 @@
import lib

View File

@ -1,6 +1,29 @@
import pymongo
import os
import json
import ijson
import sys
import bookstack
db = pymongo.MongoClient("localhost", 27017).paperless.content
def json_file(path="/mnt/tower/media/paperless/media/backup/test.json"):
    """Open the JSON export for streaming and return the file object.

    Args:
        path: Location of the export. Defaults to the path that was
            previously hard-coded, so existing ``json_file()`` calls behave
            as before.

    Returns:
        A file object opened in binary mode. ijson reads bytes; handing it a
        text-mode file makes it re-encode the data and emit a deprecation
        warning. The caller is responsible for closing the file.
    """
    return open(path, "rb")
# import paperless database export
# manifest_path = "/mnt/user/media/paperless/media/manifest.json"
def json_cursor(f):
    """Lazily yield the document entries of an export that fit the size cap.

    Args:
        f: Open file object holding a top-level JSON array; it is handed to
            ``ijson.items(f, 'item')`` so entries are parsed one at a time.

    Returns:
        A generator over entries whose ``model`` is ``"documents.document"``
        and whose ``fields['content']`` is smaller than 16777216 bytes as
        reported by ``sys.getsizeof``.

    Entries without a ``content`` field are passed through instead of raising
    KeyError, so the caller's own ``'content' in fields`` check can skip them.
    """
    # Presumably MongoDB's 16 MiB document limit -- TODO confirm. Note that
    # sys.getsizeof reports the in-memory size, not the encoded size.
    max_content_size = 16777216

    def _wanted(doc):
        if doc.get('model') != "documents.document":
            return False
        content = doc['fields'].get('content')
        return content is None or sys.getsizeof(content) < max_content_size

    return (doc for doc in ijson.items(f, 'item') if _wanted(doc))
import os  # imported here so this block works on its own

# NOTE(review): the host, API token and secret below are committed in plain
# text. Rotate the BookStack token and supply the real values through the
# environment; the literals are kept only as backward-compatible fallbacks.
db = pymongo.MongoClient(
    os.environ.get("PAPERLESS_MONGO_HOST", "10.0.0.59"), 27017
).paperless.content

url = os.environ.get("BOOKSTACK_URL", 'http://10.0.0.59:6875')
token = os.environ.get("BOOKSTACK_TOKEN_ID", 'RVSO8xZXOjRYJntNYPRd3E9iT2qXm11C')
secret = os.environ.get("BOOKSTACK_TOKEN_SECRET", 'qR5r2EyKT09ogz8VSolS12ispAV5QrT0')

# Shared BookStack client, plus the result of generate_api_methods().
api = bookstack.BookStack(url, token, secret)
methods = api.generate_api_methods()

View File

@ -1,13 +1,12 @@
import extract
import search
import insert
def start():
    """Run one import pass, report its counters, reset them, open the menu.

    The "To big" line is gone because the extract module no longer keeps a
    ``big`` counter; only ``existing``, ``inserted`` and ``duplicates`` are
    defined there.
    """
    extract.parse()
    print("Existing:", extract.existing)
    print("Inserted:", extract.inserted)
    print("Dupes:", extract.duplicates)
    # Reset so a later reload reports only its own pass.
    extract.existing, extract.inserted, extract.duplicates = 0, 0, 0
    menu()
@ -17,8 +16,7 @@ def prompt():
def menu():
value = prompt()
if value == "reload":
extract.parse()
value = prompt()
start()
while value != "quit" and value != "reload":
results = search.query(value)
if type(results) == dict: