finished basic functionality
This commit is contained in:
parent
b0e8bd5f76
commit
d48dc97eda
@ -1,4 +1,7 @@
|
|||||||
import pymongo
|
import pymongo
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
|
import pprint
|
||||||
|
|
||||||
|
db = pymongo.MongoClient("localhost", 27017).paperless.content
|
||||||
|
@ -4,24 +4,52 @@ import lib
|
|||||||
raw_manifest = open("/mnt/user/media/paperless/export/manifest.json")
|
raw_manifest = open("/mnt/user/media/paperless/export/manifest.json")
|
||||||
manifest = lib.json.load(raw_manifest)
|
manifest = lib.json.load(raw_manifest)
|
||||||
|
|
||||||
mongo = lib.pymongo.MongoClient("localhost", 27017)
|
|
||||||
db = mongo.paperless
|
|
||||||
|
|
||||||
# export directory for created md files
|
# export directory for created md files
|
||||||
directory = '/mnt/user/repos/docs/paperless_export'
|
directory = '/mnt/user/repos/docs/paperless_export'
|
||||||
|
|
||||||
def output(t, c):
|
index_num = 1
|
||||||
db.content.insert_one({"title": t, "content": c})
|
|
||||||
|
existing, inserted, big, duplicates = 0, 0, 0, 0
|
||||||
|
|
||||||
|
def insert(r):
    """Store one parsed record in the collection exposed as ``lib.db``.

    ``r`` is a dict with the keys ``title``, ``content``, ``check`` and
    ``index``; a missing key raises ``KeyError``.
    """
    document = {
        "title": r["title"],
        "content": r["content"],
        # The record carries the checksum under the shorter key "check";
        # it is stored under "checksum".
        "checksum": r["check"],
        "index": r["index"],
    }
    lib.db.insert_one(document)
|
||||||
|
|
||||||
|
def exists(r):
    """Return True when a record with the same title and content is stored.

    Side effect: updates the module-level counters. ``existing`` is
    incremented when a stored document with the same checksum is also
    found; otherwise ``duplicates`` is incremented (same title and
    content, but no stored document carries this checksum).

    ``r`` must provide the keys ``title``, ``content`` and ``check``.
    """
    global existing, duplicates

    dupe = lib.db.find_one({"content": r["content"], "title": r["title"]})
    if not dupe:
        return False

    # The checksum lookup only decides which counter to bump, so it is
    # skipped entirely when there is no title/content match.
    if lib.db.find_one({"checksum": r["check"]}):
        existing += 1
    else:
        duplicates += 1
    return True
|
||||||
|
|
||||||
def parse():
    """Load every usable manifest entry into the database.

    Iterates over the module-level ``manifest`` and, for each entry that
    has ``title``, ``content`` and ``checksum`` under ``fields``:

    * counts it in ``big`` and skips it when the content is 16 MiB or
      larger as measured by ``sys.getsizeof``;
    * skips it when the title or the content is an empty string;
    * skips it when ``exists()`` reports it as already stored;
    * otherwise inserts it, then increments ``inserted`` and
      ``index_num``.

    Entries lacking any of the required fields are skipped silently.
    """
    global inserted, big, index_num
    print("entering parse()")

    # for every document in the export
    for document in manifest:
        # Only the field extraction is guarded: a KeyError raised further
        # down (inside exists() or insert()) must not be mistaken for a
        # manifest entry that simply lacks these fields.
        try:
            fields = document["fields"]
            record = {
                "title": fields["title"],
                "content": fields["content"],
                "check": fields["checksum"],
                "index": index_num,
            }
        except KeyError:
            continue

        # NOTE(review): getsizeof reports the size of the Python object,
        # not of the encoded document; presumably 16777216 is meant as
        # MongoDB's per-document limit - confirm.
        if lib.sys.getsizeof(record["content"]) >= 16777216:
            big += 1
            continue

        # skip entries whose title or content is blank
        if record["content"] == "" or record["title"] == "":
            continue

        if exists(record):
            continue

        insert(record)
        inserted += 1
        index_num += 1
|
||||||
|
|
||||||
|
|
||||||
|
24
src/main.py
24
src/main.py
@ -1,9 +1,31 @@
|
|||||||
import lib
|
|
||||||
import extract
|
import extract
|
||||||
|
import search
|
||||||
|
import pprint
|
||||||
|
|
||||||
|
def prompt():
    """Ask the user for a search term and return it exactly as typed."""
    answer = input("Search Value: ")
    return answer
|
||||||
|
|
||||||
|
def menu():
    """Run the interactive search loop until the user enters ``quit``.

    Each entered value is passed to ``search.query``. A single document
    (a dict) has its ``content`` printed; any other result is treated as
    an iterable of documents and listed as ``index : title``.
    """
    # iter(callable, sentinel) keeps calling prompt() until it returns "quit".
    for value in iter(prompt, "quit"):
        results = search.query(value)

        # isinstance also accepts dict subclasses, unlike type(...) == dict.
        if isinstance(results, dict):
            print(results["content"])
            continue

        # A None result (no document found) is treated as an empty listing
        # instead of crashing the loop.
        for doc in results or ():
            print(doc["index"], " : ", doc["title"])
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Import the manifest, print the import counters, then start the menu."""
    extract.parse()

    # The counters are read only after parse() has finished updating them.
    summary = (
        ("Existing:", extract.existing),
        ("Inserted:", extract.inserted),
        ("To big:", extract.big),
        ("Dupes:", extract.duplicates),
    )
    for label, count in summary:
        print(label, count)

    menu()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
@ -0,0 +1,22 @@
|
|||||||
|
from pprint import pprint
|
||||||
|
|
||||||
|
import lib
|
||||||
|
|
||||||
|
def query(s):
    """Search stored documents by content, title or index.

    ``s`` is used as a regular expression against ``content`` and
    ``title``; when it consists only of decimal digits it is also matched
    as an integer against ``index``.

    Returns the single matching document (a dict) when one of the hits
    has exactly the requested index, otherwise the list of all hits
    (possibly empty).
    """
    # isdecimal() rather than isdigit(): the latter also accepts characters
    # such as superscript digits, which int() refuses to convert.
    target = int(s) if s.isdecimal() else s

    cursor = lib.db.find(
        {
            "$or": [
                {"content": {"$regex": s}},
                {"title": {"$regex": s}},
                {"index": target},
            ]
        }
    )
    results = list(cursor)

    # An exact index hit wins over the regex matches. The comparison is
    # made against the already-converted target, so a numeric search no
    # longer short-circuits on the first unrelated hit.
    for item in results:
        if item.get("index") == target:
            return item
    return results
|
Loading…
Reference in New Issue
Block a user