finished basic functionality
This commit is contained in:
parent
b0e8bd5f76
commit
d48dc97eda
@ -1,4 +1,7 @@
|
||||
import pymongo
|
||||
import os
|
||||
import json
|
||||
import sys
|
||||
import sys
|
||||
import pprint
|
||||
|
||||
# Handle to the "content" collection of the "paperless" database on the
# MongoDB server at localhost:27017.
# NOTE(review): presumably this is what other modules reach as `lib.db` -- confirm.
db = pymongo.MongoClient("localhost", 27017).paperless.content
|
||||
|
@ -4,24 +4,52 @@ import lib
|
||||
# Parse the exported manifest once at import time.  The context manager
# closes the file as soon as it has been read; previously the handle was
# left open for the life of the process.  `raw_manifest` stays bound (to the
# closed file) so existing references to the name keep resolving.
with open("/mnt/user/media/paperless/export/manifest.json") as raw_manifest:
    manifest = lib.json.load(raw_manifest)
|
||||
|
||||
# Client for the MongoDB server at localhost:27017 (pymongo is reached through lib).
mongo = lib.pymongo.MongoClient("localhost", 27017)
|
||||
# The "paperless" database; output() writes to its "content" collection.
db = mongo.paperless
|
||||
|
||||
# export directory for created md files
|
||||
# NOTE(review): nothing visible here reads `directory` -- confirm it is still needed.
directory = '/mnt/user/repos/docs/paperless_export'
|
||||
|
||||
def output(t, c):
    """Insert one document into the ``content`` collection of ``db``.

    t is stored under the "title" key and c under the "content" key.
    """
    document = {"title": t, "content": c}
    db.content.insert_one(document)
|
||||
# Value stored under "index" for the next record built by parse(); parse()
# increments it after each successful insert.
index_num = 1
|
||||
|
||||
# Run counters: exists() updates `existing` and `duplicates`, parse() updates
# `inserted` and `big`.
existing, inserted, big, duplicates = 0, 0, 0, 0
|
||||
|
||||
def insert(r):
    """Store record r in ``lib.db``.

    r must provide the keys "title", "content", "check" and "index"; the
    "check" value is written under the key "checksum", the others keep
    their names.
    """
    document = {
        "title": r["title"],
        "content": r["content"],
        "checksum": r["check"],
        "index": r["index"],
    }
    lib.db.insert_one(document)
|
||||
|
||||
def exists(r):
    """Report whether record r is already stored in ``lib.db``.

    Two lookups are made: one by checksum (r["check"]) and one by the exact
    "content" and "title" pair.  The return value depends only on the second
    lookup; the first merely decides which counter is bumped:

    * content/title match and checksum match -> ``existing`` is incremented
    * content/title match only              -> ``duplicates`` is incremented
    * no content/title match                -> nothing is counted

    Returns True when a content/title match was found, otherwise False.
    """
    global existing, duplicates

    by_checksum = lib.db.find_one({"checksum": r["check"]})
    by_text = lib.db.find_one({"content": r["content"], "title": r["title"]})

    if not by_text:
        return False

    if by_checksum:
        existing += 1
    else:
        duplicates += 1
    return True
|
||||
|
||||
def parse():
    """Insert every usable entry of ``manifest`` via insert().

    For each manifest entry the "title", "content" and "checksum" values
    found under its "fields" key are copied into a record together with the
    current ``index_num``.  Entries missing any of those keys are skipped.

    A record whose content reports a ``sys.getsizeof`` at or above the size
    limit is counted in ``big`` and skipped.  Otherwise the record is
    inserted when both title and content are non-empty and exists() returns
    False; each insert increments ``inserted`` and ``index_num``.
    """
    global inserted, big, index_num
    print("entering parse()")

    # 16 MiB.  NOTE(review): presumably meant to mirror MongoDB's document
    # size cap; sys.getsizeof measures the in-memory str object, not the
    # encoded size -- confirm this approximation is acceptable.
    size_limit = 16777216

    # for every document in the export
    for document in manifest:
        # Only the field lookups are expected to raise KeyError.  Keeping
        # exists()/insert() outside the try means a KeyError raised inside
        # them surfaces instead of silently dropping the document.
        try:
            fields = document["fields"]
            record = {"title": fields["title"],
                      "content": fields["content"],
                      "check": fields["checksum"],
                      "index": index_num}
        except KeyError:
            continue

        if lib.sys.getsizeof(record["content"]) >= size_limit:
            big = big + 1
            continue

        # Skip entries with a blank title or blank content.
        if record["content"] == "" or record["title"] == "":
            continue

        if exists(record):
            continue

        insert(record)
        inserted = inserted + 1
        index_num = index_num + 1
|
||||
|
||||
|
||||
|
24
src/main.py
24
src/main.py
@ -1,9 +1,31 @@
|
||||
import lib
|
||||
import extract
|
||||
import search
|
||||
import pprint
|
||||
|
||||
def prompt():
    """Ask the user for a search value on the console and return what was typed."""
    answer = input("Search Value: ")
    return answer
|
||||
|
||||
def menu():
    """Run the interactive search loop until the user enters "quit".

    Each entered value is passed to search.query().  A dict result is treated
    as a single document and its "content" is printed; any other result is
    iterated and each document's "index" and "title" are printed.  An empty
    or None result prints nothing.
    """
    while True:
        value = prompt()
        if value == "quit":
            break

        results = search.query(value)

        # isinstance also accepts dict subclasses, unlike an exact type() match.
        if isinstance(results, dict):
            print(results["content"])
            continue

        # `or ()` guards against a None result (a lookup that found nothing),
        # which would otherwise raise TypeError in the for loop.
        for doc in results or ():
            print(doc["index"], " : ", doc["title"])
|
||||
|
||||
|
||||
def main():
    """Import the manifest, print the run counters, then start the search menu."""
    extract.parse()

    # Counters are read from the extract module after parse() has finished.
    summary = (
        ("Existing:", extract.existing),
        ("Inserted:", extract.inserted),
        ("To big:", extract.big),
        ("Dupes:", extract.duplicates),
    )
    for label, count in summary:
        print(label, count)

    menu()
|
||||
|
||||
# Start the program only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
@ -0,0 +1,22 @@
|
||||
from pprint import pprint
|
||||
|
||||
import lib
|
||||
|
||||
def query(s):
    """Search ``lib.db`` for documents matching the search string s.

    A document matches when s, used as a regular expression, matches its
    "content" or its "title", or when its "index" equals s (compared as an
    int when s consists only of decimal digits, otherwise as the raw string).

    Returns:
        The single matching document (a dict) when one of the matches has an
        "index" equal to the search value; otherwise the list of all
        matching documents, which may be empty.
    """
    # isdecimal() rather than isdigit(): characters such as "²" satisfy
    # isdigit() but make int() raise ValueError.
    index_key = int(s) if s.isdecimal() else s

    cursor = lib.db.find(
        {
            "$or": [
                {"content": {"$regex": s}},
                {"title": {"$regex": s}},
                {"index": index_key},
            ]
        }
    )
    results = list(cursor)

    # The previous condition, `item["index"] == s if not s.isdigit() else
    # int(s)`, parsed as `(item["index"] == s) if ... else int(s)`: any
    # non-zero numeric search returned on the first result regardless of its
    # index, and the follow-up find_one could yield None.  Comparing against
    # the precomputed key and returning the matched document fixes both.
    for item in results:
        # .get(): a document stored without an "index" key is simply not an
        # exact-index match.
        if item.get("index") == index_key:
            return item
    return results
|
Loading…
Reference in New Issue
Block a user