Management of a bunch of documents.
Module whintpy.deposit
Class DocumentsManager
Description
Constructor
Initialize the DocumentsManager.
The given folder is used to collect the documents from and write documents into. It must be a relative path, not an absolute one. The absolute path to this folder is indicated separately.
It must exist.
Example
>>> manager = DocumentsManager('test_folder')
>>> manager.collect_docs()
>>> print(manager.get_docs_sorted_by_newest())
>>> # Assuming your_document is a document in the folder
>>> manager.get_doc_content(your_document)
>>> print(your_document.content) # The content of the document is printed
>>> manager.delete(your_document)
>>> print(manager.get_docs_sorted_by_newest()) # The document is deleted
Parameters
- folder_path: (str) The relative path of a folder
Raises
- TypeError: Invalid folder path type
- FileNotFoundError: The specified folder does not exist at the specified location
View Source
def __init__(self, folder_path: str='.', absolute_path: str=HERE):
    """Create a manager bound to an existing folder of documents.

    The folder is designated in two parts: 'absolute_path' is the base
    location and 'folder_path' is the path of the folder relative to that
    base. Both are joined to locate the folder, which must already exist.
    Documents are collected from, and written into, that folder.

    :example:
    >>> manager = DocumentsManager('test_folder')
    >>> manager.collect_docs()
    >>> print(manager.get_docs_sorted_by_newest())

    :param folder_path: (str) The relative path of a folder
    :param absolute_path: (str) The base path that 'folder_path' is relative to
    :raises: TypeError: Invalid path type, as reported by TypesDealer.check_types
    :raises: FileNotFoundError: The specified folder does not exist at the specified location

    """
    TypesDealer.check_types(
        'DocumentsManager.__init__',
        [(folder_path, str), (absolute_path, str)]
    )
    location = os.path.join(absolute_path, folder_path)
    if not os.path.exists(location):
        raise FileNotFoundError(f"The specified folder does not exist at the specified location: '{location}'.")

    # Both parts are kept separately: relative paths of documents are
    # built from 'folder_path' only.
    self.__absolute_path = absolute_path
    self.__folder_path = folder_path
    # Managed documents; filled by collect_docs(), add(), add_doc().
    self.__docs = []
Public functions
get_folder_path
Return the folder path: the relative path to documents.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> print(doc1.get_folder_path()) # test_folder
Returns
- (str) The folder path
View Source
def get_folder_path(self) -> str:
    """Return the relative path of the folder of documents.

    This is the 'folder_path' value given to the constructor.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> print(doc1.get_folder_path())   # test_folder

    :return: (str) The folder path

    """
    relative_path = self.__folder_path
    return relative_path
get_absolute_folder_path
Return the folder path: the absolute path to documents.
Returns
- (str) The path to documents
View Source
def get_absolute_folder_path(self) -> str:
    """Return the absolute path of the folder of documents.

    It is the base path joined with the relative folder path, both given
    to the constructor.

    :return: (str) The path to documents

    """
    base, relative = self.__absolute_path, self.__folder_path
    return os.path.join(base, relative)
collect_docs
Collect all documents from the folder path.
Example
>>> manager = DocumentsManager('test_folder')
>>> manager.collect_docs()
>>> print([doc for doc in manager])
Parameters
- mutable: (bool) False to store only ImmutableDocument() instances instead of Document() instance ones
Raises
- FileNotFoundError: The specified folder does not exist at the specified location
View Source
def collect_docs(self, mutable: bool=True) -> None:
    """Collect all documents from the folder path.

    Each sub-folder of the documents folder is turned into one document.
    Its description and its number of downloads are read from their
    dedicated files; a missing or empty downloads file means 0.
    A sub-folder that can not be turned into a document is logged as an
    error and skipped. Collected documents are appended to the ones that
    are already managed: call clear_docs() first to start from scratch.

    :example:
    >>> manager = DocumentsManager('test_folder')
    >>> manager.collect_docs()
    >>> print([doc for doc in manager])

    :param mutable: (bool) False to store only ImmutableDocument() instances instead of Document() instance ones
    :raises: FileNotFoundError: The specified folder does not exist at the specified location

    """
    # NOTE(review): 'absolute_path' is read as an attribute of the instance;
    # presumably a property bound to get_absolute_folder_path() -- confirm.
    root = self.absolute_path
    if not os.path.isdir(root):
        # FileNotFoundError is the documented exception. It derives from
        # OSError (alias IOError), so callers catching IOError still work.
        raise FileNotFoundError(f'The specified folder does not exist at the specified location: {root}')

    for folder_name in os.listdir(root):
        # Only sub-folders hold documents; plain files are ignored.
        if not os.path.isdir(os.path.join(root, folder_name)):
            continue
        try:
            description = self.__read_info(folder_name, ws.DESCRIPTION_FILENAME)
            raw_downloads = self.__read_info(folder_name, ws.DOWNLOADS_FILENAME)
            downloads = int(raw_downloads) if len(raw_downloads) > 0 else 0
            idoc = Document.create_document_by_folder_name(folder_name, description, downloads)
            if mutable is True:
                self.__docs.append(self.__immutable_to_document(idoc))
            else:
                self.__docs.append(idoc)
        except Exception as e:
            # Best effort: one invalid folder must not prevent collecting the others.
            logging.error(f'Failed to collect a document for folder {folder_name}: {e}')

    if len(self.__docs) > 0:
        logging.info(f'Collected {len(self.__docs)} documents in {root}')
    else:
        logging.info(f'No documents found in {root}.')
clear_docs
Clear the list of documents.
Example
>>> manager = DocumentsManager('test_folder')
>>> manager.collect_docs()
>>> manager.clear_docs()
>>> print([doc for doc in manager])
> []
View Source
def clear_docs(self):
    """Forget all the managed documents.

    Only the in-memory list is emptied: nothing is deleted on disk.

    :example:
    >>> manager = DocumentsManager('test_folder')
    >>> manager.collect_docs()
    >>> manager.clear_docs()
    >>> print([doc for doc in manager])
    []

    """
    # Emptied in place: the list object itself is kept.
    del self.__docs[:]
add
Create and add a document to the list of documents.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> # Assuming your_document is a document
>>> doc1.add_doc(your_document)
>>> print(doc1.is_in_docs(your_document))
> True
Parameters
- author: (str) The document author
- filename: (str) The document filename
- kwargs: (dict) The keyword arguments to create the Document()
Raises
- TypeError: Cant create the document
- ValueError: Cant create the document
Returns
- (ImmutableDocument) The created document
View Source
def add(self, author: str, filename: str, **kwargs) -> ImmutableDocument:
    """Create a document and add it to the list of documents.

    A Document() is created and stored. The returned value is an
    ImmutableDocument() created separately from the same arguments.

    :example:
    >>> manager = DocumentsManager('test_folder')
    >>> manager.collect_docs()
    >>> idoc = manager.add('Brigitte Bigi', 'report.pdf')

    :param author: (str) The document author
    :param filename: (str) The document filename
    :param kwargs: (dict) The keyword arguments to create the Document()
    :raises: TypeError: Cant create the document
    :raises: ValueError: Cant create the document
    :return: (ImmutableDocument) The created document

    """
    self.__docs.append(Document(author, filename, **kwargs))
    immutable = ImmutableDocument(author, filename, **kwargs)
    return immutable
add_doc
Add a document to the list of documents.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> # Assuming your_document is a document
>>> doc1.add_doc(your_document)
>>> print(doc1.is_in_docs(your_document)) # True
Parameters
- doc: (Document) The document to add
Raises
- TypeError: Invalid document type
View Source
def add_doc(self, doc: Document | ImmutableDocument) -> None:
    """Add an already created document to the list of documents.

    An ImmutableDocument() is converted into a Document() before being
    stored.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> # Assuming your_document is a document
    >>> doc1.add_doc(your_document)
    >>> print(your_document in doc1)

    :param doc: (Document | ImmutableDocument) The document to add
    :raises: TypeError: Invalid document type

    """
    TypesDealer.check_types('DocumentsManager.add_doc', [(doc, (Document, ImmutableDocument))])
    to_store = self.__immutable_to_document(doc) if isinstance(doc, ImmutableDocument) else doc
    self.__docs.append(to_store)
add_docs
Add a list of documents into the actual list of documents.
Do not add anything if any element in the list is incorrect.
Example
>>> manager = DocumentsManager('test_folder')
>>> manager.collect_docs()
>>> # Assuming doc1 and doc2 are Document() instances
>>> manager.add_docs([doc1, doc2])
>>> manager.is_in_docs(doc1)
> True
Parameters
- docs: (list) The list of documents to add
Raises
- TypeError: Invalid document type
View Source
def add_docs(self, docs: list) -> None:
    """Add several documents to the list of documents.

    All the given elements are checked before the first one is added, so
    nothing is added if any element of the list is incorrect.

    :example:
    >>> manager = DocumentsManager('test_folder')
    >>> manager.collect_docs()
    >>> # Assuming doc1 and doc2 are Document() instances
    >>> manager.add_docs([doc1, doc2])
    >>> print(doc1 in manager)

    :param docs: (list | tuple) The documents to add
    :raises: TypeError: Invalid document type

    """
    TypesDealer.check_types('DocumentsManager.add_docs', [(docs, (list, tuple))])
    # First pass: validation only, so that a failure leaves the list untouched.
    for candidate in docs:
        TypesDealer.check_types('DocumentsManager.add_doc', [(candidate, (Document, ImmutableDocument))])
    # Second pass: every element is known to be valid.
    for candidate in docs:
        if isinstance(candidate, ImmutableDocument):
            candidate = self.__immutable_to_document(candidate)
        self.add_doc(candidate)
get_docs_sorted_by_newest
Get documents sorted by date from the most recent to the oldest.
Return the list of ImmutableDocument() instances sorted from the most recent to the oldest.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> sorted_docs = doc1.get_docs_sorted_by_newest()
>>> for doc in sorted_docs:
>>> print(doc)
>>> # The documents are printed from the most recent to the oldest
Returns
- (list) The list of sorted documents
View Source
def get_docs_sorted_by_newest(self) -> list:
    """Return the documents ordered from the most recent to the oldest.

    Documents are compared on their 'date'. The managed list is left
    untouched: the result is a new list of immutable versions of the
    documents.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> for doc in doc1.get_docs_sorted_by_newest():
    >>>     print(doc)

    :return: (list) The list of sorted documents

    """
    ordered = list(self.__docs)
    ordered.sort(key=lambda item: item.date, reverse=True)
    result = []
    for item in ordered:
        result.append(item.to_immutable())
    return result
get_docs_sorted_by_oldest
Get documents sorted by date from the oldest to the most recent.
Return the list of ImmutableDocument() instances sorted from the oldest to the most recent.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> sorted_docs = doc1.get_docs_sorted_by_oldest()
>>> # The documents are printed from the oldest to the most recent
>>> for doc in sorted_docs:
>>> print(doc)
Returns
- (list) The list of sorted documents
View Source
def get_docs_sorted_by_oldest(self) -> list:
    """Return the documents ordered from the oldest to the most recent.

    Documents are compared on their 'date'. The managed list is left
    untouched: the result is a new list of immutable versions of the
    documents.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> for doc in doc1.get_docs_sorted_by_oldest():
    >>>     print(doc)

    :return: (list) The list of sorted documents

    """
    ordered = list(self.__docs)
    ordered.sort(key=lambda item: item.date)
    result = []
    for item in ordered:
        result.append(item.to_immutable())
    return result
get_docs_sorted_by_most_viewed
Get documents sorted by the number of views.
Return the list of ImmutableDocument() instances sorted from the most viewed to the least viewed.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> sorted_docs = doc1.get_docs_sorted_by_most_viewed()
>>> # The documents are printed from the most viewed to the least viewed
>>> for doc in sorted_docs:
>>> print(doc)
Returns
- (list) The sorted list of documents
View Source
def get_docs_sorted_by_most_viewed(self) -> list:
    """Return the documents ordered from the most to the least viewed.

    The number of views is the 'downloads' counter of each document.
    The managed list is left untouched: the result is a new list of
    immutable versions of the documents.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> for doc in doc1.get_docs_sorted_by_most_viewed():
    >>>     print(doc)

    :return: (list) The sorted list of documents

    """
    ordered = list(self.__docs)
    ordered.sort(key=lambda item: item.downloads, reverse=True)
    result = []
    for item in ordered:
        result.append(item.to_immutable())
    return result
get_docs_sorted_by_least_viewed
Get documents reversely sorted by the number of views.
Return the list of ImmutableDocument() instances sorted from the least viewed to the most viewed.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> sorted_docs = doc1.get_docs_sorted_by_least_viewed()
>>> for doc in sorted_docs:
>>> print(doc)
Returns
- (list) The list of sorted documents
View Source
def get_docs_sorted_by_least_viewed(self) -> list:
    """Return the documents ordered from the least to the most viewed.

    The number of views is the 'downloads' counter of each document.
    The managed list is left untouched: the result is a new list of
    immutable versions of the documents.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> for doc in doc1.get_docs_sorted_by_least_viewed():
    >>>     print(doc)

    :return: (list) The list of sorted documents

    """
    ordered = list(self.__docs)
    ordered.sort(key=lambda item: item.downloads)
    result = []
    for item in ordered:
        result.append(item.to_immutable())
    return result
filter_docs
Return the list of documents matching the given filters.
Each filter is a tuple (filter function name, comparator name, [value1, value2, ...]). Applicable filter functions are "filename", "filetype", "author" and "date".
Example
>>> manager = DocumentsManager('test_folder')
>>> manager.collect_docs()
>>> # Get all documents of Brigitte Bigi
>>> manager.filter_docs([("author", "iexact", ["Brigitte Bigi"])])
>>> # Get all PDF or TXT documents of Brigitte Bigi
>>> _docs = manager.filter_docs([("author", "iexact", ["Brigitte Bigi"]), ("filetype", "iexact", ["pdf", "txt"])], match_all=True)
>>> # Get all PDF or TXT documents of Brigitte Bigi or John Doe
>>> _fdocs = manager.filter_docs([("author", "iexact", ["Brigitte Bigi", "John Doe"]), ("filetype", "iexact", ["pdf", "txt"])], match_all=True)
Parameters
- filters: (list of tuple) List of filters to be applied on the documents.
- match_all: (bool) If True, returned documents must match all the given criteria
- out_filterset: (bool) If True, return the FilteredSet. If False, return a list of documents.
Raises
- ValueError: If a malformed filter.
- ValueError: If no value is provided in a filter.
- TypeError: invalid type for match_all parameter -- if used only
Returns
- (list|FilteredSet) The list of documents matching the given criteria
View Source
def filter_docs(self, filters, match_all: bool=False, out_filterset: bool=False):
    """Return the list of documents matching the given filters.

    Each filter is a tuple (filter function name, comparator name, [value1, value2, ...]).
    Applicable filter functions are "filename", "filetype", "author" and "date".
    Inside one filter, the results of the values are combined with a union,
    or with an intersection if the comparator name contains "not".
    The results of the filters are then merged according to 'match_all'.

    :example:
    >>> manager = DocumentsManager('test_folder')
    >>> manager.collect_docs()
    >>> # Get all documents of Brigitte Bigi
    >>> manager.filter_docs([("author", "iexact", ["Brigitte Bigi"])])
    >>> # Get all PDF or TXT documents of Brigitte Bigi
    >>> _docs = manager.filter_docs([("author", "iexact", ["Brigitte Bigi"]), ("filetype", "iexact", ["pdf", "txt"])], match_all=True)
    >>> # Get all PDF or TXT documents of Brigitte Bigi or John Doe
    >>> _fdocs = manager.filter_docs([("author", "iexact", ["Brigitte Bigi", "John Doe"]), ("filetype", "iexact", ["pdf", "txt"])], match_all=True)

    :param filters: (list of tuple) List of filters to be applied on the documents.
    :param match_all: (bool) If True, returned documents must match all the given criteria
    :param out_filterset: (bool) If True, return the FilteredSet. If False, return a list of documents.
    :raises: ValueError: If a malformed filter.
    :raises: ValueError: If no value is provided in a filter.
    :raises: TypeError: invalid type for match_all parameter (presumably raised when merging the results -- to confirm)
    :return: (list|FilteredSet) The documents matching the given criteria. The list is sorted by date, oldest first; it is an empty list if no filter is given.

    """
    doc_filter = DocumentsFilters(self.__docs)
    filtered_sets = list()

    # Values are typed once by __cast_filters(); they must not be cast
    # again here, or the first value and the next ones are not handled
    # the same way.
    for name, comparator, values in self.__cast_filters(filters):
        apply_filter = getattr(doc_filter, name)
        logging.info(' >>> filter.{:s}({:s}={!s:s})'.format(name, comparator, values[0]))
        files_set = apply_filter(**{comparator: values[0]})

        for value in values[1:]:
            if 'not' in comparator:
                # Negative comparator: a document must satisfy it for all values.
                logging.info(' >>> & filter.{:s}({:s}={!s:s})'.format(name, comparator, value))
                files_set = files_set & apply_filter(**{comparator: value})
            else:
                # Positive comparator: a document matching any value is kept.
                logging.info(' >>> | filter.{:s}({:s}={!s:s})'.format(name, comparator, value))
                files_set = files_set | apply_filter(**{comparator: value})
        filtered_sets.append(files_set)

    if len(filtered_sets) == 0:
        return list()

    files_set = doc_filter.merge_data(filtered_sets, match_all)
    if out_filterset is True:
        return files_set
    return sorted(files_set, key=lambda doc: doc.date)
get_doc_absolute_path
Return the full filename to get access to the document.
Parameters
- document: (Document | ImmutableDocument) The document to get the path of
Returns
- (str) The full path of the document
View Source
def get_doc_absolute_path(self, document: Document | ImmutableDocument) -> str:
    """Return the full filename to get access to the document.

    The path is made of the absolute path, the folder path, the folder
    name of the document and its filename with its file type as extension.

    :param document: (Document | ImmutableDocument) The document to get the path of
    :raises: AttributeError: No documents found. Please run collect_docs() first
    :raises: TypeError: Invalid document type
    :return: (str) The full path of the document, or an empty string if the document is not a managed one

    """
    if len(self.__docs) == 0:
        raise AttributeError('DocumentsManager.get_doc_absolute_path exception: No documents found. Please run collect_docs() first')
    TypesDealer.check_types('DocumentsManager.get_doc_absolute_path', [(document, (Document, ImmutableDocument))])

    # The stored instance is used to build the path, not the given one.
    doc = self.__find_doc(document)
    if doc is None:
        return ''
    return os.path.join(self.__absolute_path, self.__folder_path, doc.folder_name, doc.filename + '.' + doc.filetype)
get_doc_relative_path
Return the filename with a relative path to get access to the document.
Parameters
- document: (Document | ImmutableDocument) The document to get the path of
Returns
- (str) The relative path of the document
View Source
def get_doc_relative_path(self, document: Document | ImmutableDocument) -> str:
    """Return the filename with a relative path to get access to the document.

    The path is made of the folder path, the folder name of the document
    and its filename with its file type as extension. The absolute path
    is not included.

    :param document: (Document | ImmutableDocument) The document to get the path of
    :raises: AttributeError: No documents found. Please run collect_docs() first
    :raises: TypeError: Invalid document type
    :return: (str) The relative path of the document, or an empty string if the document is not a managed one

    """
    if len(self.__docs) == 0:
        raise AttributeError('DocumentsManager.get_doc_relative_path exception: No documents found. Please run collect_docs() first')
    TypesDealer.check_types('DocumentsManager.get_doc_relative_path', [(document, (Document, ImmutableDocument))])

    # The stored instance is used to build the path, not the given one.
    doc = self.__find_doc(document)
    if doc is None:
        return ''
    return os.path.join(self.__folder_path, doc.folder_name, doc.filename + '.' + doc.filetype)
invalidate_doc
Delete a document of the disk and remove it of the managed ones.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> # Assuming your_document is a document in the folder
>>> doc1.invalidate_doc(your_document)
Parameters
- document: (Document | ImmutableDocument) The document to invalidate
Raises
- ValueError: The document was not found
- AttributeError: No documents found. Please run collect_docs() first
- TypeError: Invalid document type
View Source
def invalidate_doc(self, document: Document | ImmutableDocument) -> None:
    """Remove a document of the managed ones and delete its folder on disk.

    The document is first removed of the list. If deleting its folder
    fails, the error is logged only: the document remains removed.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> # Assuming your_document is a document in the folder
    >>> doc1.invalidate_doc(your_document)

    :param document: (Document | ImmutableDocument) The document to invalidate
    :raises: ValueError: The document was not found
    :raises: AttributeError: No documents found. Please run collect_docs() first
    :raises: TypeError: Invalid document type

    """
    if len(self.__docs) == 0:
        raise AttributeError('DocumentsManager.invalidate_doc exception: No documents found. Please run collect_docs() first')
    TypesDealer.check_types('DocumentsManager.invalidate_doc', [(document, (Document, ImmutableDocument))])

    doc = self.__find_doc(document)
    if doc is None:
        raise ValueError(f'DocumentsManager.invalidate_doc exception: Document {document.filename} not found.')

    self.__docs.remove(doc)
    directory_path = os.path.join(self.__absolute_path, self.__folder_path, doc.folder_name)
    try:
        shutil.rmtree(directory_path)
        logging.info(f'Directory {directory_path} deleted.')
    except Exception as e:
        # Best effort: a failure to delete is reported, not raised.
        logging.error(f'Directory {directory_path} not deleted: {e}')
set_doc_description
Set and save a description for a document.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> doc1.set_doc_description(your_document, "This is a description")
Parameters
- document: (Document | ImmutableDocument) The document
- description: (str) The description to set
Raises
- AttributeError: No documents found. Please run collect_docs() first
- TypeError: Invalid document type
View Source
def set_doc_description(self, document: Document | ImmutableDocument, description: str):
    """Set and save a description for a document.

    The description is assigned to the managed instance matching the
    given document, then written into the description file of its folder.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> doc1.set_doc_description(your_document, "This is a description")

    :param document: (Document | ImmutableDocument) The document
    :param description: (str) The description to set
    :raises: AttributeError: No documents found. Please run collect_docs() first
    :raises: AttributeError: The document was not found
    :raises: ValueError: The managed document is an immutable one
    :raises: TypeError: Invalid document type

    """
    if len(self.__docs) == 0:
        raise AttributeError('DocumentsManager.set_description exception: No documents found. collect_docs() should be called first.')
    # The label names this method so that a type error is reported for
    # the right function.
    TypesDealer.check_types('DocumentsManager.set_doc_description', [(document, (Document, ImmutableDocument))])

    doc = self.__find_doc(document)
    if doc is None:
        raise AttributeError(f'DocumentsManager.set_description exception: Document {document.filename} not found.')
    # Managed documents are immutable when collected with mutable=False.
    if isinstance(doc, ImmutableDocument) is True:
        raise ValueError('Attempted to set description of an immutable document.')

    doc.description = description
    self.__save_description(doc)
increment_doc_downloads
Increment the number of downloads of a document.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> doc1.increment_doc_downloads(your_document)
Parameters
- document: (Document | ImmutableDocument) The document
Raises
- AttributeError: Document not found.
- TypeError: Invalid document type
Returns
- (int) New number of downloads
View Source
def increment_doc_downloads(self, document: Document | ImmutableDocument) -> int:
    """Add one to the number of downloads of a document and save it.

    The counter of the managed instance matching the given document is
    incremented, then written into the downloads file of its folder.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> doc1.increment_doc_downloads(your_document)

    :param document: (Document | ImmutableDocument) The document
    :raises: AttributeError: Document not found.
    :raises: TypeError: Invalid document type, or the managed document is an immutable one
    :return: (int) New number of downloads

    """
    if len(self.__docs) == 0:
        raise AttributeError('DocumentsManager.increment_doc_downloads exception: Document not found. ')
    TypesDealer.check_types('DocumentsManager.increment_doc_downloads', [(document, (Document, ImmutableDocument))])

    managed = self.__find_doc(document)
    if managed is None:
        raise AttributeError(f'DocumentsManager.increment_doc_downloads exception: Document {document.filename} not found.')
    if isinstance(managed, ImmutableDocument):
        raise TypeError('Attempted to increment downloads of an immutable document.')

    new_count = managed.increment_downloads()
    self.__save_downloads(managed)
    return new_count
save_doc
Save a document.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> # You can save the document in a different folder
>>> doc1.save_doc(your_document, 'new_folder')
>>> # Or in the folder with the DocumentsManager was initialized
>>> doc1.save_doc(your_document)
Parameters
- document: (Document | ImmutableDocument) The document
Raises
- TypeError: Invalid document type or folder path type
Returns
- (bool) Success
View Source
def save_doc(self, document: Document | ImmutableDocument):
    """Save a document on disk, in its own folder of the documents folder.

    The folder of the document is created if needed. The content is
    written as binary if it is bytes or bytearray, as UTF-8 text
    otherwise. The description and the number of downloads are then saved
    in their dedicated files. The file path is resolved with
    get_doc_absolute_path(), so the document is expected to be a managed
    one: see add() and add_doc().

    If saving fails, the folder of the document is removed only if it was
    created by this call: a folder that already existed is left as it is.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> doc1.add_doc(your_document)
    >>> doc1.save_doc(your_document)

    :param document: (Document | ImmutableDocument) The document
    :raises: AttributeError: No documents found (raised by get_doc_absolute_path)
    :raises: TypeError: Invalid document type (raised by get_doc_absolute_path)
    :raises: OSError: The document file can not be written
    :return: None

    """
    folder_path = os.path.join(self.__absolute_path, self.__folder_path, document.folder_name)
    # Remember if the folder is ours to clean up in case of failure.
    folder_existed = os.path.isdir(folder_path)
    os.makedirs(folder_path, exist_ok=True)
    logging.debug('Created folder: {}'.format(folder_path))

    try:
        file_path = self.get_doc_absolute_path(document)
        if isinstance(document.content, (bytes, bytearray)) is True:
            with open(file_path, 'wb') as fp:
                fp.write(document.content)
        else:
            with codecs.open(file_path, 'w', 'utf-8') as fp:
                fp.write(document.content)
        if os.path.exists(file_path) is True:
            self.__save_description(document)
            self.__save_downloads(document)
    except BaseException:
        # Never delete a folder that existed before this call: it may hold
        # a previously saved document. Errors of the clean-up are ignored
        # so that the original exception is the one re-raised.
        if folder_existed is False:
            shutil.rmtree(folder_path, ignore_errors=True)
        raise
get_doc_content
Get the content of a document.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> # Assuming your_document is a document in the folder
>>> doc1.get_doc_content(your_document)
>>> print(your_document.content)
Parameters
- document: (Document|ImmutableDocument) The document
Raises
- FileNotFoundError: The file was not found
- AttributeError: No documents found. Please run collect_docs() first
- TypeError: Invalid document type
Returns
- (str|bytes|None) The content of the document
View Source
def get_doc_content(self, document: Document | ImmutableDocument) -> str | bytes:
    """Get the content of a document.

    If the managed instance matching the given document already has a
    content, it is returned. Otherwise, the content is loaded from the
    file of the document and assigned to the managed instance. The file
    is read as UTF-8 text -- the encoding used by save_doc() -- or as
    bytes if it can not be decoded.

    A document or a file which is not found is logged as an error and an
    empty string is returned.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> # Assuming your_document is a document in the folder
    >>> content = doc1.get_doc_content(your_document)
    >>> print(content)

    :param document: (Document|ImmutableDocument) The document
    :raises: AttributeError: No documents found. Please run collect_docs() first
    :raises: TypeError: Invalid document type
    :return: (str|bytes) The content of the document, or an empty string

    """
    if len(self.__docs) == 0:
        raise AttributeError('DocumentsManager.get_doc_content exception: No documents found. collect_docs() should be called first.')
    # Both document types are accepted, like the signature states.
    TypesDealer.check_types('DocumentsManager.get_doc_content', [(document, (Document, ImmutableDocument))])

    doc = self.__find_doc(document)
    if doc is None:
        logging.error(f'DocumentsManager.get_doc_content error: Document {document.filename} not found. ')
        return ''
    if len(doc.content) > 0:
        # Already loaded.
        return doc.content

    file_path = self.get_doc_absolute_path(doc)
    if os.path.exists(file_path) is False:
        logging.error(f'DocumentsManager.get_doc_content error: Document file path {file_path} not found. ')
        return ''

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except UnicodeDecodeError:
        # Not a text file: save_doc() writes bytes contents in binary mode.
        with open(file_path, 'rb') as file:
            content = file.read()
    doc.content = content
    return content
get_doc_description
Get the description of a document.
Example
>>> doc1 = DocumentsManager('test_folder')
>>> doc1.collect_docs()
>>> # Assuming your_document is a document in the folder
>>> doc1.get_doc_description(your_document)
>>> print(your_document.description)
Parameters
- document: (Document | ImmutableDocument) The document
Raises
- FileNotFoundError: The file was not found
- AttributeError: No documents found. Please run collect_docs() first
- TypeError: Invalid document type
Returns
- (str|None) The description of the document
View Source
def get_doc_description(self, document: Document | ImmutableDocument) -> str | None:
    """Get the description of a document.

    The description is the one of the managed instance matching the given
    document: nothing is read from the disk.

    :example:
    >>> doc1 = DocumentsManager('test_folder')
    >>> doc1.collect_docs()
    >>> # Assuming your_document is a document in the folder
    >>> print(doc1.get_doc_description(your_document))

    :param document: (Document | ImmutableDocument) The document
    :raises: AttributeError: No documents found. Please run collect_docs() first
    :raises: TypeError: Invalid document type
    :raises: ValueError: The document was not found
    :return: (str|None) The description of the document

    """
    if len(self.__docs) == 0:
        raise AttributeError('DocumentsManager.get_doc_description exception: No documents found. collect_docs() should be called first.')
    TypesDealer.check_types('DocumentsManager.get_doc_description', [(document, (Document, ImmutableDocument))])

    managed = self.__find_doc(document)
    if managed is None:
        raise ValueError(f'DocumentsManager.get_doc_description exception: Document {document.filename} not found.')
    return managed.description
Protected functions
__read_info
Return the content of the given file in the given folder.
Parameters
- folder_name
- fn
View Source
def __read_info(self, folder_name: str, fn: str) -> str:
    """Return the content of the given file in the given folder.

    The file is read as UTF-8 text, and leading and trailing whitespace
    is removed.

    :param folder_name: (str) Name of the folder of a document, inside the documents folder
    :param fn: (str) Name of the file to read in that folder
    :return: (str) The stripped content, or an empty string if the file does not exist

    """
    file_path = os.path.join(self.__absolute_path, self.__folder_path, folder_name, fn)
    if not os.path.exists(file_path):
        return ''
    with codecs.open(file_path, 'r', encoding='utf-8') as file:
        return file.read().strip()
__save_description
Save description into its file.
Parameters
- document: (Document | ImmutableDocument) The document
View Source
def __save_description(self, document: Document | ImmutableDocument):
    """Write the description of a document into its description file.

    The file is the one named ws.DESCRIPTION_FILENAME in the folder of
    the document.

    :param document: (Document | ImmutableDocument) The document

    """
    self.__save_info(
        document.folder_name,
        ws.DESCRIPTION_FILENAME,
        document.description
    )
__save_downloads
Save downloads into its file.
Parameters
- document: (Document | ImmutableDocument) The document
View Source
def __save_downloads(self, document: Document | ImmutableDocument):
    """Write the number of downloads of a document into its downloads file.

    The file is the one named ws.DOWNLOADS_FILENAME in the folder of the
    document. The number is written as a string.

    :param document: (Document | ImmutableDocument) The document

    """
    downloads_text = str(document.downloads)
    self.__save_info(document.folder_name, ws.DOWNLOADS_FILENAME, downloads_text)
__save_info
Save the content into the given file of the specified folder.
Parameters
- folder_name
- filename
- content
View Source
def __save_info(self, folder_name, filename, content):
    """Save the content into the given file of the specified folder.

    The file is written as UTF-8 text; an existing file is overwritten.

    :param folder_name: (str) Name of the folder of a document, inside the documents folder
    :param filename: (str) Name of the file to write in that folder
    :param content: (str) The text to write
    :raises: FileNotFoundError: The folder of the document does not exist
    :raises: Exception: The content was not saved; the original error is attached as its cause

    """
    directory_path = os.path.join(self.__absolute_path, self.__folder_path, folder_name)
    if os.path.exists(directory_path) is False:
        raise FileNotFoundError(f'The directory {directory_path} does not exist.')

    destination = os.path.join(directory_path, filename)
    try:
        with codecs.open(destination, 'w', 'utf-8') as file:
            file.write(content)
        logging.debug(f'Saved information into file: {destination}')
    except Exception as e:
        # Chained explicitly so that the traceback keeps the root cause.
        raise Exception(f'Information of document {folder_name} not saved: {e}') from e
__find_doc
Search for a document in the list of stored documents.
Find the instance of Document which is matching the given document in the list of stored docs. If it finds a matching document, it returns the document instance; otherwise, it returns None.
Parameters
- document: (Document | ImmutableDocument) The document to find
Returns
- (Document | None) The document found or None if not found or invalid
View Source
def __find_doc(self, document: Document | ImmutableDocument) -> Document | None:
    """Search for a document in the list of stored documents.

    The stored documents are compared with '==' to the given one, in
    their storage order. The first equal one is returned.

    :param document: (Document | ImmutableDocument) The document to find
    :return: (Document | None) The stored document found or None if not found

    """
    for stored in self.__docs:
        if stored == document:
            return stored
    return None
__immutable_to_document
Convert an ImmutableDocument into a Document.
Parameters
- idoc
View Source
@staticmethod
def __immutable_to_document(idoc: ImmutableDocument) -> Document:
    """Convert an ImmutableDocument into a Document.

    The Document is created from the author, the filename with its file
    type as extension, the date, the content, the description and the
    number of downloads of the given document.

    :param idoc: (ImmutableDocument) The document to convert
    :return: (Document) The newly created document

    """
    full_filename = idoc.filename + '.' + idoc.filetype
    return Document(
        idoc.author,
        full_filename,
        idoc.date,
        content=idoc.content,
        description=idoc.description,
        downloads=idoc.downloads
    )
__cast_filters
Return the value-typed of given filters.
Parameters
- filters: (list of tuple) List of filters to be applied on the documents.
Raises
- ValueError: If a malformed filter.
- ValueError: If an invalid field is provided in a filter.
Returns
- (list of tuple) List of filters to be applied on the documents with typed values.
View Source
def __cast_filters(self, filters: list) -> list:
    """Return the given filters with their values typed.

    Each filter must be a list or a tuple of 3 non-empty items: the filter
    function name, the comparator name and the values. Each value is
    cast by DocumentsFilters.cast_data() for the filter function.

    :param filters: (list of tuple) List of filters to be applied on the documents.
    :raises: ValueError: If a malformed filter.
    :raises: ValueError: If an invalid field is provided in a filter.
    :return: (list of tuple) List of filters to be applied on the documents with typed values.

    """
    doc_filter = DocumentsFilters(self.__docs)
    typed_filters = []
    for f in filters:
        if not (isinstance(f, (list, tuple)) and len(f) == 3):
            raise ValueError('Filter must have 3 arguments: function, comparator, value.Got {:d} instead.'.format(len(f)))
        # None of the three items can be missing or empty.
        if None in f or any(len(item) == 0 for item in f):
            raise ValueError('Invalid field defined for filter {:s}'.format(str(f)))
        typed_values = [doc_filter.cast_data(f[0], value) for value in f[2]]
        typed_filters.append((f[0], f[1], typed_values))
    return typed_filters
Overloads
__len__
View Source
def __len__(self):
    """Return the number of managed documents."""
    count = len(self.__docs)
    return count
__iter__
View Source
def __iter__(self):
    """Iterate over the immutable versions of the managed documents."""
    yield from (stored.to_immutable() for stored in self.__docs)
__contains__
View Source
def __contains__(self, document):
    """Return True if a managed document is equal to the given one.

    Anything which is neither a Document nor an ImmutableDocument is
    never contained.

    """
    if not isinstance(document, (Document, ImmutableDocument)):
        return False
    return any(stored == document for stored in self.__docs)