parent
0b6164e93e
commit
3bf5f24a01
@ -0,0 +1,13 @@
|
|||||||
|
"""
|
||||||
|
ChatterBot is a machine learning, conversational dialog engine.
|
||||||
|
"""
|
||||||
|
from .chatterbot import ChatBot
|
||||||
|
|
||||||
|
__version__ = '0.8.4'
|
||||||
|
__author__ = 'Gunther Cox'
|
||||||
|
__email__ = 'gunthercx@gmail.com'
|
||||||
|
__url__ = 'https://github.com/gunthercox/ChatterBot'
|
||||||
|
|
||||||
|
__all__ = (
|
||||||
|
'ChatBot',
|
||||||
|
)
|
@ -0,0 +1,23 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    import importlib

    # ``--version``: print the version string of the installed package.
    if '--version' in sys.argv:
        chatterbot = importlib.import_module('chatterbot')
        print(chatterbot.__version__)

    # ``list_nltk_data``: print every NLTK data directory that has content.
    if 'list_nltk_data' in sys.argv:
        import os
        import nltk.data

        # Keep only the entries of the NLTK search path that exist on disk
        # and contain at least one item.
        data_directories = [
            path for path in nltk.data.path
            if os.path.exists(path) and os.listdir(path)
        ]

        print(os.linesep.join(data_directories))
|
@ -0,0 +1,47 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
class Adapter(object):
    """
    A superclass for all adapter classes.

    :param logger: A python logger. Defaults to the logger named after
        this module.
    :param chatbot: An optional chat bot instance to attach immediately.
    """

    def __init__(self, **kwargs):
        self.logger = kwargs.get('logger', logging.getLogger(__name__))
        self.chatbot = kwargs.get('chatbot')

    def set_chatbot(self, chatbot):
        """
        Gives the adapter access to an instance of the ChatBot class.

        :param chatbot: A chat bot instance.
        :type chatbot: ChatBot
        """
        self.chatbot = chatbot

    class AdapterMethodNotImplementedError(NotImplementedError):
        """
        An exception to be raised when an adapter method has not been implemented.
        Typically this indicates that the developer is expected to implement the
        method in a subclass.
        """

        def __init__(self, message=None):
            """
            Set the message for the exception.

            :param message: Optional custom message; a generic one is used
                when it is missing or empty.
            """
            if not message:
                message = 'This method must be overridden in a subclass method.'

            # Initialise the base class so that ``args`` and ``repr`` carry
            # the effective message, including the default one.
            NotImplementedError.__init__(self, message)
            self.message = message

        def __str__(self):
            return self.message

    class InvalidAdapterTypeException(Exception):
        """
        An exception to be raised when an adapter
        of an unexpected class type is received.
        """
        pass
|
@ -0,0 +1,173 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
import logging
|
||||||
|
from .storage import StorageAdapter
|
||||||
|
from .input import InputAdapter
|
||||||
|
from .output import OutputAdapter
|
||||||
|
from . import utils
|
||||||
|
|
||||||
|
|
||||||
|
class ChatBot(object):
    """
    A conversational dialog chat bot.

    :param name: The name of the chat bot. It is also added to the keyword
        arguments that are forwarded to the adapters and the trainer.
    :param kwargs: Configuration options. The keys read directly by this
        class are ``storage_adapter``, ``logic_adapters``, ``input_adapter``,
        ``output_adapter``, ``filters``, ``preprocessors``, ``trainer``,
        ``training_data``, ``logger``, ``read_only`` and ``initialize``.
        The complete dictionary is passed on to every adapter and to the
        trainer.
    """

    def __init__(self, name, **kwargs):
        # Imported locally; presumably this avoids a circular import between
        # this module and the logic package - TODO confirm.
        from .logic import MultiLogicAdapter

        self.name = name
        kwargs['name'] = name
        kwargs['chatbot'] = self

        self.default_session = None

        storage_adapter = kwargs.get('storage_adapter', 'chatterbot.storage.SQLStorageAdapter')

        logic_adapters = kwargs.get('logic_adapters', [
            'chatterbot.logic.BestMatch'
        ])

        input_adapter = kwargs.get('input_adapter', 'chatterbot.input.VariableInputTypeAdapter')

        output_adapter = kwargs.get('output_adapter', 'chatterbot.output.OutputAdapter')

        # Check that each adapter is a valid subclass of its respective parent
        utils.validate_adapter_class(storage_adapter, StorageAdapter)
        utils.validate_adapter_class(input_adapter, InputAdapter)
        utils.validate_adapter_class(output_adapter, OutputAdapter)

        self.logic = MultiLogicAdapter(**kwargs)
        self.storage = utils.initialize_class(storage_adapter, **kwargs)
        self.input = utils.initialize_class(input_adapter, **kwargs)
        self.output = utils.initialize_class(output_adapter, **kwargs)

        # Each filter is given as an import path and instantiated without arguments
        filters = kwargs.get('filters', tuple())
        self.filters = tuple([utils.import_module(F)() for F in filters])

        # Add required system logic adapter
        self.logic.system_adapters.append(
            utils.initialize_class('chatterbot.logic.NoKnowledgeAdapter', **kwargs)
        )

        for adapter in logic_adapters:
            self.logic.add_adapter(adapter, **kwargs)

        # Add the chatbot instance to each adapter to share information such as
        # the name, the current conversation, or other adapters
        self.logic.set_chatbot(self)
        self.input.set_chatbot(self)
        self.output.set_chatbot(self)

        preprocessors = kwargs.get(
            'preprocessors', [
                'chatterbot.preprocessors.clean_whitespace'
            ]
        )

        # Preprocessors are stored as the imported callables themselves
        self.preprocessors = []

        for preprocessor in preprocessors:
            self.preprocessors.append(utils.import_module(preprocessor))

        # Use specified trainer or fall back to the default
        trainer = kwargs.get('trainer', 'chatterbot.trainers.Trainer')
        TrainerClass = utils.import_module(trainer)
        self.trainer = TrainerClass(self.storage, **kwargs)
        self.training_data = kwargs.get('training_data')

        # Created lazily by get_response() when no conversation id is given
        self.default_conversation_id = None

        self.logger = kwargs.get('logger', logging.getLogger(__name__))

        # Allow the bot to save input it receives so that it can learn
        self.read_only = kwargs.get('read_only', False)

        if kwargs.get('initialize', True):
            self.initialize()

    def initialize(self):
        """
        Do any work that needs to be done before the responses can be returned.
        """
        self.logic.initialize()

    def get_response(self, input_item, conversation_id=None):
        """
        Return the bot's response based on the input.

        :param input_item: An input value.
        :param conversation_id: The id of a conversation. When it is falsy,
            a default conversation is created through the storage adapter on
            first use and reused afterwards.
        :returns: A response to the input.
        :rtype: Statement
        """
        if not conversation_id:
            if not self.default_conversation_id:
                self.default_conversation_id = self.storage.create_conversation()
            conversation_id = self.default_conversation_id

        input_statement = self.input.process_input_statement(input_item)

        # Preprocess the input statement
        for preprocessor in self.preprocessors:
            input_statement = preprocessor(self, input_statement)

        statement, response = self.generate_response(input_statement, conversation_id)

        # Learn that the user's input was a valid response to the chat bot's previous output
        previous_statement = self.storage.get_latest_response(conversation_id)

        if not self.read_only:
            self.learn_response(statement, previous_statement)
            self.storage.add_to_conversation(conversation_id, statement, response)

        # Process the response output with the output adapter
        return self.output.process_response(response, conversation_id)

    def generate_response(self, input_statement, conversation_id):
        """
        Return a response based on a given input statement.

        :param input_statement: The (preprocessed) input statement.
        :param conversation_id: The id of the conversation, passed to the
            storage adapter's ``generate_base_query``.
        :returns: A tuple of the input statement and the selected response.
        """
        self.storage.generate_base_query(self, conversation_id)

        # Select a response to the input statement
        response = self.logic.process(input_statement)

        return input_statement, response

    def learn_response(self, statement, previous_statement):
        """
        Learn that the statement provided is a valid response.

        :param statement: The statement to update and save.
        :param previous_statement: The statement that ``statement`` responds
            to; when it is falsy the statement is saved without a new response.
        """
        from .conversation import Response

        if previous_statement:
            statement.add_response(
                Response(previous_statement.text)
            )
            self.logger.info('Adding "{}" as a response to "{}"'.format(
                statement.text,
                previous_statement.text
            ))

        # Save the statement after selecting a response
        self.storage.update(statement)

    def set_trainer(self, training_class, **kwargs):
        r"""
        Set the module used to train the chatbot.

        :param training_class: The training class to use for the chat bot.
        :type training_class: `Trainer`

        :param \**kwargs: Any parameters that should be passed to the training class.
            ``chatbot`` defaults to this instance when it is not supplied.
        """
        if 'chatbot' not in kwargs:
            kwargs['chatbot'] = self

        self.trainer = training_class(self.storage, **kwargs)

    @property
    def train(self):
        """
        Proxy method to the chat bot's trainer class.
        """
        return self.trainer.train
|
@ -0,0 +1,331 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
This module contains various text-comparison algorithms
|
||||||
|
designed to compare one statement to another.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Use python-Levenshtein if available
|
||||||
|
try:
|
||||||
|
from Levenshtein.StringMatcher import StringMatcher as SequenceMatcher
|
||||||
|
except ImportError:
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
|
||||||
|
|
||||||
|
class Comparator(object):
    """
    Base class for the statement comparison algorithms in this module.

    Instances are callable: calling one delegates to :meth:`compare`.
    Subclasses override :meth:`compare` and may define ``initialize_*``
    methods, which are reported by :meth:`get_initialization_functions`.
    """

    def __call__(self, statement_a, statement_b):
        """
        Compare two statements by delegating to :meth:`compare`.
        """
        return self.compare(statement_a, statement_b)

    def compare(self, statement_a, statement_b):
        """
        Return the similarity of the two statements.

        The base implementation always returns 0.
        """
        return 0

    def get_initialization_functions(self):
        """
        Return all initialization methods for the comparison algorithm.
        Initialization methods must start with 'initialize_' and
        take no parameters.

        :returns: A dictionary mapping each method name to the bound method.
        :rtype: dict
        """
        return {
            name: getattr(self, name)
            for name in dir(self)
            if name.startswith('initialize_')
        }
|
||||||
|
|
||||||
|
|
||||||
|
class LevenshteinDistance(Comparator):
    """
    Compare two statements based on the Levenshtein distance
    of each statement's text.

    For example, there is a 65% similarity between the statements
    "where is the post office?" and "looking for the post office"
    based on the Levenshtein distance algorithm.
    """

    def compare(self, statement, other_statement):
        """
        Compare the two input statements.

        The lowercased texts are handed to the module's ``SequenceMatcher``
        and its ratio is rounded to two decimal places.

        :return: The percent of similarity between the text of the statements.
        :rtype: float
        """
        # Nothing can be compared when either text is missing or empty
        if not (statement.text and other_statement.text):
            return 0

        # Use the unicode text type on Python 2 and str on Python 3
        text_type = unicode if sys.version_info[0] < 3 else str  # NOQA

        first_text = text_type(statement.text.lower())
        second_text = text_type(other_statement.text.lower())

        matcher = SequenceMatcher(None, first_text, second_text)

        # Express the similarity as a decimal percent
        return round(matcher.ratio(), 2)
|
||||||
|
|
||||||
|
|
||||||
|
class SynsetDistance(Comparator):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity between each word in each sentence.

    This algorithm uses the `wordnet`_ functionality of `NLTK`_ to determine the similarity
    of two statements based on the path similarity between each token of each statement.
    This is essentially an evaluation of the closeness of synonyms.
    """

    def initialize_nltk_wordnet(self):
        """
        Download the NLTK wordnet corpus if it has not already been downloaded.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('corpora/wordnet')

    def initialize_nltk_punkt(self):
        """
        Download the NLTK punkt tokenizer if it has not already been downloaded.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('tokenizers/punkt')

    def initialize_nltk_stopwords(self):
        """
        Download the NLTK stopwords corpus if it has not already been downloaded.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('corpora/stopwords')

    def compare(self, statement, other_statement):
        """
        Compare the two input statements.

        :return: The percent of similarity between the closest synset distance.
        :rtype: float

        .. _wordnet: http://www.nltk.org/howto/wordnet.html
        .. _NLTK: http://www.nltk.org/
        """
        from nltk.corpus import wordnet
        from nltk import word_tokenize
        from chatterbot import utils
        import itertools

        words_a = word_tokenize(statement.text.lower())
        words_b = word_tokenize(other_statement.text.lower())

        # Stop words carry no meaning for the comparison, so drop them
        words_a = utils.remove_stopwords(words_a, language='english')
        words_b = utils.remove_stopwords(words_b, language='english')

        # path_similarity yields values between 0 and 1, so the result is
        # normalised by the word count of the longer of the two statements.
        longest_word_count = max(
            len(statement.text.split()),
            len(other_statement.text.split())
        )

        best_similarity = 0.0

        # Look at every pairing of a word from each statement
        for word_a, word_b in itertools.product(words_a, words_b):
            synsets_a = wordnet.synsets(word_a)
            synsets_b = wordnet.synsets(word_b)

            # Words unknown to wordnet cannot contribute a similarity
            if not (synsets_a and synsets_b):
                continue

            # Keep the highest similarity found among all synset pairings
            for synset_a, synset_b in itertools.product(synsets_a, synsets_b):
                score = synset_a.path_similarity(synset_b)

                if score and score > best_similarity:
                    best_similarity = score

        if longest_word_count == 0:
            return 0

        return best_similarity / longest_word_count
|
||||||
|
|
||||||
|
|
||||||
|
class SentimentComparison(Comparator):
    """
    Calculate the similarity of two statements based on the closeness of
    the sentiment value calculated for each statement.
    """

    def initialize_nltk_vader_lexicon(self):
        """
        Download the NLTK vader lexicon for sentiment analysis
        that is required for this algorithm to run.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('sentiment/vader_lexicon')

    def compare(self, statement, other_statement):
        """
        Return the similarity of two statements based on
        their calculated sentiment values.

        Statements whose strongest polarity differs score 0; otherwise the
        score is 1.0 minus the difference between the two strongest scores.

        :return: The percent of similarity between the sentiment value.
        :rtype: float
        """
        from nltk.sentiment.vader import SentimentIntensityAnalyzer

        def strongest_polarity(scores):
            # Walk the polarity names in sorted order so that ties are
            # always resolved in favour of the first name.
            name, score = 'neu', -1
            for candidate in sorted(scores):
                if scores[candidate] > score:
                    name, score = candidate, scores[candidate]
            return name, score

        analyzer = SentimentIntensityAnalyzer()
        first_scores = analyzer.polarity_scores(statement.text.lower())
        second_scores = analyzer.polarity_scores(other_statement.text.lower())

        first_name, first_score = strongest_polarity(first_scores)
        second_name, second_score = strongest_polarity(second_scores)

        # Statements dominated by different polarities are not similar
        if first_name != second_name:
            return 0

        return 1.0 - abs(first_score - second_score)
|
||||||
|
|
||||||
|
|
||||||
|
class JaccardSimilarity(Comparator):
    """
    Calculates the similarity of two statements based on the Jaccard index.

    The Jaccard index is composed of a numerator and denominator.
    In the numerator, we count the number of items that are shared between the sets.
    In the denominator, we count the total number of items across both sets.
    Let's say we define sentences to be equivalent if 50% or more of their tokens are equivalent.
    Here are two sample sentences:

        The young cat is hungry.
        The cat is very hungry.

    When we parse these sentences to remove stopwords, we end up with the following two sets:

        {young, cat, hungry}
        {cat, very, hungry}

    In our example above, our intersection is {cat, hungry}, which has count of two.
    The union of the sets is {young, cat, very, hungry}, which has a count of four.
    Therefore, our `Jaccard similarity index`_ is two divided by four, or 50%.
    Given our similarity threshold above, we would consider this to be a match.

    Note that :meth:`compare` only keeps tokens that are tagged as nouns (or
    that fall back to the noun tag), so the sets it builds can be smaller
    than the ones shown in the example above.

    .. _`Jaccard similarity index`: https://en.wikipedia.org/wiki/Jaccard_index
    """

    # Minimum Jaccard index for two statements to be considered a match
    SIMILARITY_THRESHOLD = 0.5

    def initialize_nltk_wordnet(self):
        """
        Download the NLTK wordnet corpora that is required for this algorithm
        to run only if the corpora has not already been downloaded.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('corpora/wordnet')

    def compare(self, statement, other_statement):
        """
        Return the calculated similarity of two
        statements based on the Jaccard index.

        :returns: ``True`` when the Jaccard index of the noun lemmas of both
            statements reaches ``SIMILARITY_THRESHOLD``, otherwise ``False``.
            Two statements without any usable lemma are never a match.
        :rtype: bool
        """
        from nltk.corpus import wordnet
        import nltk
        import string

        a = statement.text.lower()
        b = other_statement.text.lower()

        # Get default English stopwords and extend with punctuation.
        # A set keeps the membership tests below cheap.
        stopwords = set(nltk.corpus.stopwords.words('english'))
        stopwords.update(string.punctuation)
        stopwords.add('')
        lemmatizer = nltk.stem.wordnet.WordNetLemmatizer()

        # First letter of the tagger's tag -> wordnet part of speech
        wordnet_pos_by_prefix = {
            'J': wordnet.ADJ,
            'V': wordnet.VERB,
            'N': wordnet.NOUN,
            'R': wordnet.ADV,
        }

        def get_noun_lemmas(text):
            # Tokenize and tag the text, then lemmatize every noun that is
            # not a stopword once surrounding punctuation is stripped.
            lemmas = set()
            for token, tag in nltk.pos_tag(nltk.tokenize.word_tokenize(text)):
                # Unrecognised tags are treated as nouns
                pos = wordnet_pos_by_prefix.get(tag[:1], wordnet.NOUN)
                word = token.strip(string.punctuation)
                if pos == wordnet.NOUN and word not in stopwords:
                    lemmas.add(lemmatizer.lemmatize(word, pos))
            return lemmas

        lemma_a = get_noun_lemmas(a)
        lemma_b = get_noun_lemmas(b)

        # Calculate Jaccard similarity. An empty union means there is
        # nothing to compare, so the ratio stays at zero instead of
        # dividing by zero.
        union = lemma_a | lemma_b
        if union:
            ratio = len(lemma_a & lemma_b) / float(len(union))
        else:
            ratio = 0

        return ratio >= self.SIMILARITY_THRESHOLD
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------- #
|
||||||
|
|
||||||
|
|
||||||
|
levenshtein_distance = LevenshteinDistance()
|
||||||
|
synset_distance = SynsetDistance()
|
||||||
|
sentiment_comparison = SentimentComparison()
|
||||||
|
jaccard_similarity = JaccardSimilarity()
|
@ -0,0 +1,15 @@
|
|||||||
|
"""
|
||||||
|
ChatterBot constants
|
||||||
|
"""
|
||||||
|
|
||||||
|
# The maximum length of characters that the text of a statement can contain.
# This should be enforced on a per-model basis by the data model for each
# storage adapter.
STATEMENT_TEXT_MAX_LENGTH = 400

# The maximum length of characters that the name of a tag can contain
TAG_NAME_MAX_LENGTH = 50

# The name of the Django app that is used by default
DEFAULT_DJANGO_APP_NAME = 'django_chatterbot'
|
@ -0,0 +1,229 @@
|
|||||||
|
class StatementMixin(object):
    """
    This class has shared methods used to
    normalize different statement models.

    The methods read and update the ``tags`` attribute, which the class
    using the mixin is expected to provide.
    """

    def get_tags(self):
        """
        Return the list of tags for this statement.
        """
        return self.tags

    def add_tags(self, tags):
        """
        Add a list of strings to the statement as tags.

        :param tags: An iterable of tags; each one is appended to ``self.tags``.
        """
        # Iterate over a snapshot so that passing the statement's own tag
        # list (or a view of it) cannot grow the sequence being iterated.
        for tag in list(tags):
            self.tags.append(tag)
|
||||||
|
|
||||||
|
|
||||||
|
class Statement(StatementMixin):
    """
    A statement represents a single spoken entity, sentence or
    phrase that someone can say.

    :param text: The text of the statement.
    :param tags: Optional list of tags (defaults to an empty list).
    :param in_response_to: Optional list of responses that this statement
        is in response to (defaults to an empty list).
    :param extra_data: Optional dictionary of additional data (defaults to
        an empty dictionary).
    """

    def __init__(self, text, **kwargs):
        import sys

        # Try not to allow non-string types to be passed to statements
        try:
            text = str(text)
        except UnicodeEncodeError:
            pass

        # Prefer decoded utf8-strings in Python 2.7
        if sys.version_info[0] < 3:
            try:
                text = text.decode('utf-8')
            except UnicodeEncodeError:
                pass

        self.text = text
        self.tags = kwargs.pop('tags', [])
        self.in_response_to = kwargs.pop('in_response_to', [])

        self.extra_data = kwargs.pop('extra_data', {})

        # This is the confidence with which the chat bot believes
        # this is an accurate response. This value is set when the
        # statement is returned by the chat bot.
        self.confidence = 0

        # Must be set to a storage object before save() can be used
        self.storage = None

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<Statement text:%s>' % (self.text)

    def __hash__(self):
        return hash(self.text)

    def __eq__(self, other):
        """
        Statements are equal to statements with the same text and to
        values that equal their text. Falsy values are never equal.
        """
        if not other:
            return False

        if isinstance(other, Statement):
            return self.text == other.text

        return self.text == other

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly
        return not self.__eq__(other)

    def save(self):
        """
        Save the statement in the database.
        """
        self.storage.update(self)

    def add_extra_data(self, key, value):
        """
        This method allows additional data to be stored on the statement object.

        Typically this data is something that pertains just to this statement.
        For example, a value stored here might be the tagged parts of speech for
        each word in the statement text.

            - key = 'pos_tags'
            - value = [('Now', 'RB'), ('for', 'IN'), ('something', 'NN'), ('different', 'JJ')]

        :param key: The key to use in the dictionary of extra data.
        :type key: str

        :param value: The value to set for the specified key.
        """
        self.extra_data[key] = value

    def add_response(self, response):
        """
        Add the response to the list of statements that this statement is in response to.
        If the response is already in the list, increment the occurrence count of that response.

        :param response: The response to add.
        :type response: `Response`

        :raises Statement.InvalidTypeException: If ``response`` is not a
            ``Response`` instance.
        """
        if not isinstance(response, Response):
            # Name the expected class directly rather than building a
            # throwaway Response just to ask for its type.
            raise Statement.InvalidTypeException(
                'A {} was received when a {} instance was expected'.format(
                    type(response),
                    Response
                )
            )

        updated = False
        for existing_response in self.in_response_to:
            if response.text == existing_response.text:
                existing_response.occurrence += 1
                updated = True

        if not updated:
            self.in_response_to.append(response)

    def remove_response(self, response_text):
        """
        Removes a response from the statement's response list based
        on the value of the response text.

        :param response_text: The text of the response to be removed.
        :type response_text: str

        :returns: True if a response was removed, otherwise False.
        :rtype: bool
        """
        for response in self.in_response_to:
            if response_text == response.text:
                # Safe while iterating because the loop is left immediately
                self.in_response_to.remove(response)
                return True
        return False

    def get_response_count(self, statement):
        """
        Find the number of times that the statement has been used
        as a response to the current statement.

        :param statement: The statement object to get the count for.
        :type statement: `Statement`

        :returns: Return the number of times the statement has been used as a response.
        :rtype: int
        """
        for response in self.in_response_to:
            if statement.text == response.text:
                return response.occurrence

        return 0

    def serialize(self):
        """
        :returns: A dictionary representation of the statement object.
        :rtype: dict
        """
        data = {}

        data['text'] = self.text
        data['in_response_to'] = [
            response.serialize() for response in self.in_response_to
        ]
        data['extra_data'] = self.extra_data

        return data

    @property
    def response_statement_cache(self):
        """
        This property is to allow ChatterBot Statement objects to
        be swappable with Django Statement models.
        """
        return self.in_response_to

    class InvalidTypeException(Exception):
        """
        An exception to be raised when a value of an unexpected type is received.
        """

        def __init__(self, value='Received an unexpected value type.'):
            # Initialise the base class so ``args`` also carries the
            # default message.
            Exception.__init__(self, value)
            self.value = value

        def __str__(self):
            return repr(self.value)
|
||||||
|
|
||||||
|
|
||||||
|
class Response(object):
    """
    A response represents an entity which responds to a statement.

    :param text: The text of the response.
    :param created_at: Optional creation time, either a ``datetime`` or a
        value that ``dateutil`` can parse. Defaults to the current time.
    :param occurrence: Optional number of times the response has occurred.
        Defaults to 1.
    """

    def __init__(self, text, **kwargs):
        from datetime import datetime

        self.text = text
        self.created_at = kwargs.get('created_at', datetime.now())
        self.occurrence = kwargs.get('occurrence', 1)

        if not isinstance(self.created_at, datetime):
            # dateutil is only needed when a non-datetime value was given
            from dateutil import parser as date_parser

            self.created_at = date_parser.parse(self.created_at)

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<Response text:%s>' % (self.text)

    def __hash__(self):
        return hash(self.text)

    def __eq__(self, other):
        """
        Responses are equal to responses with the same text and to
        values that equal their text. Falsy values are never equal.
        """
        if not other:
            return False

        if isinstance(other, Response):
            return self.text == other.text

        return self.text == other

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly
        return not self.__eq__(other)

    def serialize(self):
        """
        :returns: A dictionary representation of the response object, with
            ``created_at`` rendered in ISO 8601 format.
        :rtype: dict
        """
        data = {}

        data['text'] = self.text
        data['created_at'] = self.created_at.isoformat()

        data['occurrence'] = self.occurrence

        return data
|
@ -0,0 +1,11 @@
|
|||||||
|
"""
|
||||||
|
Seamlessly import the external chatterbot corpus module.
|
||||||
|
View the corpus on GitHub at https://github.com/gunthercox/chatterbot-corpus
|
||||||
|
"""
|
||||||
|
|
||||||
|
from chatterbot_corpus import Corpus
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = (
|
||||||
|
'Corpus',
|
||||||
|
)
|
@ -0,0 +1,3 @@
|
|||||||
|
default_app_config = (
|
||||||
|
'chatterbot.ext.django_chatterbot.apps.DjangoChatterBotConfig'
|
||||||
|
)
|
@ -0,0 +1,261 @@
|
|||||||
|
from chatterbot.conversation import StatementMixin
|
||||||
|
from chatterbot import constants
|
||||||
|
from django.db import models
|
||||||
|
from django.apps import apps
|
||||||
|
from django.utils import timezone
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
|
# Defaults used when the project settings do not override them
DJANGO_APP_NAME = constants.DEFAULT_DJANGO_APP_NAME
STATEMENT_MODEL = 'Statement'
RESPONSE_MODEL = 'Response'

# Allow related models to be overridden in the project settings.
# Default to the original settings if one is not defined.
if hasattr(settings, 'CHATTERBOT'):
    DJANGO_APP_NAME = settings.CHATTERBOT.get('django_app_name', DJANGO_APP_NAME)
    STATEMENT_MODEL = settings.CHATTERBOT.get('statement_model', STATEMENT_MODEL)
    RESPONSE_MODEL = settings.CHATTERBOT.get('response_model', RESPONSE_MODEL)
|
||||||
|
|
||||||
|
|
||||||
|
class AbstractBaseStatement(models.Model, StatementMixin):
    """
    The abstract base statement allows other models to
    be created using the attributes that exist on the
    default models.
    """

    text = models.CharField(
        unique=True,
        blank=False,
        null=False,
        max_length=constants.STATEMENT_TEXT_MAX_LENGTH
    )

    # JSON-encoded dictionary; read and written by add_extra_data()
    # and decoded again by serialize().
    extra_data = models.CharField(
        max_length=500,
        blank=True
    )

    # This is the confidence with which the chat bot believes
    # this is an accurate response. This value is set when the
    # statement is returned by the chat bot.
    confidence = 0

    class Meta:
        abstract = True

    def __str__(self):
        """
        Return the statement text, shortened to 57 characters plus an
        ellipsis when the stripped text is longer than 60 characters,
        or ``'<empty>'`` when the stripped text is empty.
        """
        if len(self.text.strip()) > 60:
            return '{}...'.format(self.text[:57])
        elif len(self.text.strip()) > 0:
            return self.text
        return '<empty>'

    def __init__(self, *args, **kwargs):
        super(AbstractBaseStatement, self).__init__(*args, **kwargs)

        # Responses to be saved if the statement is updated with the storage adapter
        self.response_statement_cache = []

    @property
    def in_response_to(self):
        """
        Return the response objects that are for this statement.
        """
        ResponseModel = apps.get_model(DJANGO_APP_NAME, RESPONSE_MODEL)
        return ResponseModel.objects.filter(statement=self)

    def add_extra_data(self, key, value):
        """
        Add extra data to the extra_data field.

        :param key: The key to store the value under.
        :param value: A JSON-serializable value.
        """
        import json

        if not self.extra_data:
            self.extra_data = '{}'

        extra_data = json.loads(self.extra_data)
        extra_data[key] = value

        self.extra_data = json.dumps(extra_data)

    def add_tags(self, tags):
        """
        Add a list of strings to the statement as tags.
        (Overrides the method from StatementMixin)

        :param tags: An iterable of tag names.
        """
        for tag in tags:
            self.tags.create(
                name=tag
            )

    def add_response(self, statement):
        """
        Add a response to this statement.

        The statement is only appended to ``response_statement_cache``;
        nothing is written to the database by this method.
        """
        self.response_statement_cache.append(statement)

    def remove_response(self, response_text):
        """
        Removes a response from the statement's response list based
        on the value of the response text.

        :param response_text: The text of the response to be removed.
        :type response_text: str

        :returns: True if at least one matching response existed and
            was deleted, otherwise False.
        :rtype: bool
        """
        matching_responses = self.in_response.filter(
            response__text=response_text
        )

        if not matching_responses.exists():
            return False

        # Actually remove the matching rows; previously the method only
        # reported success without deleting anything.
        matching_responses.delete()
        return True

    def get_response_count(self, statement):
        """
        Find the number of times that the statement has been used
        as a response to the current statement.

        :param statement: The statement object to get the count for.
        :type statement: chatterbot.conversation.Statement

        :returns: Return the number of times the statement has been used as a response.
        :rtype: int
        """
        return self.in_response.filter(response__text=statement.text).count()

    def serialize(self):
        """
        :returns: A dictionary representation of the statement object.
        :rtype: dict
        """
        import json
        data = {}

        # An empty extra_data value is normalized to an empty JSON object
        # so that it can always be decoded below.
        if not self.extra_data:
            self.extra_data = '{}'

        data['text'] = self.text
        data['in_response_to'] = []
        data['extra_data'] = json.loads(self.extra_data)

        for response in self.in_response.all():
            data['in_response_to'].append(response.serialize())

        return data
|
||||||
|
|
||||||
|
|
||||||
|
class AbstractBaseResponse(models.Model):
    """
    Abstract model linking a statement to another statement that was
    given in response to it. Concrete response models can be built on
    top of the attributes defined here.
    """

    statement = models.ForeignKey(
        STATEMENT_MODEL, related_name='in_response', on_delete=models.CASCADE
    )

    response = models.ForeignKey(
        STATEMENT_MODEL, related_name='responses', on_delete=models.CASCADE
    )

    created_at = models.DateTimeField(
        default=timezone.now,
        help_text='The date and time that this response was created at.'
    )

    class Meta:
        abstract = True

    @property
    def occurrence(self):
        """
        Count the stored responses that share this response's
        statement text and response text.
        """
        response_model = apps.get_model(DJANGO_APP_NAME, RESPONSE_MODEL)
        matches = response_model.objects.filter(
            statement__text=self.statement.text,
            response__text=self.response.text
        )
        return matches.count()

    def __str__(self):
        """
        Return ``'<statement> => <response>'`` with the statement text
        limited to 20 characters and the response text to 40.
        """
        statement_text = self.statement.text
        response_text = self.response.text

        if len(statement_text) > 20:
            statement_text = statement_text[:17] + '...'

        if len(response_text) > 40:
            response_text = response_text[:37] + '...'

        return '{} => {}'.format(statement_text, response_text)

    def serialize(self):
        """
        :returns: A dictionary representation of the response object.
        :rtype: dict
        """
        return {
            'text': self.response.text,
            'created_at': self.created_at.isoformat(),
            'occurrence': self.occurrence,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class AbstractBaseConversation(models.Model):
    """
    Abstract model for a conversation. Concrete conversation models
    can be built on top of the attributes defined here.
    """

    responses = models.ManyToManyField(
        RESPONSE_MODEL,
        help_text='The responses in this conversation.',
        related_name='conversations'
    )

    class Meta:
        abstract = True

    def __str__(self):
        """
        Return the conversation's id as a string.
        """
        return str(self.id)
|
||||||
|
|
||||||
|
|
||||||
|
class AbstractBaseTag(models.Model):
    """
    Abstract model for a tag attached to statements. Concrete tag
    models can be built on top of the attributes defined here.
    """

    name = models.SlugField(max_length=constants.TAG_NAME_MAX_LENGTH)

    statements = models.ManyToManyField(STATEMENT_MODEL, related_name='tags')

    class Meta:
        abstract = True

    def __str__(self):
        """
        Return the name of the tag.
        """
        return self.name
|
@ -0,0 +1,31 @@
|
|||||||
|
from django.contrib import admin
|
||||||
|
from chatterbot.ext.django_chatterbot.models import (
|
||||||
|
Statement, Response, Conversation, Tag
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class StatementAdmin(admin.ModelAdmin):
    """
    Admin options for statements: list, filter and search by text.
    """

    list_display = ('text',)
    list_filter = ('text',)
    search_fields = ('text',)
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseAdmin(admin.ModelAdmin):
    """
    Admin options for responses: list the linked statements with the
    occurrence count and search by either statement's text.
    """

    list_display = ('statement', 'response', 'occurrence',)
    search_fields = [
        'statement__text',
        'response__text',
    ]
|
||||||
|
|
||||||
|
|
||||||
|
class ConversationAdmin(admin.ModelAdmin):
    """
    Admin options for conversations: list by id.
    """

    list_display = ('id',)
|
||||||
|
|
||||||
|
|
||||||
|
class TagAdmin(admin.ModelAdmin):
    """
    Admin options for tags: list, filter and search by name.
    """

    list_display = ('name',)
    list_filter = ('name',)
    search_fields = ('name',)
|
||||||
|
|
||||||
|
|
||||||
|
# Register each model with its admin options, in the same order as before.
for _model, _model_admin in (
    (Statement, StatementAdmin),
    (Response, ResponseAdmin),
    (Conversation, ConversationAdmin),
    (Tag, TagAdmin),
):
    admin.site.register(_model, _model_admin)
|
@ -0,0 +1,8 @@
|
|||||||
|
from django.apps import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
class DjangoChatterBotConfig(AppConfig):
    """
    Django application configuration for the ChatterBot Django app.
    """

    name = 'chatterbot.ext.django_chatterbot'
    label = 'django_chatterbot'
    verbose_name = 'Django ChatterBot'
|
@ -0,0 +1,42 @@
|
|||||||
|
"""
|
||||||
|
These factories are used to generate fake data for testing.
|
||||||
|
"""
|
||||||
|
import factory
|
||||||
|
from chatterbot.ext.django_chatterbot import models
|
||||||
|
from chatterbot import constants
|
||||||
|
from factory.django import DjangoModelFactory
|
||||||
|
|
||||||
|
|
||||||
|
class StatementFactory(DjangoModelFactory):
    """
    Factory producing Statement instances with fake text no longer
    than the maximum statement text length.
    """

    class Meta:
        model = models.Statement

    text = factory.Faker('text', max_nb_chars=constants.STATEMENT_TEXT_MAX_LENGTH)
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseFactory(DjangoModelFactory):
    """
    Factory producing Response instances; both linked statements
    are generated through StatementFactory.
    """

    class Meta:
        model = models.Response

    statement = factory.SubFactory(StatementFactory)
    response = factory.SubFactory(StatementFactory)
|
||||||
|
|
||||||
|
|
||||||
|
class ConversationFactory(DjangoModelFactory):
    """
    Factory producing Conversation instances.
    """

    class Meta:
        model = models.Conversation
|
||||||
|
|
||||||
|
|
||||||
|
class TagFactory(DjangoModelFactory):
    """
    Factory producing Tag instances named with a fake word.
    """

    class Meta:
        model = models.Tag

    name = factory.Faker('word')
|
@ -0,0 +1,29 @@
|
|||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """
    A Django management command for calling a
    chat bot's training method.
    """

    help = 'Trains the database used by the chat bot'
    can_import_settings = True

    def handle(self, *args, **options):
        """
        Create a chat bot from the Django ChatterBot settings, train it
        with its configured training data and report progress on stdout.
        """
        # Imported here so the chat bot is only built when the command runs.
        from chatterbot import ChatBot
        from chatterbot.ext.django_chatterbot import settings

        # Django 1.8 does not define SUCCESS
        if hasattr(self.style, 'SUCCESS'):
            style = self.style.SUCCESS
        else:
            style = self.style.NOTICE

        chatterbot = ChatBot(**settings.CHATTERBOT)

        # Announce the start before training begins; the message used to
        # be printed only after training had already completed.
        self.stdout.write(style('Starting training...'))

        chatterbot.train(chatterbot.training_data)

        training_class = chatterbot.trainer.__class__.__name__
        self.stdout.write(style('ChatterBot trained using "%s"' % training_class))
|
@ -0,0 +1,39 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.db.models.deletion
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Initial schema: create the Response and Statement models and add
    the two foreign keys from Response to Statement.
    """

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Response',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID'
                    )
                ),
                ('occurrence', models.PositiveIntegerField(default=0)),
            ],
        ),
        migrations.CreateModel(
            name='Statement',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID'
                    )
                ),
                ('text', models.CharField(max_length=255, unique=True)),
            ],
        ),
        migrations.AddField(
            model_name='response',
            name='response',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='+',
                to='django_chatterbot.Statement'
            ),
        ),
        migrations.AddField(
            model_name='response',
            name='statement',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='in_response_to',
                to='django_chatterbot.Statement'
            ),
        ),
    ]
|
@ -0,0 +1,21 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.10.2 on 2016-10-30 12:13
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Add the ``extra_data`` character field to the statement model.
    """

    dependencies = [('django_chatterbot', '0001_initial')]

    operations = [
        migrations.AddField(
            model_name='statement',
            name='extra_data',
            # '{}' is only used to populate existing rows.
            field=models.CharField(default='{}', max_length=500),
            preserve_default=False,
        ),
    ]
|
@ -0,0 +1,20 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.9 on 2016-12-12 00:06
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Change the default of ``Response.occurrence`` to 1.
    """

    dependencies = [('django_chatterbot', '0002_statement_extra_data')]

    operations = [
        migrations.AlterField(
            model_name='response',
            name='occurrence',
            field=models.PositiveIntegerField(default=1),
        ),
    ]
|
@ -0,0 +1,26 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.10.3 on 2016-12-04 23:52
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.db.models.deletion
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Change the related names of the two foreign keys on the response
    model to ``in_response`` and ``responses``.
    """

    dependencies = [('django_chatterbot', '0003_change_occurrence_default')]

    operations = [
        migrations.AlterField(
            model_name='response',
            name='statement',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='in_response',
                to='django_chatterbot.Statement'
            ),
        ),
        migrations.AlterField(
            model_name='response',
            name='response',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='responses',
                to='django_chatterbot.Statement'
            ),
        ),
    ]
|
@ -0,0 +1,24 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.10.1 on 2016-12-29 19:20
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.utils.timezone
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Add the ``created_at`` timestamp field to the statement model.
    """

    dependencies = [('django_chatterbot', '0004_rename_in_response_to')]

    operations = [
        migrations.AddField(
            model_name='statement',
            name='created_at',
            field=models.DateTimeField(
                default=django.utils.timezone.now,
                help_text='The date and time that this statement was created at.'
            ),
        ),
    ]
|
@ -0,0 +1,33 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.9 on 2017-01-17 07:02
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.db.models.deletion
|
||||||
|
import django.utils.timezone
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Create the Conversation model and relate it to statements through
    a many-to-many field.
    """

    dependencies = [('django_chatterbot', '0005_statement_created_at')]

    operations = [
        migrations.CreateModel(
            name='Conversation',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID'
                    )
                ),
            ],
        ),
        migrations.AlterField(
            model_name='statement',
            name='created_at',
            field=models.DateTimeField(
                default=django.utils.timezone.now,
                help_text='The date and time that this statement was created at.'
            ),
        ),
        migrations.AddField(
            model_name='conversation',
            name='statements',
            field=models.ManyToManyField(
                help_text='The statements in this conversation.',
                related_name='conversation',
                to='django_chatterbot.Statement'
            ),
        ),
    ]
|
@ -0,0 +1,24 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.11 on 2017-07-18 00:16
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.utils.timezone
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Add the ``created_at`` timestamp field to the response model.
    """

    dependencies = [('django_chatterbot', '0006_create_conversation')]

    operations = [
        migrations.AddField(
            model_name='response',
            name='created_at',
            field=models.DateTimeField(
                default=django.utils.timezone.now,
                help_text='The date and time that this response was created at.'
            ),
        ),
    ]
|
@ -0,0 +1,32 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.11 on 2017-07-18 11:25
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Relate conversations to responses instead of statements and drop
    the ``Response.occurrence`` and ``Statement.created_at`` fields.
    """

    dependencies = [('django_chatterbot', '0007_response_created_at')]

    operations = [
        migrations.RemoveField(model_name='conversation', name='statements'),
        migrations.RemoveField(model_name='response', name='occurrence'),
        migrations.RemoveField(model_name='statement', name='created_at'),
        migrations.AddField(
            model_name='conversation',
            name='responses',
            field=models.ManyToManyField(
                help_text='The responses in this conversation.',
                related_name='conversations',
                to='django_chatterbot.Response'
            ),
        ),
    ]
|
@ -0,0 +1,35 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.11a1 on 2017-07-07 00:12
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Create the Tag model and relate it to statements through a
    many-to-many field.
    """

    dependencies = [('django_chatterbot', '0008_update_conversations')]

    operations = [
        migrations.CreateModel(
            name='Tag',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID'
                    )
                ),
                ('name', models.SlugField()),
            ],
            options={'abstract': False},
        ),
        migrations.AlterField(
            model_name='statement',
            name='text',
            field=models.CharField(max_length=255, unique=True),
        ),
        migrations.AddField(
            model_name='tag',
            name='statements',
            field=models.ManyToManyField(
                related_name='tags',
                to='django_chatterbot.Statement'
            ),
        ),
    ]
|
@ -0,0 +1,20 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.11.4 on 2017-08-16 00:56
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Set the maximum length of ``Statement.text`` to 400 characters.
    """

    dependencies = [('django_chatterbot', '0009_tags')]

    operations = [
        migrations.AlterField(
            model_name='statement',
            name='text',
            field=models.CharField(max_length=400, unique=True),
        ),
    ]
|
@ -0,0 +1,20 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.11.4 on 2017-08-20 13:55
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """
    Allow ``Statement.extra_data`` to be blank.
    """

    dependencies = [('django_chatterbot', '0010_statement_text')]

    operations = [
        migrations.AlterField(
            model_name='statement',
            name='extra_data',
            field=models.CharField(blank=True, max_length=500),
        ),
    ]
|
@ -0,0 +1,34 @@
|
|||||||
|
from chatterbot.ext.django_chatterbot.abstract_models import (
|
||||||
|
AbstractBaseConversation, AbstractBaseResponse,
|
||||||
|
AbstractBaseStatement, AbstractBaseTag
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Statement(AbstractBaseStatement):
    """
    Concrete statement model. A statement represents a single spoken
    entity, sentence or phrase that someone can say.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class Response(AbstractBaseResponse):
    """
    Concrete response model. A connection between a statement and
    another statement that responds to it.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class Conversation(AbstractBaseConversation):
    """
    Concrete conversation model. A sequence of statements
    representing a conversation.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class Tag(AbstractBaseTag):
    """
    Concrete tag model. A label that categorizes a statement.
    """
|
@ -0,0 +1,19 @@
|
|||||||
|
"""
|
||||||
|
Default ChatterBot settings for Django.
|
||||||
|
"""
|
||||||
|
from django.conf import settings
|
||||||
|
from chatterbot import constants
|
||||||
|
|
||||||
|
|
||||||
|
# Project-level overrides; an empty dict when settings.CHATTERBOT is not set.
CHATTERBOT_SETTINGS = getattr(settings, 'CHATTERBOT', {})

CHATTERBOT_DEFAULTS = {
    'name': 'ChatterBot',
    'storage_adapter': 'chatterbot.storage.DjangoStorageAdapter',
    'input_adapter': 'chatterbot.input.VariableInputTypeAdapter',
    'output_adapter': 'chatterbot.output.OutputAdapter',
    'django_app_name': constants.DEFAULT_DJANGO_APP_NAME,
}

# Start from a copy of the defaults and let the project settings win.
CHATTERBOT = dict(CHATTERBOT_DEFAULTS)
CHATTERBOT.update(CHATTERBOT_SETTINGS)
|
@ -0,0 +1,11 @@
|
|||||||
|
from django.conf.urls import url
|
||||||
|
from .views import ChatterBotView
|
||||||
|
|
||||||
|
|
||||||
|
# Route the root of this URL configuration to the ChatterBot API view.
urlpatterns = [
    url(r'^$', ChatterBotView.as_view(), name='chatterbot'),
]
|
@ -0,0 +1,118 @@
|
|||||||
|
import json
|
||||||
|
from django.views.generic import View
|
||||||
|
from django.http import JsonResponse
|
||||||
|
from chatterbot import ChatBot
|
||||||
|
from chatterbot.ext.django_chatterbot import settings
|
||||||
|
|
||||||
|
|
||||||
|
class ChatterBotViewMixin(object):
    """
    Subclass this mixin for access to the 'chatterbot' attribute.
    """

    # A single chat bot instance, created when the class body is executed.
    chatterbot = ChatBot(**settings.CHATTERBOT)

    def validate(self, data):
        """
        Validate the data received from the client.

        * The data should contain a text attribute.

        :raises ValidationError: If 'text' is not in the data.
        """
        from django.core.exceptions import ValidationError

        if 'text' in data:
            return

        raise ValidationError('The attribute "text" is required.')

    def get_conversation(self, request):
        """
        Return the conversation for the session if one exists.
        Create a new conversation if one does not exist.

        :returns: An object with an ``id`` attribute and a ``statements``
            list holding the serialized statements of the conversation.
        """
        from chatterbot.ext.django_chatterbot.models import Conversation, Response

        class Obj(object):
            def __init__(self):
                self.id = None
                self.statements = []

        conversation = Obj()
        conversation.id = request.session.get('conversation_id', 0)

        try:
            Conversation.objects.get(id=conversation.id)
        except Conversation.DoesNotExist:
            # No stored conversation for this session: create one and
            # remember its id; there are no statements to report yet.
            new_conversation_id = self.chatterbot.storage.create_conversation()
            request.session['conversation_id'] = new_conversation_id
            conversation.id = new_conversation_id
            return conversation

        conversation_responses = Response.objects.filter(
            conversations__id=conversation.id
        )

        for response in conversation_responses:
            conversation.statements.append(response.statement.serialize())
            conversation.statements.append(response.response.serialize())

        return conversation
|
||||||
|
|
||||||
|
|
||||||
|
class ChatterBotView(ChatterBotViewMixin, View):
    """
    Provide an API endpoint to interact with ChatterBot.
    """

    def post(self, request, *args, **kwargs):
        """
        Return a response to the statement in the posted data.

        The request body is decoded as UTF-8 and parsed as JSON.
        """
        request_body = request.read().decode('utf-8')
        input_data = json.loads(request_body)

        self.validate(input_data)

        conversation = self.get_conversation(request)
        response = self.chatterbot.get_response(input_data, conversation.id)

        return JsonResponse(response.serialize(), status=200)

    def get(self, request, *args, **kwargs):
        """
        Return data corresponding to the current conversation.
        """
        conversation = self.get_conversation(request)

        # Return a method not allowed response
        return JsonResponse(
            {
                'detail': 'You should make a POST request to this endpoint.',
                'name': self.chatterbot.name,
                'conversation': conversation.statements
            },
            status=405
        )

    def patch(self, request, *args, **kwargs):
        """
        The patch method is not allowed for this endpoint.
        """
        # Return a method not allowed response
        return JsonResponse(
            {'detail': 'You should make a POST request to this endpoint.'},
            status=405
        )

    def delete(self, request, *args, **kwargs):
        """
        The delete method is not allowed for this endpoint.
        """
        # Return a method not allowed response
        return JsonResponse(
            {'detail': 'You should make a POST request to this endpoint.'},
            status=405
        )
|
@ -0,0 +1,132 @@
|
|||||||
|
from sqlalchemy import Table, Column, Integer, DateTime, ForeignKey, PickleType
|
||||||
|
from sqlalchemy.orm import relationship
|
||||||
|
from sqlalchemy.sql import func
|
||||||
|
from sqlalchemy.ext.declarative import declared_attr, declarative_base
|
||||||
|
|
||||||
|
from chatterbot.constants import TAG_NAME_MAX_LENGTH, STATEMENT_TEXT_MAX_LENGTH
|
||||||
|
from chatterbot.ext.sqlalchemy_app.types import UnicodeString
|
||||||
|
from chatterbot.conversation import StatementMixin
|
||||||
|
|
||||||
|
|
||||||
|
class ModelBase(object):
    """
    An augmented base class for SqlAlchemy models: supplies the table
    name and an auto-incrementing integer primary key.
    """

    @declared_attr
    def __tablename__(cls):
        """
        Use the lowercase class name as the table name.
        """
        class_name = cls.__name__
        return class_name.lower()

    id = Column(Integer, primary_key=True, autoincrement=True)
|
||||||
|
|
||||||
|
|
||||||
|
# Declarative base shared by every model in this module.
Base = declarative_base(cls=ModelBase)

# Association table linking tags and statements.
tag_association_table = Table(
    'tag_association', Base.metadata,
    Column('tag_id', Integer, ForeignKey('tag.id')),
    Column('statement_id', Integer, ForeignKey('statement.id'))
)
|
||||||
|
|
||||||
|
|
||||||
|
class Tag(Base):
    """
    A tag that describes a statement.
    """

    # The tag's name, limited to TAG_NAME_MAX_LENGTH characters.
    name = Column(
        UnicodeString(TAG_NAME_MAX_LENGTH)
    )
|
||||||
|
|
||||||
|
|
||||||
|
class Statement(Base, StatementMixin):
    """
    A Statement represents a sentence or phrase.
    """

    text = Column(UnicodeString(STATEMENT_TEXT_MAX_LENGTH), unique=True)

    tags = relationship(
        'Tag', secondary=lambda: tag_association_table, backref='statements'
    )

    extra_data = Column(PickleType)

    in_response_to = relationship('Response', back_populates='statement_table')

    def get_tags(self):
        """
        Return a list of tags for this statement.
        """
        return [tag.name for tag in self.tags]

    def get_statement(self):
        """
        Build a ``chatterbot.conversation.Statement`` from this row,
        including its tag names, extra data and responses.
        """
        from chatterbot.conversation import Statement as StatementObject
        from chatterbot.conversation import Response as ResponseObject

        tag_names = [tag.name for tag in self.tags]

        statement = StatementObject(
            self.text, tags=tag_names, extra_data=self.extra_data
        )

        for response in self.in_response_to:
            response_object = ResponseObject(
                text=response.text,
                occurrence=response.occurrence
            )
            statement.add_response(response_object)

        return statement
|
||||||
|
|
||||||
|
|
||||||
|
class Response(Base):
    """
    Response, contains responses related to a given statement.
    """

    text = Column(UnicodeString(STATEMENT_TEXT_MAX_LENGTH))

    # Timestamp filled in by the database server when the row is inserted.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    occurrence = Column(Integer, default=1)

    # Text of the statement this response belongs to.
    statement_text = Column(
        UnicodeString(STATEMENT_TEXT_MAX_LENGTH),
        ForeignKey('statement.text')
    )

    statement_table = relationship(
        'Statement',
        back_populates='in_response_to',
        cascade='all',
        uselist=False
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Association table linking conversations and statements.
conversation_association_table = Table(
    'conversation_association', Base.metadata,
    Column('conversation_id', Integer, ForeignKey('conversation.id')),
    Column('statement_id', Integer, ForeignKey('statement.id'))
)
|
||||||
|
|
||||||
|
|
||||||
|
class Conversation(Base):
    """
    A conversation, related to its statements through the
    conversation association table.
    """

    statements = relationship(
        'Statement',
        backref='conversations',
        secondary=lambda: conversation_association_table
    )
|
@ -0,0 +1,21 @@
|
|||||||
|
from sqlalchemy.types import TypeDecorator, Unicode
|
||||||
|
|
||||||
|
|
||||||
|
class UnicodeString(TypeDecorator):
    """
    Type for unicode strings.
    """

    impl = Unicode

    # SQLAlchemy 1.4+ warns about, and skips statement caching for,
    # TypeDecorator subclasses that do not declare cache_ok. This type
    # defines no state of its own, so it is safe to cache. Older
    # SQLAlchemy versions ignore the attribute.
    cache_ok = True

    def process_bind_param(self, value, dialect):
        """
        Coerce Python bytestrings to unicode before
        saving them to the database.

        :param value: The value being bound to the statement.
        :param dialect: The dialect in use (not used by this method).
        :returns: The value, decoded from UTF-8 when it is a byte
            string on Python 2; otherwise unchanged.
        """
        import sys

        # Only Python 2 has byte-string ``str`` values that need decoding.
        if sys.version_info[0] < 3:
            if isinstance(value, str):
                value = value.decode('utf-8')
        return value
|
@ -0,0 +1,47 @@
|
|||||||
|
"""
|
||||||
|
Filters set the base query that gets passed to the storage adapter.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class Filter(object):
    """
    The base filter class; every other filter
    is expected to inherit from it.
    """

    def filter_selection(self, chatterbot, conversation_id):
        """
        Return the base query of the chat bot's storage adapter unchanged.

        Subclasses override this method to narrow the query.

        :param chatterbot: An object exposing ``storage.base_query``.
        :param conversation_id: Identifier of the conversation; unused here.
        :returns: ``chatterbot.storage.base_query``.
        """
        storage = chatterbot.storage
        return storage.base_query
|
||||||
|
|
||||||
|
|
||||||
|
class RepetitiveResponseFilter(Filter):
    """
    A filter that excludes the text of the most recent response so that
    a chat bot does not repeat what it has just said.
    """

    def filter_selection(self, chatterbot, conversation_id):
        """
        Build a query that leaves out the latest response of a conversation.

        :param chatterbot: An object exposing the ``storage`` adapter.
        :param conversation_id: Passed to ``storage.get_latest_response``.
        :returns: The unmodified base query when there is no latest
            response; otherwise the result of calling
            ``statement_text_not_in`` on the base query with the latest
            response text.
        """
        # TODO: Add a larger quantity of response history
        latest_response = chatterbot.storage.get_latest_response(conversation_id)

        # Nothing to exclude: defer to the base filter.
        if not latest_response:
            return super(RepetitiveResponseFilter, self).filter_selection(
                chatterbot,
                conversation_id
            )

        excluded_text = [latest_response.text]

        return chatterbot.storage.base_query.statement_text_not_in(excluded_text)
|
@ -0,0 +1,18 @@
|
|||||||
|
from .input_adapter import InputAdapter
|
||||||
|
from .microsoft import Microsoft
|
||||||
|
from .gitter import Gitter
|
||||||
|
from .hipchat import HipChat
|
||||||
|
from .mailgun import Mailgun
|
||||||
|
from .terminal import TerminalAdapter
|
||||||
|
from .variable_input_type_adapter import VariableInputTypeAdapter
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = (
|
||||||
|
'InputAdapter',
|
||||||
|
'Microsoft',
|
||||||
|
'Gitter',
|
||||||
|
'HipChat',
|
||||||
|
'Mailgun',
|
||||||
|
'TerminalAdapter',
|
||||||
|
'VariableInputTypeAdapter',
|
||||||
|
)
|
@ -0,0 +1,176 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from time import sleep
|
||||||
|
from chatterbot.input import InputAdapter
|
||||||
|
from chatterbot.conversation import Statement
|
||||||
|
|
||||||
|
|
||||||
|
class Gitter(InputAdapter):
    """
    An input adapter that allows a ChatterBot instance to get
    input statements from a Gitter room.

    Creating an instance performs HTTP requests: the room is joined and
    the authenticated user's data is retrieved.

    :kwargs:
        * *gitter_host* (``str``) --
          Base URL of the Gitter API.
          Defaults to ``https://api.gitter.im/v1/``.
        * *gitter_room* (``str``) --
          The room to join; sent as the ``uri`` of the join request.
        * *gitter_api_token* (``str``) --
          Token sent in the ``Bearer`` authorization header.
        * *gitter_only_respond_to_mentions* (``bool``) --
          Only respond to unread messages that mention the bot's user.
          Defaults to ``True``.
        * *gitter_sleep_time* (``int``) --
          Seconds slept after each poll of the room. Defaults to 4.
    """

    def __init__(self, **kwargs):
        super(Gitter, self).__init__(**kwargs)

        self.gitter_host = kwargs.get('gitter_host', 'https://api.gitter.im/v1/')
        self.gitter_room = kwargs.get('gitter_room')
        self.gitter_api_token = kwargs.get('gitter_api_token')
        self.only_respond_to_mentions = kwargs.get('gitter_only_respond_to_mentions', True)
        self.sleep_time = kwargs.get('gitter_sleep_time', 4)

        authorization_header = 'Bearer {}'.format(self.gitter_api_token)

        self.headers = {
            'Authorization': authorization_header,
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }

        # Join the Gitter room
        room_data = self.join_room(self.gitter_room)
        self.room_id = room_data.get('id')

        # The user endpoint's JSON is indexed as a sequence; the first entry
        # supplies the id and user name used by this adapter.
        user_data = self.get_user_data()
        self.user_id = user_data[0].get('id')
        self.username = user_data[0].get('username')

    def _validate_status_code(self, response):
        """
        Raise ``HTTPStatusException`` unless the status code is 200 or 201.
        """
        code = response.status_code
        if code not in (200, 201):
            raise self.HTTPStatusException('{} status code recieved'.format(code))

    def join_room(self, room_name):
        """
        Join the specified Gitter room.

        :param room_name: The room, sent as the ``uri`` field of the request.
        :returns: The decoded JSON body of the response.
        :raises HTTPStatusException: If the status code is not 200 or 201.
        """
        import requests

        endpoint = '{}rooms'.format(self.gitter_host)
        response = requests.post(
            endpoint,
            headers=self.headers,
            json={'uri': room_name}
        )
        self.logger.info('{} joining room {}'.format(
            response.status_code, endpoint
        ))
        self._validate_status_code(response)
        return response.json()

    def get_user_data(self):
        """
        Retrieve the data of the authenticated user.

        :returns: The decoded JSON body of the response.
        :raises HTTPStatusException: If the status code is not 200 or 201.
        """
        import requests

        endpoint = '{}user'.format(self.gitter_host)
        response = requests.get(
            endpoint,
            headers=self.headers
        )
        self.logger.info('{} retrieving user data {}'.format(
            response.status_code, endpoint
        ))
        self._validate_status_code(response)
        return response.json()

    def mark_messages_as_read(self, message_ids):
        """
        Mark the specified message ids as read.

        :param message_ids: Message ids, sent as the ``chat`` field.
        :returns: The decoded JSON body of the response.
        :raises HTTPStatusException: If the status code is not 200 or 201.
        """
        import requests

        endpoint = '{}user/{}/rooms/{}/unreadItems'.format(
            self.gitter_host, self.user_id, self.room_id
        )
        response = requests.post(
            endpoint,
            headers=self.headers,
            json={'chat': message_ids}
        )
        self.logger.info('{} marking messages as read {}'.format(
            response.status_code, endpoint
        ))
        self._validate_status_code(response)
        return response.json()

    def get_most_recent_message(self):
        """
        Get the most recent message from the Gitter room.

        :returns: The first item of the response data, or ``None`` when the
            response data is empty.
        :raises HTTPStatusException: If the status code is not 200 or 201.
        """
        import requests

        endpoint = '{}rooms/{}/chatMessages?limit=1'.format(self.gitter_host, self.room_id)
        response = requests.get(
            endpoint,
            headers=self.headers
        )
        self.logger.info('{} getting most recent message'.format(
            response.status_code
        ))
        self._validate_status_code(response)
        data = response.json()
        if data:
            return data[0]
        return None

    def _contains_mention(self, mentions):
        """
        Return True if any mention's ``screenName`` equals the bot's user name.
        """
        return any(
            self.username == mention.get('screenName')
            for mention in mentions
        )

    def should_respond(self, data):
        """
        Takes the API response data from a single message.
        Returns true if the chat bot should respond.

        A response is only given to unread messages. When the adapter is
        configured to respond to mentions only, the message must also
        mention the bot's user.
        """
        if not data:
            return False

        if not data.get('unread', False):
            return False

        if self.only_respond_to_mentions:
            # A message without a ``mentions`` entry mentions nobody; it
            # must not raise a KeyError.
            return self._contains_mention(data.get('mentions') or [])

        return True

    def remove_mentions(self, text):
        """
        Return the text with every ``@mention`` removed, surrounding
        whitespace stripped and runs of spaces collapsed to one space.
        """
        import re
        text_without_mentions = re.sub(r'@\S+', '', text)

        # Remove consecutive spaces
        text_without_mentions = re.sub(' +', ' ', text_without_mentions.strip())

        return text_without_mentions

    def process_input(self, statement):
        """
        Poll the room until a message that should be answered arrives.

        The message is marked as read and its text, without mentions, is
        returned as a new statement. The adapter sleeps for ``sleep_time``
        seconds after every poll.

        :param statement: Unused; the input comes from the Gitter room.
        :returns: A ``Statement`` built from the message text.
        """
        new_message = False

        while not new_message:
            data = self.get_most_recent_message()
            if self.should_respond(data):
                self.mark_messages_as_read([data['id']])
                new_message = True
            sleep(self.sleep_time)

        text = self.remove_mentions(data['text'])
        statement = Statement(text)

        return statement

    class HTTPStatusException(Exception):
        """
        Exception raised when unexpected non-success HTTP
        status codes are returned in a response.
        """

        def __init__(self, value):
            # Hand the message to Exception as well so that ``args`` is
            # populated for generic handlers and pickling.
            Exception.__init__(self, value)
            self.value = value

        def __str__(self):
            return repr(self.value)
|
@ -0,0 +1,113 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from time import sleep
|
||||||
|
from chatterbot.input import InputAdapter
|
||||||
|
from chatterbot.conversation import Statement
|
||||||
|
|
||||||
|
|
||||||
|
class HipChat(InputAdapter):
    """
    An input adapter that lets a ChatterBot instance read its
    input statements from a HipChat room.

    :kwargs:
        * *hipchat_host* (``str``) -- Base URL of the HipChat server.
        * *hipchat_access_token* (``str``) --
          Token sent in the ``Bearer`` authorization header.
        * *hipchat_room* (``str``) -- Id or name of the room to read.
        * *ssl_verify* --
          Assigned to ``verify`` on the requests session. Defaults to ``True``.
    """

    def __init__(self, **kwargs):
        super(HipChat, self).__init__(**kwargs)

        self.hipchat_host = kwargs.get('hipchat_host')
        self.hipchat_access_token = kwargs.get('hipchat_access_token')
        self.hipchat_room = kwargs.get('hipchat_room')
        self.session_id = str(self.chatbot.default_session.uuid)

        import requests
        self.session = requests.Session()
        self.session.verify = kwargs.get('ssl_verify', True)

        self.headers = {
            'Authorization': 'Bearer {}'.format(self.hipchat_access_token),
            'Content-Type': 'application/json'
        }

        # Ids of the messages that have already been seen or responded to
        self.recent_message_ids = self.get_initial_ids()

    def get_initial_ids(self):
        """
        Return the set of ids of the most recent messages (up to 75
        are requested).
        """
        history = self.view_recent_room_history(
            self.hipchat_room,
            max_results=75
        )

        return {item['id'] for item in history['items']}

    def view_recent_room_history(self, room_id_or_name, max_results=1):
        """
        Request the recent history of a room and return the decoded JSON.

        https://www.hipchat.com/docs/apiv2/method/view_recent_room_history
        """
        history_url = '{}/v2/room/{}/history?max-results={}'.format(
            self.hipchat_host,
            room_id_or_name,
            max_results
        )

        response = self.session.get(history_url, headers=self.headers)

        return response.json()

    def get_most_recent_message(self, room_id_or_name):
        """
        Return the last item of the room's recent history,
        or ``None`` when the history has no items.
        """
        history = self.view_recent_room_history(room_id_or_name)
        items = history['items']

        return items[-1] if items else None

    def process_input(self, statement):
        """
        Poll the HipChat room until a message with an unseen id arrives.

        :param statement: Unused; the input comes from the HipChat room.
        :returns: A ``Statement`` holding the message text, with the
            message id stored as the ``hipchat_message_id`` extra data.
        """
        previous_response = self.chatbot.storage.get_latest_response(
            self.session_id
        )

        # Remember the message that the latest stored response belongs to.
        if previous_response:
            last_message_id = previous_response.extra_data.get(
                'hipchat_message_id', None
            )
            if last_message_id:
                self.recent_message_ids.add(last_message_id)

        found_new_message = False

        while not found_new_message:
            data = self.get_most_recent_message(self.hipchat_room)

            if data and data['id'] not in self.recent_message_ids:
                self.recent_message_ids.add(data['id'])
                found_new_message = True

            # The pause happens after every poll, including the last one.
            sleep(3.5)

        statement = Statement(data['message'])
        statement.add_extra_data('hipchat_message_id', data['id'])

        return statement
|
@ -0,0 +1,33 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from chatterbot.adapters import Adapter
|
||||||
|
|
||||||
|
|
||||||
|
class InputAdapter(Adapter):
    """
    The abstract interface that every input
    adapter is expected to implement.
    """

    def process_input(self, *args, **kwargs):
        """
        Build a statement object from the input source.

        Subclasses must override this method; the base implementation
        always raises ``AdapterMethodNotImplementedError``.
        """
        raise self.AdapterMethodNotImplementedError()

    def process_input_statement(self, *args, **kwargs):
        """
        Process the input and prefer a stored statement with the same text.

        All arguments are forwarded to ``process_input``.

        :returns: The statement found in storage for the input text when
            there is one, otherwise the statement produced by
            ``process_input``.
        """
        input_statement = self.process_input(*args, **kwargs)
        self.logger.info('Received input statement: {}'.format(input_statement.text))

        existing_statement = self.chatbot.storage.find(input_statement.text)

        if not existing_statement:
            self.logger.info('"{}" is not a known statement'.format(input_statement.text))
            return input_statement

        self.logger.info('"{}" is a known statement'.format(input_statement.text))
        return existing_statement
|
@ -0,0 +1,61 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
import datetime
|
||||||
|
from chatterbot.input import InputAdapter
|
||||||
|
from chatterbot.conversation import Statement
|
||||||
|
|
||||||
|
|
||||||
|
class Mailgun(InputAdapter):
    """
    An input adapter that reads its input from Mailgun.

    :kwargs:
        * *name* -- Stored as ``name``; the bot's name.
        * *mailgun_from_address* -- Stored as ``from_address``.
        * *mailgun_api_key* -- API key used for HTTP basic authentication.
        * *mailgun_api_endpoint* -- Base URL of the Mailgun API.
    """

    def __init__(self, **kwargs):
        super(Mailgun, self).__init__(**kwargs)

        # Use the bot's name for the name of the sender
        self.name = kwargs.get('name')
        self.from_address = kwargs.get('mailgun_from_address')
        self.api_key = kwargs.get('mailgun_api_key')
        self.endpoint = kwargs.get('mailgun_api_endpoint')

    def get_email_stored_events(self):
        """
        Request the events endpoint, starting from one day ago, in
        ascending order and limited to a single item.

        :returns: The response object of the request.
        """
        import requests

        one_day_ago = datetime.datetime.now() - datetime.timedelta(days=1)
        query = {
            'begin': one_day_ago.isoformat(),
            'ascending': 'yes',
            'limit': 1
        }

        return requests.get(
            '{}/events'.format(self.endpoint),
            auth=('api', self.api_key),
            params=query
        )

    def get_stored_email_urls(self):
        """
        Yield the storage url of every event item that has one.
        """
        events = self.get_email_stored_events().json()

        for item in events.get('items', []):
            if 'storage' in item and 'url' in item['storage']:
                yield item['storage']['url']

    def get_message(self, url):
        """
        Request a stored message by its url.

        :returns: The response object of the request.
        """
        import requests

        return requests.get(url, auth=('api', self.api_key))

    def process_input(self, statement):
        """
        Build a statement from the first stored email.

        :param statement: Unused; the input comes from Mailgun.
        :returns: A ``Statement`` made from the message's ``stripped-text``.
        :raises IndexError: If no stored email url is available.
        """
        stored_urls = list(self.get_stored_email_urls())
        first_url = stored_urls[0]

        message = self.get_message(first_url).json()

        return Statement(message.get('stripped-text'))
|
@ -0,0 +1,115 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from time import sleep
|
||||||
|
from chatterbot.input import InputAdapter
|
||||||
|
from chatterbot.conversation import Statement
|
||||||
|
|
||||||
|
|
||||||
|
class Microsoft(InputAdapter):
    """
    An input adapter that allows a ChatterBot instance to get
    input statements from a Microsoft Bot using *Directline client protocol*.
    https://docs.botframework.com/en-us/restapi/directline/#navtitle

    Creating an instance performs an HTTP request that starts a conversation.

    :kwargs:
        * *directline_host* (``str``) --
          Base URL of the Direct Line service.
          Defaults to ``https://directline.botframework.com``.
        * *direct_line_token_or_secret* (``str``) --
          Credential sent in the ``BotConnector`` authorization header.
        * *ssl_verify* --
          Passed as ``verify`` to every request. Defaults to ``False``,
          which keeps the adapter's historical behaviour of NOT verifying
          TLS certificates; pass ``True`` to enable verification.
    """

    def __init__(self, **kwargs):
        super(Microsoft, self).__init__(**kwargs)
        import requests
        from requests.packages.urllib3.exceptions import InsecureRequestWarning

        self.directline_host = kwargs.get('directline_host', 'https://directline.botframework.com')

        # SECURITY: certificate verification used to be hard-coded off.
        # It is now configurable; the default is unchanged so that existing
        # deployments keep working, but enabling it is recommended.
        self.ssl_verify = kwargs.get('ssl_verify', False)

        # Only silence the insecure-request warning when verification is
        # actually disabled.
        if not self.ssl_verify:
            requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

        # NOTE: Direct Line client credentials are different from your bot's
        # credentials
        self.direct_line_token_or_secret = kwargs.get('direct_line_token_or_secret')

        authorization_header = 'BotConnector {}'.format(
            self.direct_line_token_or_secret
        )

        self.headers = {
            'Authorization': authorization_header,
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'charset': 'utf-8'
        }

        conversation_data = self.start_conversation()
        self.conversation_id = conversation_data.get('conversationId')
        self.conversation_token = conversation_data.get('token')

    def _validate_status_code(self, response):
        """
        Raise ``HTTPStatusException`` unless the status code is 200.
        """
        code = response.status_code
        if code != 200:
            raise self.HTTPStatusException('{} status code recieved'.format(code))

    def start_conversation(self):
        """
        Start a new conversation.

        :returns: The decoded JSON body of the response.
        :raises HTTPStatusException: If the status code is not 200.
        """
        import requests

        endpoint = '{host}/api/conversations'.format(host=self.directline_host)
        response = requests.post(
            endpoint,
            headers=self.headers,
            verify=self.ssl_verify
        )
        self.logger.info('{} starting conversation {}'.format(
            response.status_code, endpoint
        ))
        self._validate_status_code(response)
        return response.json()

    def get_most_recent_message(self):
        """
        Retrieve the messages of the conversation and pick the one at the
        position given by the response's ``watermark`` value.

        :returns: The selected message, or ``None`` when the response
            contains no messages.
        :raises HTTPStatusException: If the status code is not 200.
        """
        import requests

        endpoint = '{host}/api/conversations/{id}/messages'.format(
            host=self.directline_host,
            id=self.conversation_id
        )

        response = requests.get(
            endpoint,
            headers=self.headers,
            verify=self.ssl_verify
        )

        self.logger.info('{} retrieving most recent messages {}'.format(
            response.status_code, endpoint
        ))

        self._validate_status_code(response)

        data = response.json()

        if data['messages']:
            last_msg = int(data['watermark'])
            return data['messages'][last_msg - 1]
        return None

    def process_input(self, statement):
        """
        Poll the conversation until a message with an id is available.

        The adapter sleeps for 3.5 seconds after every poll.

        :param statement: Unused; the input comes from the conversation.
        :returns: A ``Statement`` built from the message text.
        """
        new_message = False
        data = None

        while not new_message:
            data = self.get_most_recent_message()
            # A message without an ``id`` entry is treated like a missing
            # message instead of raising a KeyError.
            if data and data.get('id'):
                new_message = True
            sleep(3.5)

        text = data['text']
        statement = Statement(text)
        self.logger.info('processing user statement {}'.format(statement))

        return statement

    class HTTPStatusException(Exception):
        """
        Exception raised when unexpected non-success HTTP
        status codes are returned in a response.
        """

        def __init__(self, value):
            # Hand the message to Exception as well so that ``args`` is
            # populated for generic handlers and pickling.
            Exception.__init__(self, value)
            self.value = value

        def __str__(self):
            return repr(self.value)
|
@ -0,0 +1,18 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from chatterbot.input import InputAdapter
|
||||||
|
from chatterbot.conversation import Statement
|
||||||
|
from chatterbot.utils import input_function
|
||||||
|
|
||||||
|
|
||||||
|
class TerminalAdapter(InputAdapter):
    """
    A minimal adapter that lets ChatterBot
    receive its input through the terminal.
    """

    def process_input(self, *args, **kwargs):
        """
        Read one input from the user and wrap it in a statement.

        The arguments are accepted for interface compatibility
        and are not used.
        """
        return Statement(input_function())
|
@ -0,0 +1,65 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from chatterbot.input import InputAdapter
|
||||||
|
from chatterbot.conversation import Statement
|
||||||
|
|
||||||
|
|
||||||
|
class VariableInputTypeAdapter(InputAdapter):
    """
    An input adapter that accepts a statement object, a text string
    or a dictionary and turns each of them into a statement.
    """

    JSON = 'json'
    TEXT = 'text'
    OBJECT = 'object'

    VALID_FORMATS = (JSON, TEXT, OBJECT, )

    def detect_type(self, statement):
        """
        Classify the input as one of the formats in ``VALID_FORMATS``.

        Anything with a ``text`` attribute is an object, a string is
        text and a dictionary is JSON; the checks run in that order.

        :raises UnrecognizedInputFormatException: For any other input.
        """
        import sys

        # Python 2 has a common base class for its two string types.
        string_types = basestring if sys.version_info[0] < 3 else str  # NOQA

        if hasattr(statement, 'text'):
            return self.OBJECT

        if isinstance(statement, string_types):
            return self.TEXT

        if isinstance(statement, dict):
            return self.JSON

        raise self.UnrecognizedInputFormatException(
            'The type {} is not recognized as a valid input type.'.format(
                type(statement)
            )
        )

    def process_input(self, statement):
        """
        Convert the input into a statement object.

        :param statement: A statement object, a string or a dictionary
            with a ``text`` key. The remaining dictionary items are passed
            to ``Statement`` as keyword arguments.
        :returns: The statement object for the input.
        """
        input_type = self.detect_type(statement)

        # A statement object is passed through untouched
        if input_type == self.OBJECT:
            return statement

        # A string becomes the text of a new statement
        if input_type == self.TEXT:
            return Statement(statement)

        # A dictionary is copied so that the caller's object is not modified
        if input_type == self.JSON:
            input_json = dict(statement)
            text = input_json.pop('text')

            return Statement(text, **input_json)

    class UnrecognizedInputFormatException(Exception):
        """
        Exception raised when an input format is specified that is
        not in the VariableInputTypeAdapter.VALID_FORMATS variable.
        """

        def __init__(self, value='The input format was not recognized.'):
            self.value = value

        def __str__(self):
            return repr(self.value)
|
@ -0,0 +1,20 @@
|
|||||||
|
from .logic_adapter import LogicAdapter
|
||||||
|
from .best_match import BestMatch
|
||||||
|
from .low_confidence import LowConfidenceAdapter
|
||||||
|
from .mathematical_evaluation import MathematicalEvaluation
|
||||||
|
from .multi_adapter import MultiLogicAdapter
|
||||||
|
from .no_knowledge_adapter import NoKnowledgeAdapter
|
||||||
|
from .specific_response import SpecificResponseAdapter
|
||||||
|
from .time_adapter import TimeLogicAdapter
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = (
|
||||||
|
'LogicAdapter',
|
||||||
|
'BestMatch',
|
||||||
|
'LowConfidenceAdapter',
|
||||||
|
'MathematicalEvaluation',
|
||||||
|
'MultiLogicAdapter',
|
||||||
|
'NoKnowledgeAdapter',
|
||||||
|
'SpecificResponseAdapter',
|
||||||
|
'TimeLogicAdapter',
|
||||||
|
)
|
@ -0,0 +1,84 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from .logic_adapter import LogicAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class BestMatch(LogicAdapter):
    """
    A logic adapter that returns a response based on known responses to
    the closest matches to the input statement.
    """

    def get(self, input_statement):
        """
        Find the stored statement that is most similar to the input.

        Every statement returned by the storage adapter's
        ``get_response_statements`` is compared to ``input_statement`` with
        ``compare_statements``; the one with the highest confidence wins.

        Note that the ``confidence`` attribute of ``input_statement`` is set
        to 0, and the input statement itself is returned when no candidate
        scores above 0.

        :param input_statement: The statement to find a match for.
        :returns: The closest matching statement, or a random statement
            with a confidence of 0 when no statement has a known response.
        :raises EmptyDatasetException: If the storage contains no statements.
        """
        statement_list = self.chatbot.storage.get_response_statements()

        if not statement_list:
            if not self.chatbot.storage.count():
                raise self.EmptyDatasetException()

            # Use a randomly picked statement
            self.logger.info(
                'No statements have known responses. '
                'Choosing a random response to return.'
            )
            random_response = self.chatbot.storage.get_random()
            random_response.confidence = 0
            return random_response

        closest_match = input_statement
        closest_match.confidence = 0

        # Find the closest matching known statement
        for statement in statement_list:
            confidence = self.compare_statements(input_statement, statement)

            if confidence > closest_match.confidence:
                statement.confidence = confidence
                closest_match = statement

        return closest_match

    def can_process(self, statement):
        """
        Check that the chatbot's storage adapter is available to the logic
        adapter and there is at least one statement in the database.

        :returns: The number of stored statements, which is truthy when
            at least one statement exists.
        """
        return self.chatbot.storage.count()

    def process(self, input_statement):
        """
        Select a response to the closest known match of the input.

        :param input_statement: The statement to respond to.
        :returns: The selected response carrying the confidence of the
            closest match, or a random statement with a confidence of 0
            when the closest match has no known responses.
        """
        # Select the closest match to the input statement
        closest_match = self.get(input_statement)

        # The closest match comes first in the message: it is the statement
        # being used, and the input is what it is matched to.
        self.logger.info('Using "{}" as a close match to "{}"'.format(
            closest_match.text, input_statement.text
        ))

        # Get all statements that are in response to the closest match
        response_list = self.chatbot.storage.filter(
            in_response_to__contains=closest_match.text
        )

        if response_list:
            self.logger.info(
                'Selecting response from {} optimal responses.'.format(
                    len(response_list)
                )
            )
            response = self.select_response(input_statement, response_list)
            response.confidence = closest_match.confidence
            self.logger.info('Response selected. Using "{}"'.format(response.text))
        else:
            response = self.chatbot.storage.get_random()
            self.logger.info(
                'No response to "{}" found. Selecting a random response.'.format(
                    closest_match.text
                )
            )

            # Set confidence to zero because a random response is selected
            response.confidence = 0

        return response
|
@ -0,0 +1,100 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from chatterbot.adapters import Adapter
|
||||||
|
from chatterbot.utils import import_module
|
||||||
|
|
||||||
|
|
||||||
|
class LogicAdapter(Adapter):
    """
    This is an abstract class that represents the interface
    that all logic adapters should implement.

    :param statement_comparison_function: A statement comparison function,
        or the dot-notated import path to one.
        Defaults to ``levenshtein_distance``.

    :param response_selection_method: A response selection method,
        or the dot-notated import path to one.
        Defaults to ``get_first_response``.
    """

    def __init__(self, **kwargs):
        super(LogicAdapter, self).__init__(**kwargs)
        import sys
        from chatterbot.comparisons import levenshtein_distance
        from chatterbot.response_selection import get_first_response

        # On Python 2 an import path may be a ``unicode`` object (for
        # example when the caller uses ``unicode_literals``), which is not
        # an instance of ``str``; accept every string type.
        if sys.version_info[0] < 3:
            string_types = basestring  # NOQA
        else:
            string_types = str

        # Import string module parameters
        for option in ('statement_comparison_function', 'response_selection_method'):
            import_path = kwargs.get(option)
            if isinstance(import_path, string_types):
                kwargs[option] = import_module(import_path)

        # By default, compare statements using Levenshtein distance
        self.compare_statements = kwargs.get(
            'statement_comparison_function',
            levenshtein_distance
        )

        # By default, select the first available response
        self.select_response = kwargs.get(
            'response_selection_method',
            get_first_response
        )

    def get_initialization_functions(self):
        """
        Return a dictionary of functions to be run once when the chat bot is instantiated.

        The dictionary is obtained from the statement comparison function.
        """
        return self.compare_statements.get_initialization_functions()

    def initialize(self):
        """
        Call every function returned by ``get_initialization_functions``.
        """
        for function in self.get_initialization_functions().values():
            function()

    def can_process(self, statement):
        """
        A preliminary check that is called to determine if a
        logic adapter can process a given statement. By default,
        this method returns true but it can be overridden in
        child classes as needed.

        :rtype: bool
        """
        return True

    def process(self, statement):
        """
        Override this method and implement your logic for selecting a response to an input statement.

        A confidence value and the selected response statement should be returned.
        The confidence value represents a rating of how accurate the logic adapter
        expects the selected response to be. Confidence scores are used to select
        the best response from multiple logic adapters.

        The confidence value should be a number between 0 and 1 where 0 is the
        lowest confidence level and 1 is the highest.

        :param statement: An input statement to be processed by the logic adapter.
        :type statement: Statement

        :rtype: Statement
        """
        raise self.AdapterMethodNotImplementedError()

    @property
    def class_name(self):
        """
        Return the name of the current logic adapter class.
        This is typically used for logging and debugging.
        """
        return str(self.__class__.__name__)

    class EmptyDatasetException(Exception):
        """
        Exception raised when at least one statement was expected
        but an empty set was received.
        """

        def __init__(self, value='An empty set was received when at least one statement was expected.'):
            self.value = value

        def __str__(self):
            return repr(self.value)
|
@ -0,0 +1,58 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from chatterbot.conversation import Statement
|
||||||
|
from .best_match import BestMatch
|
||||||
|
|
||||||
|
|
||||||
|
class LowConfidenceAdapter(BestMatch):
    """
    Returns a default response with a high confidence
    when a high confidence response is not known.

    :kwargs:
        * *threshold* (``float``) --
          The low confidence value that triggers this adapter.
          Defaults to 0.65.
        * *default_response* (``str``) or (``iterable``)--
          The response returned by this logic adapter.
        * *response_selection_method* (``str``) or (``callable``)
          A response selection method.
          Defaults to ``get_first_response``.
    """

    def __init__(self, **kwargs):
        super(LowConfidenceAdapter, self).__init__(**kwargs)
        import sys

        self.confidence_threshold = kwargs.get('threshold', 0.65)

        default_responses = kwargs.get(
            'default_response', "I'm sorry, I do not understand."
        )

        # On Python 2 the default above is a ``unicode`` object (this module
        # uses ``unicode_literals``), which is not an instance of ``str``.
        # Checking against every string type prevents a single response
        # from being iterated character by character below.
        if sys.version_info[0] < 3:
            string_types = basestring  # NOQA
        else:
            string_types = str

        # Convert a single string into a list
        if isinstance(default_responses, string_types):
            default_responses = [
                default_responses
            ]

        self.default_responses = [
            Statement(text=default) for default in default_responses
        ]

    def process(self, input_statement):
        """
        Return a default response with a high confidence if
        a high confidence response is not known.

        :param input_statement: The statement to respond to.
        :returns: One of the default responses, with a confidence of 1
            when the closest known match scores below the threshold and
            a confidence of 0 otherwise.
        """
        # Select the closest match to the input statement
        closest_match = self.get(input_statement)

        # Choose a response from the list of options
        response = self.select_response(input_statement, self.default_responses)

        # Confidence should be high only if it is less than the threshold
        if closest_match.confidence < self.confidence_threshold:
            response.confidence = 1
        else:
            response.confidence = 0

        return response
|
@ -0,0 +1,67 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from chatterbot.logic import LogicAdapter
|
||||||
|
from chatterbot.conversation import Statement
|
||||||
|
|
||||||
|
|
||||||
|
class MathematicalEvaluation(LogicAdapter):
    """
    Logic adapter that looks for a mathematical expression in the input.
    When one is found and can be evaluated, the response consists of the
    extracted expression followed by its result.

    For example:
        User: 'What is three plus five?'
        Bot: 'Three plus five equals eight'

    :kwargs:
        * *language* (``str``) --
          The language is set to 'ENG' for English by default.
    """

    def __init__(self, **kwargs):
        super(MathematicalEvaluation, self).__init__(**kwargs)

        # Results computed by can_process, keyed by the input text
        self.cache = {}
        self.language = kwargs.get('language', 'ENG')

    def can_process(self, statement):
        """
        Decide whether this adapter should answer the given input.

        The statement is processed once here and the outcome is stored
        so that the following call to ``process`` can reuse it.
        """
        result = self.process(statement)
        self.cache[statement.text] = result
        return result.confidence == 1

    def process(self, statement):
        """
        Extract the expression contained in the statement text and
        return a statement holding the expression and its solution.
        """
        from mathparse import mathparse

        text = statement.text

        # Reuse the result stored by can_process, then reset the cache
        if text in self.cache:
            stored = self.cache[text]
            self.cache = {}
            return stored

        # Pull the mathematical terms out of the input text
        expression = mathparse.extract_expression(text, language=self.language)
        response = Statement(text=expression)

        try:
            solution = mathparse.parse(expression, language=self.language)
            response.text += ' = ' + str(solution)

            # Full confidence once the expression has been evaluated
            response.confidence = 1
        except mathparse.PostfixTokenEvaluationException:
            response.confidence = 0

        return response
|
@ -0,0 +1,153 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from collections import Counter
|
||||||
|
from chatterbot import utils
|
||||||
|
from .logic_adapter import LogicAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class MultiLogicAdapter(LogicAdapter):
    """
    Container adapter that lets ChatterBot work with several logic
    adapters at once. Adapters can be added, inserted or removed, the
    chat bot can be assigned to every adapter, and an input statement
    can be processed to obtain a single response.
    """

    def __init__(self, **kwargs):
        super(MultiLogicAdapter, self).__init__(**kwargs)

        # Required logic adapters that must always be present
        self.system_adapters = []

        # Logic adapters added by the chat bot
        self.adapters = []

    def get_initialization_functions(self):
        """
        Collect the initialization functions of every logic adapter
        into a single dictionary.
        """
        collected = {}

        for adapter in self.get_adapters():
            collected.update(adapter.get_initialization_functions())

        return collected

    def process(self, statement):
        """
        Run the input statement through every adapter that is able to
        process it and return the selected response.

        :param statement: The input statement to be processed.
        """
        candidates = []
        best = None
        best_confidence = -1

        for adapter in self.get_adapters():
            if not adapter.can_process(statement):
                self.logger.info(
                    'Not processing the statement using {}'.format(adapter.class_name)
                )
                continue

            output = adapter.process(statement)
            candidates.append((output.confidence, output, ))

            self.logger.info(
                '{} selected "{}" as a response with a confidence of {}'.format(
                    adapter.class_name, output.text, output.confidence
                )
            )

            if output.confidence > best_confidence:
                best = output
                best_confidence = output.confidence

        # A statement chosen by more than one adapter is preferred over
        # the single most confident one (only with three or more results)
        if len(candidates) >= 3:
            tally = Counter([candidate[1] for candidate in candidates])
            ranked = tally.most_common()
            winner, occurrences = ranked[0]

            if occurrences > 1:
                best = winner
                best_confidence = self.get_greatest_confidence(best, candidates)

        best.confidence = best_confidence
        return best

    def get_greatest_confidence(self, statement, options):
        """
        Find the highest confidence recorded for a statement that
        appears more than once among the options.

        :param statement: A statement object.
        :param options: A tuple in the format of (confidence, statement).
        """
        confidences = [
            option[0] for option in options if option[1] == statement
        ]

        return max(confidences)

    def get_adapters(self):
        """
        Build a new list holding the regular logic adapters followed by
        the system logic adapters.
        """
        return list(self.adapters) + list(self.system_adapters)

    def add_adapter(self, adapter, **kwargs):
        """
        Validate and initialize a logic adapter, then append it to the
        list of logic adapters being used.

        :param adapter: The logic adapter to be added.
        :type adapter: `LogicAdapter`
        """
        utils.validate_adapter_class(adapter, LogicAdapter)
        instance = utils.initialize_class(adapter, **kwargs)
        self.adapters.append(instance)

    def insert_logic_adapter(self, logic_adapter, insert_index, **kwargs):
        """
        Create a logic adapter and place it at the given position.

        :param logic_adapter: The string path to the logic adapter to add.
        :type logic_adapter: str

        :param insert_index: The index to insert the logic adapter into the list at.
        :type insert_index: int
        """
        utils.validate_adapter_class(logic_adapter, LogicAdapter)

        adapter_class = utils.import_module(logic_adapter)
        instance = adapter_class(**kwargs)

        self.adapters.insert(insert_index, instance)

    def remove_logic_adapter(self, adapter_name):
        """
        Remove the first logic adapter whose class name matches.

        :param adapter_name: The class name of the adapter to remove.
        :type adapter_name: str

        :returns: True if an adapter was removed, False otherwise.
        """
        for position, candidate in enumerate(self.adapters):
            if adapter_name == type(candidate).__name__:
                del self.adapters[position]
                return True

        return False

    def set_chatbot(self, chatbot):
        """
        Assign the chat bot to this adapter and to every contained
        logic adapter.
        """
        super(MultiLogicAdapter, self).set_chatbot(chatbot)

        for contained in self.get_adapters():
            contained.set_chatbot(chatbot)
|
@ -0,0 +1,26 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from .logic_adapter import LogicAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class NoKnowledgeAdapter(LogicAdapter):
    """
    This is a system adapter that is automatically added
    to the list of logic adapters during initialization.
    This adapter is placed at the beginning of the list
    to be given the highest priority.
    """

    def process(self, statement):
        """
        Echo the input statement back, using the confidence to signal
        whether the storage holds anything: a confidence of 1 when the
        storage count is zero, and a confidence of 0 otherwise.
        """
        known_count = self.chatbot.storage.count()
        statement.confidence = 0 if known_count else 1
        return statement
|
@ -0,0 +1,38 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from .logic_adapter import LogicAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class SpecificResponseAdapter(LogicAdapter):
    """
    Return a specific response to a specific input.

    :kwargs:
        * *input_text* (``str``) --
          The input text that triggers this logic adapter.
        * *output_text* (``str``) --
          The output text returned by this logic adapter.
    """

    def __init__(self, **kwargs):
        super(SpecificResponseAdapter, self).__init__(**kwargs)
        from chatterbot.conversation import Statement

        self.input_text = kwargs.get('input_text')
        self.response_statement = Statement(kwargs.get('output_text'))

    def can_process(self, statement):
        """
        Tell whether the statement equals the configured input text.
        """
        return True if statement == self.input_text else False

    def process(self, statement):
        """
        Return the configured response, with a confidence of 1 when the
        statement equals the configured input text and 0 otherwise.
        """
        is_match = statement == self.input_text
        self.response_statement.confidence = 1 if is_match else 0
        return self.response_statement
|
@ -0,0 +1,91 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from datetime import datetime
|
||||||
|
from .logic_adapter import LogicAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class TimeLogicAdapter(LogicAdapter):
    """
    The TimeLogicAdapter returns the current time.

    :kwargs:
        * *positive* (``list``) --
          The time-related questions used to identify time questions.
          Defaults to a list of English sentences.
        * *negative* (``list``) --
          The non-time-related questions used to identify time questions.
          Defaults to a list of English sentences.
    """

    def __init__(self, **kwargs):
        super(TimeLogicAdapter, self).__init__(**kwargs)
        from nltk import NaiveBayesClassifier

        self.positive = kwargs.get('positive', [
            'what time is it',
            'hey what time is it',
            'do you have the time',
            'do you know the time',
            'do you know what time it is',
            'what is the time'
        ])

        # Every entry needs its own trailing comma: adjacent string
        # literals are silently concatenated into a single sentence.
        self.negative = kwargs.get('negative', [
            'it is time to go to sleep',
            'what is your favorite color',
            'i had a great time',
            'thyme is my favorite herb',
            'do you have time to look at my essay',
            'how do you have the time to do all this',
            'what is it'
        ])

        # Negative examples are labeled 0 and positive examples 1, so the
        # label predicted by the classifier doubles as a confidence value.
        labeled_data = (
            [(name, 0) for name in self.negative] +
            [(name, 1) for name in self.positive]
        )

        train_set = [
            (self.time_question_features(text), n) for (text, n) in labeled_data
        ]

        self.classifier = NaiveBayesClassifier.train(train_set)

    def time_question_features(self, text):
        """
        Provide an analysis of significant features in the string.

        :param text: The sentence to extract features from.
        :returns: A dictionary mapping feature names to their values.
        """
        features = {}
        known_sentences = self.positive + self.negative

        # A list of all words from the known sentences
        all_words = " ".join(known_sentences).split()

        # A list of the first word in each of the known sentences
        all_first_words = [
            sentence.split(' ', 1)[0] for sentence in known_sentences
        ]

        words = text.split()

        for word in words:
            features['first_word({})'.format(word)] = (word in all_first_words)

        for word in words:
            features['contains({})'.format(word)] = (word in all_words)

        # Lowercase once instead of on every iteration of the loop below
        lowered = text.lower()

        for letter in 'abcdefghijklmnopqrstuvwxyz':
            features['count({})'.format(letter)] = lowered.count(letter)
            features['has({})'.format(letter)] = (letter in lowered)

        return features

    def process(self, statement):
        """
        Build a response stating the current local time.

        The confidence of the response is the label (0 or 1) that the
        classifier assigns to the lowercased statement text.
        """
        from chatterbot.conversation import Statement

        now = datetime.now()

        time_features = self.time_question_features(statement.text.lower())
        confidence = self.classifier.classify(time_features)
        response = Statement('The current time is ' + now.strftime('%I:%M %p'))

        response.confidence = confidence
        return response
|
@ -0,0 +1,15 @@
|
|||||||
|
from .output_adapter import OutputAdapter
|
||||||
|
from .microsoft import Microsoft
|
||||||
|
from .terminal import TerminalAdapter
|
||||||
|
from .mailgun import Mailgun
|
||||||
|
from .gitter import Gitter
|
||||||
|
from .hipchat import HipChat
|
||||||
|
|
||||||
|
__all__ = (
|
||||||
|
'OutputAdapter',
|
||||||
|
'Microsoft',
|
||||||
|
'TerminalAdapter',
|
||||||
|
'Mailgun',
|
||||||
|
'Gitter',
|
||||||
|
'HipChat',
|
||||||
|
)
|
@ -0,0 +1,85 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from .output_adapter import OutputAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class Gitter(OutputAdapter):
    """
    Output adapter used by a ChatterBot instance to post its
    responses to a Gitter room.

    Reads the ``gitter_host`` (defaults to the public v1 API),
    ``gitter_room`` and ``gitter_api_token`` keyword arguments and
    joins the room as part of initialization.
    """

    def __init__(self, **kwargs):
        super(Gitter, self).__init__(**kwargs)

        self.gitter_host = kwargs.get('gitter_host', 'https://api.gitter.im/v1/')
        self.gitter_room = kwargs.get('gitter_room')
        self.gitter_api_token = kwargs.get('gitter_api_token')

        self.headers = {
            'Authorization': 'Bearer {}'.format(self.gitter_api_token),
            'Content-Type': 'application/json; charset=utf-8',
            'Accept': 'application/json'
        }

        # Joining the room yields the room id used when sending messages
        joined_room = self.join_room(self.gitter_room)
        self.room_id = joined_room.get('id')

    def _validate_status_code(self, response):
        """
        Raise an HTTPStatusException unless the status code is 200 or 201.
        """
        status = response.status_code
        if status in (200, 201):
            return
        raise self.HTTPStatusException('{} status code recieved'.format(status))

    def join_room(self, room_name):
        """
        Join the Gitter room with the given name and return the decoded
        JSON body of the API response.
        """
        import requests

        url = '{}rooms'.format(self.gitter_host)
        payload = {'uri': room_name}
        response = requests.post(url, headers=self.headers, json=payload)

        self.logger.info('{} status joining room {}'.format(
            response.status_code, url
        ))
        self._validate_status_code(response)

        return response.json()

    def send_message(self, text):
        """
        Post a chat message to the joined Gitter room and return the
        decoded JSON body of the API response.
        """
        import requests

        url = '{}rooms/{}/chatMessages'.format(self.gitter_host, self.room_id)
        payload = {'text': text}
        response = requests.post(url, headers=self.headers, json=payload)

        self.logger.info('{} sending message to {}'.format(
            response.status_code, url
        ))
        self._validate_status_code(response)

        return response.json()

    def process_response(self, statement, session_id=None):
        """
        Send the text of the statement to the room, then hand the
        statement back unchanged.
        """
        self.send_message(statement.text)
        return statement

    class HTTPStatusException(Exception):
        """
        Raised when a response comes back with an HTTP status code
        other than the expected success codes.
        """

        def __init__(self, value):
            self.value = value

        def __str__(self):
            return repr(self.value)
|
@ -0,0 +1,67 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
import json
|
||||||
|
from .output_adapter import OutputAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class HipChat(OutputAdapter):
    """
    Output adapter used by a ChatterBot instance to post its
    responses to a HipChat room.

    Reads the ``hipchat_host``, ``hipchat_access_token``,
    ``hipchat_room`` and ``ssl_verify`` (defaults to True) keyword
    arguments.
    """

    def __init__(self, **kwargs):
        super(HipChat, self).__init__(**kwargs)

        self.hipchat_host = kwargs.get("hipchat_host")
        self.hipchat_access_token = kwargs.get("hipchat_access_token")
        self.hipchat_room = kwargs.get("hipchat_room")

        self.headers = {
            'Authorization': "Bearer {}".format(self.hipchat_access_token),
            'Content-Type': 'application/json'
        }

        import requests

        # One shared session carries the certificate verification setting
        session = requests.Session()
        session.verify = kwargs.get('ssl_verify', True)
        self.session = session

    def send_message(self, room_id_or_name, message):
        """
        Post a message to a HipChat room and return the decoded JSON
        body of the API response.
        https://www.hipchat.com/docs/apiv2/method/send_message
        """
        url = "{}/v2/room/{}/message".format(self.hipchat_host, room_id_or_name)
        body = json.dumps({'message': message})

        response = self.session.post(url, headers=self.headers, data=body)

        return response.json()

    def reply_to_message(self):
        """
        Not implemented. The HipChat api supports responding to a given
        message, which may be a good feature to add in the future to
        help with multi-user conversations.
        https://www.hipchat.com/docs/apiv2/method/reply_to_message
        """
        raise self.AdapterMethodNotImplementedError()

    def process_response(self, statement, session_id=None):
        """
        Send the statement text to the configured room, record the id
        of the created message on the statement and return the statement.
        """
        result = self.send_message(self.hipchat_room, statement.text)

        # Update the output statement with the message id
        # NOTE(review): this passes the return value of add_extra_data to
        # storage.update -- confirm that add_extra_data returns the statement.
        self.chatbot.storage.update(
            statement.add_extra_data('hipchat_message_id', result['id'])
        )

        return statement
|
@ -0,0 +1,49 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from .output_adapter import OutputAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class Mailgun(OutputAdapter):
    """
    Output adapter that delivers responses as emails by posting them
    to a Mailgun API endpoint.

    Reads the ``name``, ``mailgun_from_address``, ``mailgun_api_key``,
    ``mailgun_api_endpoint`` and ``mailgun_recipients`` keyword arguments.
    """

    def __init__(self, **kwargs):
        super(Mailgun, self).__init__(**kwargs)

        # The bot's name doubles as the display name of the sender
        self.name = kwargs.get('name')

        self.endpoint = kwargs.get('mailgun_api_endpoint')
        self.api_key = kwargs.get('mailgun_api_key')
        self.from_address = kwargs.get('mailgun_from_address')
        self.recipients = kwargs.get('mailgun_recipients')

    def send_message(self, subject, text, from_address, recipients):
        """
        Post an email to the configured endpoint and return the response.

        * subject: Subject of the email.
        * text: Text body of the email.
        * from_address: The email address that the message will be sent from.
        * recipients: A list of recipient email addresses.
        """
        import requests

        credentials = ('api', self.api_key)
        form = {
            'from': '%s <%s>' % (self.name, from_address),
            'to': recipients,
            'subject': subject,
            'text': text
        }

        return requests.post(self.endpoint, auth=credentials, data=form)

    def process_response(self, statement, session_id=None):
        """
        Email the text of the response statement to the configured
        recipients, then return the statement.
        """
        self.send_message(
            'Message from %s' % (self.name),
            statement.text,
            self.from_address,
            self.recipients
        )

        return statement
|
@ -0,0 +1,109 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
import json
|
||||||
|
from .output_adapter import OutputAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class Microsoft(OutputAdapter):
    """
    An output adapter that allows a ChatterBot instance to send
    responses to a Microsoft bot using *Direct Line client protocol*.

    :kwargs:
        * *directline_host* (``str``) --
          Base URL of the Direct Line service.
          Defaults to ``https://directline.botframework.com``.
        * *direct_line_token_or_secret* (``str``) --
          Credential sent in the ``Authorization`` header.
        * *conversation_id* (``str``) --
          The conversation that messages are sent to and read from.
        * *ssl_verify* (``bool``) --
          Whether TLS certificates are verified for every request.
          Defaults to ``True``.
    """

    def __init__(self, **kwargs):
        super(Microsoft, self).__init__(**kwargs)

        self.directline_host = kwargs.get(
            'directline_host',
            'https://directline.botframework.com'
        )
        self.direct_line_token_or_secret = kwargs.get(
            'direct_line_token_or_secret'
        )
        self.conversation_id = kwargs.get('conversation_id')

        # Certificate verification used to be disabled for reads only,
        # which exposed the authorization secret to interception. It is
        # now applied uniformly and can be turned off explicitly.
        self.ssl_verify = kwargs.get('ssl_verify', True)

        authorization_header = 'BotConnector {}'.format(
            self.direct_line_token_or_secret
        )

        self.headers = {
            'Authorization': authorization_header,
            'Content-Type': 'application/json'
        }

    def _validate_status_code(self, response):
        """
        Raise an HTTPStatusException unless the status code is 200 or 204.
        """
        status_code = response.status_code
        if status_code not in [200, 204]:
            raise self.HTTPStatusException('{} status code recieved'.format(status_code))

    def get_most_recent_message(self):
        """
        Return the most recently sent message, or None when the
        conversation holds no messages.

        :raises HTTPStatusException: If the request does not succeed.
        """
        import requests
        endpoint = '{host}/api/conversations/{id}/messages'.format(
            host=self.directline_host,
            id=self.conversation_id
        )

        response = requests.get(
            endpoint,
            headers=self.headers,
            verify=self.ssl_verify
        )

        self.logger.info('{} retrieving most recent messages {}'.format(
            response.status_code, endpoint
        ))

        self._validate_status_code(response)

        data = response.json()

        if data['messages']:
            # The watermark is used as a 1-based position in the message list
            last_msg = int(data['watermark'])
            return data['messages'][last_msg - 1]
        return None

    def send_message(self, conversation_id, message):
        """
        Send a message to a Direct Line conversation and return the
        most recent message of the adapter's configured conversation.

        :param conversation_id: The id of the conversation to post to.
        :param message: The text of the message.
        :raises HTTPStatusException: If a request does not succeed.
        """
        import requests

        message_url = "{host}/api/conversations/{conversationId}/messages".format(
            host=self.directline_host,
            conversationId=conversation_id
        )

        response = requests.post(
            message_url,
            headers=self.headers,
            data=json.dumps({
                'message': message
            }),
            verify=self.ssl_verify
        )

        self.logger.info('{} sending message {}'.format(
            response.status_code, message_url
        ))
        self._validate_status_code(response)
        # Microsoft return 204 on operation succeeded and no content was returned.
        return self.get_most_recent_message()

    def process_response(self, statement, session_id=None):
        """
        Send the statement text to the configured conversation, log the
        message data that comes back and return the statement.
        """
        data = self.send_message(self.conversation_id, statement.text)
        self.logger.info('processing user response {}'.format(data))
        return statement

    class HTTPStatusException(Exception):
        """
        Exception raised when unexpected non-success HTTP
        status codes are returned in a response.
        """

        def __init__(self, value):
            self.value = value

        def __str__(self):
            return repr(self.value)
|
@ -0,0 +1,20 @@
|
|||||||
|
from chatterbot.adapters import Adapter
|
||||||
|
|
||||||
|
|
||||||
|
class OutputAdapter(Adapter):
    """
    Base class for output adapters. Subclasses override
    ``process_response`` to add behavior such as delivering the
    response to an API endpoint.
    """

    def process_response(self, statement, session_id=None):
        """
        Hook for subclasses; this implementation hands the statement
        back untouched.

        :param statement: The statement that the chat bot has produced in response to some input.

        :param session_id: The unique id of the current chat session.

        :returns: The response statement.
        """
        return statement
|
@ -0,0 +1,16 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
from .output_adapter import OutputAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class TerminalAdapter(OutputAdapter):
    """
    Minimal output adapter through which ChatterBot
    talks to the user on the terminal.
    """

    def process_response(self, statement, session_id=None):
        """
        Print the text of the response and return that text.
        """
        # NOTE(review): this returns the text, not the statement object --
        # confirm that callers expect a string here.
        text = statement.text
        print(text)
        return text
|
@ -0,0 +1,751 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import re
|
||||||
|
from datetime import timedelta, datetime
|
||||||
|
import calendar
|
||||||
|
|
||||||
|
# Variations of dates that the parser can capture
year_variations = ['year', 'years', 'yrs']
day_variations = ['days', 'day']
minute_variations = ['minute', 'minutes', 'mins']
hour_variations = ['hrs', 'hours', 'hour']
week_variations = ['weeks', 'week', 'wks']
month_variations = ['month', 'months']

# Building blocks for the regular expressions below.
# Every fragment containing a backslash is a raw string so that sequences
# such as \s and \d reach the regex engine without triggering Python's
# invalid-escape-sequence warnings.
day_names = 'monday|tuesday|wednesday|thursday|friday|saturday|sunday'
month_names_long = (
    'january|february|march|april|may|june|july|august|september|october|november|december'
)
month_names = month_names_long + '|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec'
day_nearest_names = 'today|yesterday|tomorrow|tonight|tonite'
numbers = (
    r'(^a(?=\s)|one|two|three|four|five|six|seven|eight|nine|ten|'
    r'eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|'
    r'eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|'
    r'eighty|ninety|hundred|thousand)'
)
# Units accepted in "<n> <unit> ago" style phrases (hours are not included).
re_dmy = '(' + '|'.join(day_variations + minute_variations + year_variations + week_variations + month_variations) + ')'
re_duration = r'(before|after|earlier|later|ago|from\snow)'
re_year = r'(19|20)\d{2}|^(19|20)\d{2}'
re_timeframe = r'this|coming|next|following|previous|last|end\sof\sthe'
# 'forth' is the misspelling that HASHORDINALS also maps to 4; the original
# alternation listed 'fourth' twice instead.
re_ordinal = 'st|nd|rd|th|first|second|third|fourth|forth|' + re_timeframe
re_time = r'(?P<hour>\d{1,2})(\:(?P<minute>\d{1,2})|(?P<convention>am|pm))'
re_separator = 'of|at|on'
|
||||||
|
|
||||||
|
# A list of (compiled regular expression, parser function) tuples.
# Each parser function receives the match object and the base date and
# returns the parsed value for that match.
# Start with the widest match and narrow it down because the order of the match in this list matters
regex = [
    # [weekday,] day[ordinal] month year [separator] [time]
    (
        re.compile(
            r'''
            (
                ((?P<dow>%s)[,\s]\s*)? #Matches Monday, 12 Jan 2012, 12 Jan 2012 etc
                (?P<day>\d{1,2}) # Matches a digit
                (%s)?
                [-\s] # One or more space
                (?P<month>%s) # Matches any month name
                [-\s] # Space
                (?P<year>%s) # Year
                ((\s|,\s|\s(%s))?\s*(%s))?
            )
            ''' % (day_names, re_ordinal, month_names, re_year, re_separator, re_time),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            int(m.group('year') if m.group('year') else base_date.year),
            HASHMONTHS[m.group('month').strip().lower()],
            int(m.group('day') if m.group('day') else 1),
        ) + timedelta(**convert_time_to_hour_minute(
            m.group('hour'),
            m.group('minute'),
            m.group('convention')
        ))
    ),
    # [weekday,] month day[ordinal] [year] [separator] [time]
    (
        re.compile(
            r'''
            (
                ((?P<dow>%s)[,\s][-\s]*)? #Matches Monday, Jan 12 2012, Jan 12 2012 etc
                (?P<month>%s) # Matches any month name
                [-\s] # Space
                ((?P<day>\d{1,2})) # Matches a digit
                (%s)?
                ([-\s](?P<year>%s))? # Year
                ((\s|,\s|\s(%s))?\s*(%s))?
            )
            ''' % (day_names, month_names, re_ordinal, re_year, re_separator, re_time),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            int(m.group('year') if m.group('year') else base_date.year),
            HASHMONTHS[m.group('month').strip().lower()],
            int(m.group('day') if m.group('day') else 1)
        ) + timedelta(**convert_time_to_hour_minute(
            m.group('hour'),
            m.group('minute'),
            m.group('convention')
        ))
    ),
    # month day[ordinal] year [separator] [time]
    (
        re.compile(
            r'''
            (
                (?P<month>%s) # Matches any month name
                [-\s] # One or more space
                (?P<day>\d{1,2}) # Matches a digit
                (%s)?
                [-\s]\s*?
                (?P<year>%s) # Year
                ((\s|,\s|\s(%s))?\s*(%s))?
            )
            ''' % (month_names, re_ordinal, re_year, re_separator, re_time),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            int(m.group('year') if m.group('year') else base_date.year),
            HASHMONTHS[m.group('month').strip().lower()],
            int(m.group('day') if m.group('day') else 1),
        ) + timedelta(**convert_time_to_hour_minute(
            m.group('hour'),
            m.group('minute'),
            m.group('convention')
        ))
    ),
    # [number] unit duration [nearest-day adverb] [separator] [time];
    # the unit and duration are lower-cased before being handed to
    # date_from_duration.
    (
        re.compile(
            r'''
            (
                ((?P<number>\d+|(%s[-\s]?)+)\s)? # Matches any number or string 25 or twenty five
                (?P<unit>%s)s?\s # Matches days, months, years, weeks, minutes
                (?P<duration>%s) # before, after, earlier, later, ago, from now
                (\s*(?P<base_time>(%s)))?
                ((\s|,\s|\s(%s))?\s*(%s))?
            )
            ''' % (numbers, re_dmy, re_duration, day_nearest_names, re_separator, re_time),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: date_from_duration(
            base_date,
            m.group('number'),
            m.group('unit').lower(),
            m.group('duration').lower(),
            m.group('base_time')
        ) + timedelta(**convert_time_to_hour_minute(
            m.group('hour'),
            m.group('minute'),
            m.group('convention')
        ))
    ),
    # ordinal "quarter of" year; the ordinal text is looked up in
    # HASHORDINALS after lower-casing.
    (
        re.compile(
            r'''
            (
                (?P<ordinal>%s) # First quarter of 2014
                \s+
                quarter\sof
                \s+
                (?P<year>%s)
            )
            ''' % (re_ordinal, re_year),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: date_from_quarter(
            base_date,
            HASHORDINALS[m.group('ordinal').lower()],
            int(m.group('year') if m.group('year') else base_date.year)
        )
    ),
    # number+ordinal suffix [separator] month [year]
    (
        re.compile(
            r'''
            (
                (?P<ordinal_value>\d+)
                (?P<ordinal>%s) # 1st January 2012
                ((\s|,\s|\s(%s))?\s*)?
                (?P<month>%s)
                ([,\s]\s*(?P<year>%s))?
            )
            ''' % (re_ordinal, re_separator, month_names, re_year),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            int(m.group('year') if m.group('year') else base_date.year),
            int(HASHMONTHS[m.group('month').lower()] if m.group('month') else 1),
            int(m.group('ordinal_value') if m.group('ordinal_value') else 1),
        )
    ),
    # month number+ordinal suffix [year]
    (
        re.compile(
            r'''
            (
                (?P<month>%s)
                \s+
                (?P<ordinal_value>\d+)
                (?P<ordinal>%s) # January 1st 2012
                ([,\s]\s*(?P<year>%s))?
            )
            ''' % (month_names, re_ordinal, re_year),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            int(m.group('year') if m.group('year') else base_date.year),
            int(HASHMONTHS[m.group('month').lower()] if m.group('month') else 1),
            int(m.group('ordinal_value') if m.group('ordinal_value') else 1),
        )
    ),
    # timeframe [number] unit [separator] [time]; the captured text is
    # passed to date_from_relative_week_year without case normalisation.
    (
        re.compile(
            r'''
            (?P<time>%s) # this, next, following, previous, last
            \s+
            ((?P<number>\d+|(%s[-\s]?)+)\s)?
            (?P<dmy>%s) # year, day, week, month, night, minute, min
            ((\s|,\s|\s(%s))?\s*(%s))?
            ''' % (re_timeframe, numbers, re_dmy, re_separator, re_time),
            (re.VERBOSE | re.IGNORECASE),
        ),
        lambda m, base_date: date_from_relative_week_year(
            base_date,
            m.group('time'),
            m.group('dmy'),
            m.group('number')
        ) + timedelta(**convert_time_to_hour_minute(
            m.group('hour'),
            m.group('minute'),
            m.group('convention')
        ))
    ),
    # timeframe weekday-name [separator] [time]
    (
        re.compile(
            r'''
            (?P<time>%s) # this, next, following, previous, last
            \s+
            (?P<dow>%s) # mon - fri
            ((\s|,\s|\s(%s))?\s*(%s))?
            ''' % (re_timeframe, day_names, re_separator, re_time),
            (re.VERBOSE | re.IGNORECASE),
        ),
        lambda m, base_date: date_from_relative_day(
            base_date,
            m.group('time'),
            m.group('dow')
        ) + timedelta(**convert_time_to_hour_minute(
            m.group('hour'),
            m.group('minute'),
            m.group('convention')
        ))
    ),
    # day+ordinal month, resolved in the base date's year
    (
        re.compile(
            r'''
            (
                (?P<day>\d{1,2}) # Day, Month
                (%s)
                [-\s] # One or more space
                (?P<month>%s)
            )
            ''' % (re_ordinal, month_names),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            base_date.year,
            HASHMONTHS[m.group('month').strip().lower()],
            int(m.group('day') if m.group('day') else 1)
        )
    ),
    # month day[ordinal], resolved in the base date's year
    (
        re.compile(
            r'''
            (
                (?P<month>%s) # Month, day
                [-\s] # One or more space
                ((?P<day>\d{1,2})\b) # Matches a digit January 12
                (%s)?
            )
            ''' % (month_names, re_ordinal),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            base_date.year,
            HASHMONTHS[m.group('month').strip().lower()],
            int(m.group('day') if m.group('day') else 1)
        )
    ),
    # month followed by a 1-4 digit year; yields the first day of that month
    (
        re.compile(
            r'''
            (
                (?P<month>%s) # Month, year
                [-\s] # One or more space
                ((?P<year>\d{1,4})\b) # Matches a digit January 12
            )
            ''' % (month_names),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            int(m.group('year')),
            HASHMONTHS[m.group('month').strip().lower()],
            1
        )
    ),
    # numeric month/day with an optional /year; the numbers are passed to
    # datetime() as captured, with no range check here
    (
        re.compile(
            r'''
            (
                (?P<month>\d{1,2}) # MM/DD or MM/DD/YYYY
                /
                ((?P<day>\d{1,2}))
                (/(?P<year>%s))?
            )
            ''' % (re_year),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            int(m.group('year') if m.group('year') else base_date.year),
            int(m.group('month').strip()),
            int(m.group('day'))
        )
    ),
    # nearest-day adverb [separator] [time]
    (
        re.compile(
            r'''
            (?P<adverb>%s) # today, yesterday, tomorrow, tonight
            ((\s|,\s|\s(%s))?\s*(%s))?
            ''' % (day_nearest_names, re_separator, re_time),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: date_from_adverb(
            base_date,
            m.group('adverb')
        ) + timedelta(**convert_time_to_hour_minute(
            m.group('hour'),
            m.group('minute'),
            m.group('convention')
        ))
    ),
    # bare weekday name, resolved with this_week_day
    (
        re.compile(
            r'''
            (?P<named_day>%s) # Mon - Sun
            ''' % (day_names),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: this_week_day(
            base_date,
            HASHWEEKDAYS[m.group('named_day').lower()]
        )
    ),
    # bare year; yields January 1st of that year
    (
        re.compile(
            r'''
            (?P<year>%s) # Year
            ''' % (re_year),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(int(m.group('year')), 1, 1)
    ),
    # bare long month name; yields the first of that month in the base year
    (
        re.compile(
            r'''
            (?P<month>%s) # Month
            ''' % (month_names_long),
            (re.VERBOSE | re.IGNORECASE)
        ),
        lambda m, base_date: datetime(
            base_date.year,
            HASHMONTHS[m.group('month').lower()],
            1
        )
    ),
    # bare time of day, applied to the base date's calendar day
    (
        re.compile(
            r'''
            (%s) # Matches time 12:00
            ''' % (re_time),
            (re.VERBOSE | re.IGNORECASE),
        ),
        lambda m, base_date: datetime(
            base_date.year,
            base_date.month,
            base_date.day
        ) + timedelta(**convert_time_to_hour_minute(
            m.group('hour'),
            m.group('minute'),
            m.group('convention')
        ))
    ),
    # "<n> hours" style phrase; the number is passed directly as the hour
    # argument of datetime()
    (
        re.compile(
            r'''
            (
                (?P<hour>\d+) # Matches 12 hours, 2 hrs
                \s+
                (%s)
            )
            ''' % ('|'.join(hour_variations)),
            (re.VERBOSE | re.IGNORECASE),
        ),
        lambda m, base_date: datetime(
            base_date.year,
            base_date.month,
            base_date.day,
            int(m.group('hour'))
        )
    )
]
|
||||||
|
|
||||||
|
|
||||||
|
# Spelled-out numbers understood by hashnum(), keyed by lower-case word.
_NUMBER_WORDS = {
    'a': 1,
    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,
    'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14,
    'fifteen': 15, 'sixteen': 16, 'seventeen': 17, 'eighteen': 18,
    'nineteen': 19, 'twenty': 20, 'thirty': 30, 'forty': 40,
    'fifty': 50, 'sixty': 60, 'seventy': 70, 'eighty': 80,
    'ninety': 90, 'hundred': 100, 'thousand': 1000,
}

# Longest words are tried first so that 'fourteen' wins over its prefix
# 'four' (and 'sixty' over 'six', 'ninety' over 'nine', ...). The article
# 'a' only counts when it stands alone as a word.
_NUMBER_WORD_PATTERN = re.compile(
    '|'.join(sorted(
        (word for word in _NUMBER_WORDS if word != 'a'),
        key=len,
        reverse=True
    )) + r'|a\b',
    re.IGNORECASE
)


def hashnum(number):
    """
    Hash of numbers
    Append more entries to ``_NUMBER_WORDS`` to modify your match

    The lookup is case-insensitive and anchored at the start of the
    string, so trailing text is ignored ("twenty five" gives 20).

    :param number: Text beginning with a spelled-out number.
    :returns: The integer value of the leading number word, or ``None``
        when the text does not start with a known word.
    """
    found = _NUMBER_WORD_PATTERN.match(number)
    if found is None:
        return None
    return _NUMBER_WORDS[found.group(0).lower()]
|
||||||
|
|
||||||
|
|
||||||
|
def convert_string_to_number(value):
    """
    Convert strings to numbers

    ``None`` means "no number given" and counts as 1; integers and digit
    strings are returned as integers. Anything else is read as a phrase
    of spelled-out numbers separated by spaces or hyphens
    ("twenty-five", "two hundred").

    :param value: ``None``, an ``int``, or a string.
    :returns: The integer value; 0 when no number word is recognised.
    """
    if value is None:
        return 1
    if isinstance(value, int):
        return value
    if value.isdigit():
        return int(value)

    total = 0
    current = 0
    for word in re.split(r'[-\s]+', value.strip()):
        # Skip empty tokens and words hashnum() does not recognise.
        amount = hashnum(word) if word else None
        if amount is None:
            continue
        if amount == 100:
            # 'hundred' scales what came before it ("two hundred" -> 200);
            # a bare 'hundred' counts as 100.
            current = max(current, 1) * 100
        elif amount == 1000:
            total += max(current, 1) * 1000
            current = 0
        else:
            current += amount
    return total + current
|
||||||
|
|
||||||
|
|
||||||
|
def convert_time_to_hour_minute(hour, minute, convention):
    """
    Convert time to hour, minute

    :param hour: Hour as a string or int; ``None`` counts as 0.
    :param minute: Minute as a string or int; ``None`` counts as 0.
    :param convention: ``'am'`` / ``'pm'`` in any letter case, or ``None``
        when the time was written without one (the hour is then taken
        as given).
    :returns: A dict with ``hours`` and ``minutes`` keys, suitable for
        ``timedelta(**result)``.
    """
    hour = 0 if hour is None else int(hour)
    minute = 0 if minute is None else int(minute)

    if convention is not None:
        meridiem = convention.lower()
        if meridiem == 'pm' and hour < 12:
            # 1pm..11pm map to 13..23; 12pm is already noon.
            hour += 12
        elif meridiem == 'am' and hour == 12:
            # 12am is midnight, not noon.
            hour = 0

    return {'hours': hour, 'minutes': minute}
|
||||||
|
|
||||||
|
|
||||||
|
def date_from_quarter(base_date, ordinal, year):
    """
    Extract date from quarter of a year

    :param base_date: Unused; kept for signature compatibility with the
        other parser callbacks.
    :param ordinal: Quarter number, 1 to 4. Values below 1 (such as the
        -1 that ``HASHORDINALS`` uses for "last") select the fourth quarter.
    :param year: The calendar year of the quarter.
    :returns: A two-item list: midnight on the first day of the quarter
        and midnight on its last day.
    """
    interval = 3

    if ordinal < 1:
        ordinal = 4

    # Quarters start in January, April, July and October.
    month_start = interval * (ordinal - 1) + 1
    month_end = month_start + interval - 1

    return [
        datetime(year, month_start, 1),
        datetime(year, month_end, calendar.monthrange(year, month_end)[1])
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def date_from_relative_day(base_date, time, dow):
    """
    Resolve a relative weekday phrase to a date.
    Ex: this tuesday, last tuesday

    :param base_date: The date the phrase is relative to; its time of
        day is discarded.
    :param time: The timeframe word (this/coming, last/previous,
        next/following), in any letter case.
    :param dow: A weekday name that is a key of ``HASHWEEKDAYS``, in any
        letter case.
    :returns: The resolved datetime, or ``None`` for any other timeframe.
    """
    # Work from midnight of the base date
    midnight = datetime(base_date.year, base_date.month, base_date.day)
    timeframe = time.lower()
    weekday_name = dow.lower()

    if timeframe in ('this', 'coming'):
        return this_week_day(midnight, HASHWEEKDAYS[weekday_name])
    if timeframe in ('last', 'previous'):
        return previous_week_day(midnight, HASHWEEKDAYS[weekday_name])
    if timeframe in ('next', 'following'):
        return next_week_day(midnight, HASHWEEKDAYS[weekday_name])
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _shift_month(date, offset):
    """
    Return midnight of *date* moved by *offset* calendar months, clamping
    the day to the length of the target month (Jan 31 + 1 month -> Feb 28/29).
    """
    year, month_index = divmod(date.year * 12 + (date.month - 1) + offset, 12)
    month = month_index + 1
    day = min(date.day, calendar.monthrange(year, month)[1])
    return datetime(year, month, day)


def date_from_relative_week_year(base_date, time, dow, ordinal=1):
    """
    Converts a relative year, month, week or day phrase to a date
    Eg. this week, last month, end of the year

    :param base_date: The date the phrase is relative to; its time of
        day is discarded.
    :param time: The timeframe (this/coming, last/previous,
        next/following, "end of the"), in any letter case.
    :param dow: The unit word; one of the year, month, week or day
        variations, in any letter case.
    :param ordinal: Accepted for compatibility with the caller; it does
        not influence the result.
    :returns: The resolved datetime, or ``None`` when the timeframe or
        unit is not one of the supported values.
    """
    # Reset date to start of the day
    relative_date = datetime(base_date.year, base_date.month, base_date.day)

    # The patterns are matched case-insensitively and allow any whitespace
    # inside "end of the", so normalise before comparing.
    time = ' '.join(time.lower().split())
    dow = dow.lower()

    is_current = time in ('this', 'coming')
    is_past = time in ('last', 'previous')
    is_future = time in ('next', 'following')
    is_end = time == 'end of the'

    if dow in year_variations:
        if is_current:
            return datetime(relative_date.year, 1, 1)
        if is_past:
            return datetime(relative_date.year - 1, relative_date.month, 1)
        if is_future:
            # Mirror of the "last year" rule: first day of the same month.
            return datetime(relative_date.year + 1, relative_date.month, 1)
        if is_end:
            return datetime(relative_date.year, 12, 31)
    elif dow in month_variations:
        if is_current:
            return relative_date
        if is_past:
            return _shift_month(relative_date, -1)
        if is_future:
            return _shift_month(relative_date, 1)
        if is_end:
            return datetime(
                relative_date.year,
                relative_date.month,
                calendar.monthrange(relative_date.year, relative_date.month)[1]
            )
    elif dow in week_variations:
        if is_current:
            # Monday of the current week
            return relative_date - timedelta(days=relative_date.weekday())
        if is_past:
            return relative_date - timedelta(weeks=1)
        if is_future:
            return relative_date + timedelta(weeks=1)
        if is_end:
            # Sunday of the current week
            return relative_date + timedelta(days=6 - relative_date.weekday())
    elif dow in day_variations:
        if is_current:
            return relative_date
        if is_past:
            return relative_date - timedelta(days=1)
        if is_future:
            return relative_date + timedelta(days=1)
        if is_end:
            return datetime(relative_date.year, relative_date.month, relative_date.day, 23, 59, 59)

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def date_from_adverb(base_date, name):
    """
    Convert Day adverbs to dates
    Tomorrow => Date
    Today => Date

    :param base_date: The date the adverb is relative to; its time of
        day is discarded.
    :param name: today / tonight / tonite / yesterday / tomorrow / tom,
        in any letter case.
    :returns: Midnight of the resolved day, or ``None`` for any other word.
    """
    # Reset date to start of the day
    adverb_date = datetime(base_date.year, base_date.month, base_date.day)
    # The adverb is captured by a case-insensitive pattern.
    name = name.lower()

    if name in ('today', 'tonite', 'tonight'):
        # Relative to base_date, not to the wall clock, so that a parsed
        # time of day can be added on top of midnight by the caller.
        return adverb_date
    if name == 'yesterday':
        return adverb_date - timedelta(days=1)
    if name in ('tomorrow', 'tom'):
        return adverb_date + timedelta(days=1)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _shift_years(date, years):
    """
    Return midnight of *date* moved by *years* calendar years, clamping
    29 February to 28 February when the target year is not a leap year.
    """
    target_year = date.year + years
    day = min(date.day, calendar.monthrange(target_year, date.month)[1])
    return datetime(target_year, date.month, day)


def date_from_duration(base_date, number_as_string, unit, duration, base_time=None):
    """
    Find dates from duration
    Eg: 20 days from now
    Currently does not support strings like "20 days from last monday".

    :param base_date: The date the duration is counted from.
    :param number_as_string: The amount, as digits or words; ``None``
        counts as 1.
    :param unit: Lower-case unit word from the day, minute, week, month
        or year variations.
    :param duration: Lower-case direction word: ago/before/earlier or
        after/later/"from now".
    :param base_time: Optional nearest-day adverb (e.g. "yesterday")
        that replaces ``base_date`` as the starting point.
    :returns: The resolved datetime, or ``None`` for an unknown direction
        word. Year shifts are returned at midnight.
    """
    # Check if query is `2 days before yesterday` or `day before yesterday`
    if base_time is not None:
        base_date = date_from_adverb(base_date, base_time)

    num = convert_string_to_number(number_as_string)

    if unit in day_variations:
        args = {'days': num}
    elif unit in minute_variations:
        args = {'minutes': num}
    elif unit in week_variations:
        args = {'weeks': num}
    elif unit in month_variations:
        # Months are approximated as one twelfth of a 365-day year.
        args = {'days': 365 * num / 12}
    elif unit in year_variations:
        # Not a timedelta keyword; handled separately below.
        args = {'years': num}

    # "from now" is matched with any whitespace between the two words.
    duration = ' '.join(duration.split())

    if duration in ('ago', 'before', 'earlier'):
        if 'years' in args:
            return _shift_years(base_date, -args['years'])
        return base_date - timedelta(**args)
    if duration in ('after', 'later', 'from now'):
        if 'years' in args:
            return _shift_years(base_date, args['years'])
        return base_date + timedelta(**args)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def this_week_day(base_date, weekday):
    """
    Find the requested weekday in the current week, or in the following
    week when that day has already passed.

    :param base_date: The reference datetime.
    :param weekday: Target weekday as returned by ``datetime.weekday()``
        (0 = Monday ... 6 = Sunday).
    :returns: ``base_date`` moved to the requested weekday.
    """
    today_index = base_date.weekday()

    # If today is Tuesday and the query is `this monday`
    # the answer is the Monday of next week.
    if today_index > weekday:
        return next_week_day(base_date, weekday)

    # Start at this week's Monday and walk forward to the target.
    candidate = base_date - timedelta(days=today_index)
    while candidate.weekday() != weekday:
        candidate += timedelta(days=1)
    return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def previous_week_day(base_date, weekday):
    """
    Find the most recent occurrence of a weekday strictly before
    ``base_date``.

    :param base_date: The reference datetime.
    :param weekday: Target weekday as returned by ``datetime.weekday()``
        (0 = Monday ... 6 = Sunday).
    :returns: ``base_date`` moved back by one to seven days.
    """
    one_day = timedelta(days=1)
    candidate = base_date
    while True:
        candidate = candidate - one_day
        if candidate.weekday() == weekday:
            return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def next_week_day(base_date, weekday):
    """
    Find the requested weekday in the week after the one containing
    ``base_date``.

    :param base_date: The reference datetime.
    :param weekday: Target weekday as returned by ``datetime.weekday()``
        (0 = Monday ... 6 = Sunday).
    :returns: ``base_date`` moved to that weekday of next week.
    """
    # Jump straight to next week's Monday, then walk forward to the target.
    candidate = base_date + timedelta(days=7 - base_date.weekday())
    while candidate.weekday() != weekday:
        candidate += timedelta(days=1)
    return candidate
|
||||||
|
|
||||||
|
|
||||||
|
# Month names (long and abbreviated, lower-case) -> month number 1..12
HASHMONTHS = {
    'january': 1, 'jan': 1,
    'february': 2, 'feb': 2,
    'march': 3, 'mar': 3,
    'april': 4, 'apr': 4,
    'may': 5,
    'june': 6, 'jun': 6,
    'july': 7, 'jul': 7,
    'august': 8, 'aug': 8,
    'september': 9, 'sep': 9,
    'october': 10, 'oct': 10,
    'november': 11, 'nov': 11,
    'december': 12, 'dec': 12,
}

# Weekday names (long and abbreviated, lower-case) -> datetime.weekday()
# index, Monday = 0 ... Sunday = 6
HASHWEEKDAYS = {
    'monday': 0, 'mon': 0,
    'tuesday': 1, 'tue': 1,
    'wednesday': 2, 'wed': 2,
    'thursday': 3, 'thu': 3,
    'friday': 4, 'fri': 4,
    'saturday': 5, 'sat': 5,
    'sunday': 6, 'sun': 6,
}

# Ordinal words -> number; 'forth' is an accepted misspelling of 'fourth'
# and 'last' is represented by -1
HASHORDINALS = {
    'first': 1,
    'second': 2,
    'third': 3,
    'fourth': 4, 'forth': 4,
    'last': -1,
}
|
||||||
|
|
||||||
|
|
||||||
|
def datetime_parsing(text, base_date=None):
    """
    Extract datetime objects from a string of text.

    :param text: The text to scan.
    :param base_date: The datetime that relative expressions are resolved
        against. Defaults to the current time at the moment of the call.
    :returns: A list of ``(matched_text, parsed_value, (start, end))``
        tuples ordered by start position. Spans refer to the original text.
    """
    if base_date is None:
        # Resolved per call; a default evaluated in the signature would be
        # frozen at import time.
        base_date = datetime.now()

    matches = []
    found_array = []

    # Find the position in the string
    for expression, function in regex:
        for match in expression.finditer(text):
            matches.append((match.group(), function(match, base_date), match.span()))

    # Wrap the matched text with TAG element to prevent nested selections
    for match, value, spans in matches:
        # The matched text is data, not a pattern: escape it for the search
        # and insert it through a callable so it is never read as a
        # replacement template.
        pattern = '(?!<TAG[^>]*?>)' + re.escape(match) + '(?![^<]*?</TAG>)'
        text, substitutions = re.subn(
            pattern,
            lambda _found, matched=match: '<TAG>' + matched + '</TAG>',
            text
        )
        if substitutions != 0:
            found_array.append((match, value, spans))

    # To preserve order of the match, sort based on the start position
    return sorted(found_array, key=lambda found: found[2][0])
|
@ -0,0 +1,60 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Statement pre-processors.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def clean_whitespace(chatbot, statement):
    """
    Normalise the whitespace of the statement text: line breaks and tabs
    become spaces, surrounding whitespace is removed and runs of spaces
    are collapsed to a single space.

    :param chatbot: Unused; part of the preprocessor call signature.
    :param statement: Object with a ``text`` attribute; modified in place.
    :returns: The same statement object.
    """
    import re

    cleaned = statement.text

    # Turn linebreaks and tabs into plain spaces
    for control_character in ('\n', '\r', '\t'):
        cleaned = cleaned.replace(control_character, ' ')

    # Drop leading and trailing whitespace, then squeeze repeated spaces
    cleaned = re.sub(' {2,}', ' ', cleaned.strip())

    statement.text = cleaned
    return statement
|
||||||
|
|
||||||
|
|
||||||
|
def unescape_html(chatbot, statement):
    """
    Replace escaped html characters in the statement text with the
    characters they stand for.
    For example: "&lt;b&gt;" becomes "<b>".

    :param chatbot: Unused; part of the preprocessor call signature.
    :param statement: Object with a ``text`` attribute; modified in place.
    :returns: The same statement object.
    """
    import sys

    # Pick the unescape implementation for the running Python version
    if sys.version_info[0] >= 3:
        import html
    else:
        from HTMLParser import HTMLParser
        html = HTMLParser()

    unescaped = html.unescape(statement.text)
    statement.text = unescaped

    return statement
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_ascii(chatbot, statement):
    """
    Replace unicode characters in the statement text with their ASCII
    equivalents; characters without one are dropped.
    For example: "på fédéral" becomes "pa federal".

    :param chatbot: Unused; part of the preprocessor call signature.
    :param statement: Object with a ``text`` attribute; modified in place.
    :returns: The same statement object.
    """
    import sys
    import unicodedata

    # Python 2 needs an explicit unicode object before normalising
    if sys.version_info[0] < 3:
        statement.text = unicode(statement.text) # NOQA

    # Decompose accented characters, then keep only the ASCII code points
    decomposed = unicodedata.normalize('NFKD', statement.text)
    ascii_bytes = decomposed.encode('ascii', 'ignore')

    statement.text = str(ascii_bytes.decode('utf-8'))
    return statement
|
@ -0,0 +1,71 @@
|
|||||||
|
"""
|
||||||
|
Response selection methods determines which response should be used in
|
||||||
|
the event that multiple responses are generated within a logic adapter.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
def get_most_frequent_response(input_statement, response_list):
    """
    :param input_statement: A statement, that closely matches an input to the chat bot.
    :type input_statement: Statement

    :param response_list: A list of statement options to choose a response from.
    :type response_list: list

    :return: The response statement with the greatest number of occurrences;
        when several share the highest count, the last of them.
        ``None`` for an empty list.
    :rtype: Statement
    """
    logger = logging.getLogger(__name__)
    logger.info(u'Selecting response with greatest number of occurrences.')

    best_response = None
    best_count = -1

    for candidate in response_list:
        hits = candidate.get_response_count(input_statement)

        # Skip anything strictly less common than the current best
        if hits < best_count:
            continue

        best_response, best_count = candidate, hits

    return best_response
|
||||||
|
|
||||||
|
|
||||||
|
def get_first_response(input_statement, response_list):
    """
    :param input_statement: A statement, that closely matches an input to the chat bot.
    :type input_statement: Statement

    :param response_list: A list of statement options to choose a response from.
    :type response_list: list

    :return: The statement at the head of the response list.
    :rtype: Statement
    """
    option_count = len(response_list)
    message = u'Selecting first response from list of {} options.'.format(option_count)
    logging.getLogger(__name__).info(message)

    return response_list[0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_random_response(input_statement, response_list):
    """
    :param input_statement: A statement, that closely matches an input to the chat bot.
    :type input_statement: Statement

    :param response_list: A list of statement options to choose a response from.
    :type response_list: list

    :return: A randomly picked statement from the response list.
    :rtype: Statement
    """
    import random

    option_count = len(response_list)
    message = u'Selecting a response from list of {} options.'.format(option_count)
    logging.getLogger(__name__).info(message)

    return random.choice(response_list)
|
@ -0,0 +1,12 @@
|
|||||||
|
from .storage_adapter import StorageAdapter
|
||||||
|
from .django_storage import DjangoStorageAdapter
|
||||||
|
from .mongodb import MongoDatabaseAdapter
|
||||||
|
from .sql_storage import SQLStorageAdapter
|
||||||
|
|
||||||
|
|
||||||
|
# Names exported by `from <this package> import *`: the storage adapter
# classes imported above.
__all__ = (
    'StorageAdapter',
    'DjangoStorageAdapter',
    'MongoDatabaseAdapter',
    'SQLStorageAdapter',
)
|
@ -0,0 +1,220 @@
|
|||||||
|
from chatterbot.storage import StorageAdapter
|
||||||
|
from chatterbot import constants
|
||||||
|
|
||||||
|
|
||||||
|
class DjangoStorageAdapter(StorageAdapter):
    """
    Storage adapter that allows ChatterBot to interact with
    Django storage backends.

    :keyword django_app_name: Label of the Django app whose ``Statement``,
        ``Response``, ``Conversation`` and ``Tag`` models are used.
        Defaults to ``constants.DEFAULT_DJANGO_APP_NAME``.
    """

    def __init__(self, **kwargs):
        super(DjangoStorageAdapter, self).__init__(**kwargs)

        self.adapter_supports_queries = False
        self.django_app_name = kwargs.get(
            'django_app_name',
            constants.DEFAULT_DJANGO_APP_NAME
        )

    def get_statement_model(self):
        """
        Return the ``Statement`` model of the configured Django app.
        """
        from django.apps import apps
        return apps.get_model(self.django_app_name, 'Statement')

    def get_response_model(self):
        """
        Return the ``Response`` model of the configured Django app.
        """
        from django.apps import apps
        return apps.get_model(self.django_app_name, 'Response')

    def get_conversation_model(self):
        """
        Return the ``Conversation`` model of the configured Django app.
        """
        from django.apps import apps
        return apps.get_model(self.django_app_name, 'Conversation')

    def get_tag_model(self):
        """
        Return the ``Tag`` model of the configured Django app.
        """
        from django.apps import apps
        return apps.get_model(self.django_app_name, 'Tag')

    def count(self):
        """
        Return the number of stored statements.
        """
        Statement = self.get_model('statement')
        return Statement.objects.count()

    def find(self, statement_text):
        """
        Return the statement whose text equals ``statement_text``,
        or None (after logging the lookup failure) if there is none.
        """
        Statement = self.get_model('statement')
        try:
            return Statement.objects.get(text=statement_text)
        except Statement.DoesNotExist as e:
            self.logger.info(str(e))
            return None

    def filter(self, **kwargs):
        """
        Returns the statements in the database
        that match the parameters specified.

        ``order_by`` is removed from the parameters and applied as the
        ordering. Keys mentioning ``in_response_to`` are rewritten to the
        ``in_response`` lookups that are passed to the Django ORM.

        :return: The result of ``Statement.objects.filter(...)``.
        """
        from django.db.models import Q
        Statement = self.get_model('statement')

        order = kwargs.pop('order_by', None)

        RESPONSE_CONTAINS = 'in_response_to__contains'

        if RESPONSE_CONTAINS in kwargs:
            kwargs['in_response__response__text'] = kwargs.pop(RESPONSE_CONTAINS)

        # Iterate over a snapshot because the keys are rewritten in place
        for kwarg in kwargs.copy():
            value = kwargs.pop(kwarg)
            kwargs[kwarg.replace('in_response_to', 'in_response')] = value

        if 'in_response' in kwargs:
            responses = kwargs.pop('in_response')

            if responses:
                kwargs['in_response__response__text__in'] = list(responses)
            else:
                # An empty value selects statements that respond to nothing
                kwargs['in_response'] = None

        parameters = {}
        if 'in_response__response__text' in kwargs:
            value = kwargs['in_response__response__text']
            parameters['responses__statement__text'] = value

        statements = Statement.objects.filter(Q(**kwargs) | Q(**parameters))

        if order:
            statements = statements.order_by(order)

        return statements

    def update(self, statement):
        """
        Update the provided statement.

        Gets or creates the row matching ``statement.text``, stores the
        extra data of the statement that was passed in, and creates a
        ``Response`` row for every entry of its ``response_statement_cache``.

        :return: The stored statement model instance.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')

        response_statement_cache = statement.response_statement_cache

        # Read the caller's extra data BEFORE ``statement`` is rebound to the
        # stored row; reading it afterwards would assign the row's own value
        # back to itself and silently drop the caller's data.
        extra_data = getattr(statement, 'extra_data', '')

        statement, _ = Statement.objects.get_or_create(text=statement.text)
        statement.extra_data = extra_data
        statement.save()

        for _response_statement in response_statement_cache:

            response_statement, _ = Statement.objects.get_or_create(
                text=_response_statement.text
            )
            response_statement.extra_data = getattr(_response_statement, 'extra_data', '')
            response_statement.save()

            Response.objects.create(
                statement=response_statement,
                response=statement
            )

        return statement

    def get_random(self):
        """
        Returns a random statement from the database
        (None when ``first()`` finds no rows).
        """
        Statement = self.get_model('statement')
        return Statement.objects.order_by('?').first()

    def remove(self, statement_text):
        """
        Removes the statement that matches the input text.
        Removes any responses from statements if the response text matches the
        input text.
        """
        from django.db.models import Q

        Statement = self.get_model('statement')
        Response = self.get_model('response')

        statements = Statement.objects.filter(text=statement_text)

        responses = Response.objects.filter(
            Q(statement__text=statement_text) | Q(response__text=statement_text)
        )

        # Responses are deleted before the statements they reference
        responses.delete()
        statements.delete()

    def get_latest_response(self, conversation_id):
        """
        Returns the latest response in a conversation if it exists.
        Returns None if a matching conversation cannot be found.
        """
        Response = self.get_model('response')

        response = Response.objects.filter(
            conversations__id=conversation_id
        ).order_by(
            'created_at'
        ).last()

        if not response:
            return None

        return response.response

    def create_conversation(self):
        """
        Create a new conversation.

        :return: The id of the newly created conversation.
        """
        Conversation = self.get_model('conversation')
        conversation = Conversation.objects.create()
        return conversation.id

    def add_to_conversation(self, conversation_id, statement, response):
        """
        Add the statement and response to the conversation.

        Statement rows are fetched or created by text, linked through a new
        ``Response`` row, and that row is attached to the conversation.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')

        first_statement, _ = Statement.objects.get_or_create(text=statement.text)
        first_response, _ = Statement.objects.get_or_create(text=response.text)

        response = Response.objects.create(
            statement=first_statement,
            response=first_response
        )

        response.conversations.add(conversation_id)

    def drop(self):
        """
        Remove all data from the database.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')
        Conversation = self.get_model('conversation')
        Tag = self.get_model('tag')

        Statement.objects.all().delete()
        Response.objects.all().delete()
        Conversation.objects.all().delete()
        Tag.objects.all().delete()

    def get_response_statements(self):
        """
        Return only statements that are in response to another statement.
        A statement must exist which lists the closest matching statement in the
        in_response_to field. Otherwise, the logic adapter may find a closest
        matching statement that does not have a known response.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')

        responses = Response.objects.all()

        return Statement.objects.filter(in_response__in=responses)
|
@ -0,0 +1,394 @@
|
|||||||
|
from chatterbot.storage import StorageAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class Query(object):
    """
    Builder for MongoDB query documents.

    Every builder method returns a new ``Query`` and leaves the instance it
    was called on untouched, so a base query can safely be reused.

    :param query: Initial query document. A new empty document is used
        when omitted.
    :type query: dict
    """

    def __init__(self, query=None):
        # A ``{}`` default would be one dict shared by every instance
        self.query = {} if query is None else query

    def _clone(self):
        """
        Return a deep copy of the query document.

        A shallow copy would share nested dicts and lists (``$nin``,
        ``$elemMatch``) with this instance, so building a derived query
        would silently modify this one.
        """
        from copy import deepcopy
        return deepcopy(self.query)

    def value(self):
        """
        Return a copy of the query document.
        """
        return self._clone()

    def raw(self, data):
        """
        Return a new query with the items of ``data`` merged in.
        """
        query = self._clone()

        query.update(data)

        return Query(query)

    def statement_text_equals(self, statement_text):
        """
        Return a new query that requires ``text`` to equal ``statement_text``.
        """
        query = self._clone()

        query['text'] = statement_text

        return Query(query)

    def statement_text_not_in(self, statements):
        """
        Return a new query that additionally excludes the given texts
        through a ``$nin`` condition on ``text``.
        """
        query = self._clone()

        if 'text' not in query:
            query['text'] = {}

        if '$nin' not in query['text']:
            query['text']['$nin'] = []

        query['text']['$nin'].extend(statements)

        return Query(query)

    def statement_response_list_contains(self, statement_text):
        """
        Return a new query that requires ``in_response_to`` to contain an
        element whose ``text`` equals ``statement_text``.
        """
        query = self._clone()

        if 'in_response_to' not in query:
            query['in_response_to'] = {}

        if '$elemMatch' not in query['in_response_to']:
            query['in_response_to']['$elemMatch'] = {}

        query['in_response_to']['$elemMatch']['text'] = statement_text

        return Query(query)

    def statement_response_list_equals(self, response_list):
        """
        Return a new query that requires ``in_response_to`` to equal
        ``response_list``.
        """
        query = self._clone()

        query['in_response_to'] = response_list

        return Query(query)
|
||||||
|
|
||||||
|
|
||||||
|
class MongoDatabaseAdapter(StorageAdapter):
    """
    The MongoDatabaseAdapter is an interface that allows
    ChatterBot to store statements in a MongoDB database.

    :keyword database: The name of the database you wish to connect to.
    :type database: str

    .. code-block:: python

       database='chatterbot-database'

    :keyword database_uri: The URI of a remote instance of MongoDB.
    :type database_uri: str

    .. code-block:: python

       database_uri='mongodb://example.com:8100/'
    """

    def __init__(self, **kwargs):
        super(MongoDatabaseAdapter, self).__init__(**kwargs)
        from pymongo import MongoClient
        from pymongo.errors import OperationFailure

        self.database_name = self.kwargs.get(
            'database', 'chatterbot-database'
        )
        self.database_uri = self.kwargs.get(
            'database_uri', 'mongodb://localhost:27017/'
        )

        # Connect to the configured URI (localhost:27017 unless overridden)
        self.client = MongoClient(self.database_uri)

        # Increase the sort buffer to 42M if possible
        try:
            self.client.admin.command({'setParameter': 1, 'internalQueryExecMaxBlockingSortBytes': 44040192})
        except OperationFailure:
            # Best effort only: keep the server's current setting
            pass

        # Specify the name of the database
        self.database = self.client[self.database_name]

        # The mongo collection of statement documents
        self.statements = self.database['statements']

        # The mongo collection of conversation documents
        self.conversations = self.database['conversations']

        # Set a requirement for the text attribute to be unique
        self.statements.create_index('text', unique=True)

        self.base_query = Query()

    def get_statement_model(self):
        """
        Return the class for the statement model.
        """
        from chatterbot.conversation import Statement

        # Create a storage-aware statement.
        # NOTE: this sets the attribute on the shared class itself.
        statement = Statement
        statement.storage = self

        return statement

    def get_response_model(self):
        """
        Return the class for the response model.
        """
        from chatterbot.conversation import Response

        # Create a storage-aware response.
        # NOTE: this sets the attribute on the shared class itself.
        response = Response
        response.storage = self

        return response

    def count(self):
        """
        Return the number of documents in the statements collection.
        """
        return self.statements.count()

    def find(self, statement_text):
        """
        Return the statement whose text equals ``statement_text``,
        or None if no such document exists.
        """
        Statement = self.get_model('statement')
        query = self.base_query.statement_text_equals(statement_text)

        values = self.statements.find_one(query.value())

        if not values:
            return None

        # The text is passed positionally below
        del values['text']

        # Build the objects for the response list
        values['in_response_to'] = self.deserialize_responses(
            values.get('in_response_to', [])
        )

        return Statement(statement_text, **values)

    def deserialize_responses(self, response_list):
        """
        Takes the list of response items and returns
        the list converted to Response objects.

        The ``text`` key is removed from each item of ``response_list``.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')
        proxy_statement = Statement('')

        for response in response_list:
            text = response['text']
            del response['text']

            proxy_statement.add_response(
                Response(text, **response)
            )

        return proxy_statement.in_response_to

    def mongo_to_object(self, statement_data):
        """
        Return Statement object when given data
        returned from Mongo DB.

        The ``text`` key is removed from ``statement_data`` and its
        ``in_response_to`` entry is replaced by deserialized responses.
        """
        Statement = self.get_model('statement')
        statement_text = statement_data['text']
        del statement_data['text']

        statement_data['in_response_to'] = self.deserialize_responses(
            statement_data.get('in_response_to', [])
        )

        return Statement(statement_text, **statement_data)

    def filter(self, **kwargs):
        """
        Returns a list of statements in the database
        that match the parameters specified.

        ``order_by`` names the field to sort on; ``created_at`` is sorted
        newest first, any other field ascending. ``in_response_to`` and
        ``in_response_to__contains`` become response-list conditions, and
        the remaining parameters are merged into the query unchanged.
        """
        import pymongo

        query = self.base_query

        order_by = kwargs.pop('order_by', None)

        # Convert Response objects to data
        if 'in_response_to' in kwargs:
            serialized_responses = [
                {'text': response} for response in kwargs['in_response_to']
            ]

            query = query.statement_response_list_equals(serialized_responses)
            del kwargs['in_response_to']

        if 'in_response_to__contains' in kwargs:
            query = query.statement_response_list_contains(
                kwargs['in_response_to__contains']
            )
            del kwargs['in_response_to__contains']

        query = query.raw(kwargs)

        matches = self.statements.find(query.value())

        if order_by:

            direction = pymongo.ASCENDING

            # Sort so that newer datetimes appear first
            if order_by == 'created_at':
                direction = pymongo.DESCENDING

            matches = matches.sort(order_by, direction)

        return [self.mongo_to_object(match) for match in matches]

    def update(self, statement):
        """
        Upsert the statement and a document for each of its responses.

        Bulk write errors are logged rather than raised.

        :return: The statement that was passed in.
        """
        from pymongo import UpdateOne
        from pymongo.errors import BulkWriteError

        data = statement.serialize()

        operations = [
            UpdateOne(
                {'text': statement.text},
                {'$set': data},
                upsert=True
            )
        ]

        # Make sure that an entry for each response is saved
        for response_dict in data.get('in_response_to', []):
            response_text = response_dict.get('text')

            # NOTE(review): '$set' also overwrites these fields on a document
            # that already exists; an earlier comment here mentioned
            # '$setOnInsert', which may have been the intent. Confirm first.
            operations.append(UpdateOne(
                {'text': response_text},
                {'$set': response_dict},
                upsert=True
            ))

        try:
            self.statements.bulk_write(operations, ordered=False)
        except BulkWriteError as bwe:
            # Log the details of a bulk write error
            self.logger.error(str(bwe.details))

        return statement

    def create_conversation(self):
        """
        Create a new conversation.

        :return: The id of the inserted conversation document.
        """
        conversation_id = self.conversations.insert_one({}).inserted_id
        return conversation_id

    def get_latest_response(self, conversation_id):
        """
        Returns the latest response in a conversation if it exists.
        Returns None if a matching conversation cannot be found.

        When the conversation holds a single statement, that statement is
        returned, as SQLStorageAdapter does.
        """
        from pymongo import DESCENDING

        statements = list(self.statements.find({
            'conversations.id': conversation_id
        }).sort('conversations.created_at', DESCENDING))

        if not statements:
            return None

        # Indexing [-2] on a one-element list would raise IndexError
        if len(statements) == 1:
            return self.mongo_to_object(statements[0])

        return self.mongo_to_object(statements[-2])

    def add_to_conversation(self, conversation_id, statement, response):
        """
        Add the statement and response to the conversation.

        Each document, matched by text, gets a ``conversations`` entry
        holding the conversation id and a UTC timestamp.
        """
        from datetime import datetime, timedelta
        self.statements.update_one(
            {
                'text': statement.text
            },
            {
                '$push': {
                    'conversations': {
                        'id': conversation_id,
                        'created_at': datetime.utcnow()
                    }
                }
            }
        )
        self.statements.update_one(
            {
                'text': response.text
            },
            {
                '$push': {
                    'conversations': {
                        'id': conversation_id,
                        # Force the response to be at least one millisecond after the input statement
                        'created_at': datetime.utcnow() + timedelta(milliseconds=1)
                    }
                }
            }
        )

    def get_random(self):
        """
        Returns a random statement from the database

        :raises EmptyDatabaseException: If there are no statements.
        """
        from random import randint

        count = self.count()

        if count < 1:
            raise self.EmptyDatabaseException()

        random_integer = randint(0, count - 1)

        statements = self.statements.find().limit(1).skip(random_integer)

        return self.mongo_to_object(list(statements)[0])

    def remove(self, statement_text):
        """
        Removes the statement that matches the input text.
        Removes any responses from statements if the response text matches the
        input text.
        """
        for statement in self.filter(in_response_to__contains=statement_text):
            statement.remove_response(statement_text)
            self.update(statement)

        self.statements.delete_one({'text': statement_text})

    def get_response_statements(self):
        """
        Return only statements that are in response to another statement.
        A statement must exist which lists the closest matching statement in the
        in_response_to field. Otherwise, the logic adapter may find a closest
        matching statement that does not have a known response.
        """
        response_query = self.statements.aggregate([{'$group': {'_id': '$in_response_to.text'}}])

        responses = []
        for r in response_query:
            try:
                responses.extend(r['_id'])
            except TypeError:
                # Groups whose '_id' is not iterable are skipped
                # (presumably documents without responses - verify)
                pass

        _statement_query = {
            'text': {
                '$in': responses
            }
        }

        _statement_query.update(self.base_query.value())
        statement_query = self.statements.find(_statement_query)

        return [self.mongo_to_object(statement) for statement in statement_query]

    def drop(self):
        """
        Remove the database.
        """
        self.client.drop_database(self.database_name)
|
@ -0,0 +1,403 @@
|
|||||||
|
from chatterbot.storage import StorageAdapter
|
||||||
|
|
||||||
|
|
||||||
|
def get_response_table(response):
    """
    Build a SQLAlchemy ``Response`` model instance that carries the
    ``text`` and ``occurrence`` of the given response object.
    """
    from chatterbot.ext.sqlalchemy_app.models import Response

    fields = {
        'text': response.text,
        'occurrence': response.occurrence,
    }
    return Response(**fields)
|
||||||
|
|
||||||
|
|
||||||
|
class SQLStorageAdapter(StorageAdapter):
|
||||||
|
"""
|
||||||
|
SQLStorageAdapter allows ChatterBot to store conversation
|
||||||
|
data semi-structured T-SQL database, virtually, any database
|
||||||
|
that SQL Alchemy supports.
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
Tables may change (and will), so, save your training data.
|
||||||
|
There is no data migration (yet).
|
||||||
|
Performance test not done yet.
|
||||||
|
Tests using other databases not finished.
|
||||||
|
|
||||||
|
All parameters are optional, by default a sqlite database is used.
|
||||||
|
|
||||||
|
It will check if tables are present, if they are not, it will attempt
|
||||||
|
to create the required tables.
|
||||||
|
|
||||||
|
:keyword database: Used for sqlite database. Ignored if database_uri is specified.
|
||||||
|
:type database: str
|
||||||
|
|
||||||
|
:keyword database_uri: e.g. "sqlite:///database_test.db". Use either database_uri or database;
|
||||||
|
database_uri can be specified to choose database driver (database parameter will be ignored).
|
||||||
|
:type database_uri: str
|
||||||
|
|
||||||
|
:keyword read_only: False by default, makes all operations read only, has priority over all DB operations
|
||||||
|
so, create, update, delete will NOT be executed
|
||||||
|
:type read_only: bool
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, **kwargs):
    """
    Create the SQLAlchemy engine and session factory for this adapter.

    Reads the ``database``, ``database_uri`` and ``read_only`` keyword
    arguments and creates the tables when no ``Statement`` table exists.

    NOTE(review): a truthy ``database`` value replaces ``database_uri``,
    although the class documentation says the opposite. Confirm which
    is intended.
    """
    super(SQLStorageAdapter, self).__init__(**kwargs)

    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    default_uri = "sqlite:///db.sqlite3"

    database_name = self.kwargs.get("database", False)

    # None results in a sqlite in-memory database as the default
    if database_name is None:
        default_uri = "sqlite://"

    self.database_uri = self.kwargs.get(
        "database_uri", default_uri
    )

    # Create a sqlite file if a database name is provided
    if database_name:
        self.database_uri = "sqlite:///" + database_name

    self.engine = create_engine(self.database_uri, convert_unicode=True)

    if self.database_uri.startswith('sqlite://'):
        from sqlalchemy import event

        # Listen on this engine only. Listening on the Engine class would
        # run the sqlite PRAGMAs on every engine in the process and add
        # one more listener each time an adapter is created.
        @event.listens_for(self.engine, "connect")
        def set_sqlite_pragma(dbapi_connection, connection_record):
            dbapi_connection.execute('PRAGMA journal_mode=WAL')
            dbapi_connection.execute('PRAGMA synchronous=NORMAL')

    self.read_only = self.kwargs.get(
        "read_only", False
    )

    if not self.engine.dialect.has_table(self.engine, 'Statement'):
        self.create()

    self.Session = sessionmaker(bind=self.engine, expire_on_commit=True)

    # ChatterBot's internal query builder is not yet supported for this adapter
    self.adapter_supports_queries = False
|
||||||
|
|
||||||
|
def get_statement_model(self):
    """
    Provide the SQLAlchemy model class used for statements.
    """
    from chatterbot.ext.sqlalchemy_app.models import Statement as statement_model

    return statement_model
|
||||||
|
|
||||||
|
def get_response_model(self):
    """
    Provide the SQLAlchemy model class used for responses.
    """
    from chatterbot.ext.sqlalchemy_app.models import Response as response_model

    return response_model
|
||||||
|
|
||||||
|
def get_conversation_model(self):
    """
    Provide the SQLAlchemy model class used for conversations.
    """
    from chatterbot.ext.sqlalchemy_app.models import Conversation as conversation_model

    return conversation_model
|
||||||
|
|
||||||
|
def get_tag_model(self):
    """
    Return the tag model.
    """
    from chatterbot.ext.sqlalchemy_app.models import Tag
    return Tag
|
||||||
|
|
||||||
|
def count(self):
    """
    Return the number of entries in the database.

    :return: Number of rows of the statement model.
    """
    Statement = self.get_model('statement')

    session = self.Session()
    try:
        return session.query(Statement).count()
    finally:
        # Close the session even when the query fails
        session.close()
|
||||||
|
|
||||||
|
def find(self, statement_text):
    """
    Returns a statement if it exists otherwise None

    :param statement_text: Exact text of the statement to look up.
    :return: The result of ``get_statement()`` on the first matching
        record, or None when no record matches.
    """
    Statement = self.get_model('statement')
    session = self.Session()

    try:
        record = session.query(Statement).filter_by(text=statement_text).first()

        if record:
            # Convert while the session is still open
            return record.get_statement()

        return None
    finally:
        # Close the session even when the lookup fails
        session.close()
|
||||||
|
|
||||||
|
def remove(self, statement_text):
    """
    Removes the statement that matches the input text.

    Does nothing when no statement has that text.

    NOTE(review): only the statement record is deleted here; whether
    related responses go with it depends on the model relationships,
    which are not visible in this method.
    """
    Statement = self.get_model('statement')
    session = self.Session()

    record = session.query(Statement).filter_by(text=statement_text).first()

    # Deleting None is an error, so skip when nothing matched
    if record is not None:
        session.delete(record)

    self._session_finish(session)
|
||||||
|
|
||||||
|
def filter(self, **kwargs):
    """
    Returns a list of objects from the database.
    The kwargs parameter can contain any number
    of attributes. Only objects which contain
    all listed attributes and in which all values
    match for all listed attributes will be returned.

    With no parameters every statement is returned. ``in_response_to``
    and ``in_response_to__contains`` start a query on the statement
    model; any other parameter is matched with ``LIKE '%value%'``
    against ``Response.statement_text``. Only the query that is current
    after the last parameter is executed.

    :return: A list of the ``get_statement()`` results of the matches.
    """
    Statement = self.get_model('statement')
    Response = self.get_model('response')

    session = self.Session()

    # try/finally closes the session on the early return and on errors
    try:
        filter_parameters = kwargs.copy()

        statements = []
        _query = None

        if len(filter_parameters) == 0:
            _response_query = session.query(Statement)
            statements.extend(_response_query.all())
        else:
            for i, fp in enumerate(filter_parameters):
                _filter = filter_parameters[fp]
                if fp in ['in_response_to', 'in_response_to__contains']:
                    _response_query = session.query(Statement)
                    if isinstance(_filter, list):
                        if len(_filter) == 0:
                            _query = _response_query.filter(
                                Statement.in_response_to == None  # NOQA Here must use == instead of is
                            )
                        else:
                            # NOTE(review): each pass replaces _query, so only
                            # the last list element ends up in the filter
                            for f in _filter:
                                _query = _response_query.filter(
                                    Statement.in_response_to.contains(get_response_table(f)))
                    else:
                        if fp == 'in_response_to__contains':
                            _query = _response_query.join(Response).filter(Response.text == _filter)
                        else:
                            _query = _response_query.filter(Statement.in_response_to == None)  # NOQA
                else:
                    if _query:
                        _query = _query.filter(Response.statement_text.like('%' + _filter + '%'))
                    else:
                        _response_query = session.query(Response)
                        _query = _response_query.filter(Response.statement_text.like('%' + _filter + '%'))

                if _query is None:
                    return []
                if len(filter_parameters) == i + 1:
                    statements.extend(_query.all())

        results = []

        # Convert to plain statements while the session is still open
        for statement in statements:
            if isinstance(statement, Response):
                if statement and statement.statement_table:
                    results.append(statement.statement_table.get_statement())
            else:
                if statement:
                    results.append(statement.get_statement())

        return results
    finally:
        session.close()
|
||||||
|
|
||||||
|
def update(self, statement):
    """
    Store the given statement, creating its record when none exists yet.

    The record's extra data is replaced with a copy of the statement's,
    a tag record is appended for each of its tags, and each response it
    replies to is either counted once more or created.
    Nothing happens when ``statement`` is falsy.
    """
    Statement = self.get_model('statement')
    Response = self.get_model('response')
    Tag = self.get_model('tag')

    if not statement:
        return

    session = self.Session()

    existing = session.query(Statement).filter_by(text=statement.text).first()

    # Create a new statement entry if one does not already exist
    record = existing if existing else Statement(text=statement.text)

    record.extra_data = dict(statement.extra_data)

    for tag_name in statement.tags:
        tag_record = session.query(Tag).filter_by(name=tag_name).first()

        # Create the tag record on first use
        if not tag_record:
            tag_record = Tag(name=tag_name)

        record.tags.append(tag_record)

    # Get or create the response records as needed
    for response in statement.in_response_to:
        response_record = session.query(Response).filter_by(
            text=response.text,
            statement_text=statement.text
        ).first()

        if not response_record:
            # Create the response record
            response_record = Response(
                text=response.text,
                statement_text=statement.text,
                occurrence=response.occurrence
            )
        else:
            response_record.occurrence += 1

        record.in_response_to.append(response_record)

    session.add(record)

    self._session_finish(session)
|
||||||
|
|
||||||
|
def create_conversation(self):
    """
    Create a new conversation.

    :return: The id of the new conversation, read after the flush and
        before the commit.
    """
    Conversation = self.get_model('conversation')

    session = self.Session()
    try:
        conversation = Conversation()

        session.add(conversation)
        session.flush()

        # Read the id before commit; the session expires objects on commit
        session.refresh(conversation)
        conversation_id = conversation.id

        session.commit()
    finally:
        # Close the session even when a step above fails
        session.close()

    return conversation_id
|
||||||
|
|
||||||
|
def add_to_conversation(self, conversation_id, statement, response):
    """
    Attach a statement and its response to an existing conversation.

    Either one is first stored through ``update`` when no record with
    its text exists yet.
    """
    Statement = self.get_model('statement')
    Conversation = self.get_model('conversation')

    session = self.Session()
    conversation = session.query(Conversation).get(conversation_id)

    def fetch_by_text(text):
        # First statement record whose text matches exactly, or None
        return session.query(Statement).filter_by(text=text).first()

    statement_record = fetch_by_text(statement.text)
    response_record = fetch_by_text(response.text)

    # Make sure the statements exist
    if not statement_record:
        self.update(statement)
        statement_record = fetch_by_text(statement.text)

    if not response_record:
        self.update(response)
        response_record = fetch_by_text(response.text)

    for record in (statement_record, response_record):
        conversation.statements.append(record)

    session.add(conversation)
    self._session_finish(session)
|
||||||
|
|
||||||
|
def get_latest_response(self, conversation_id):
    """
    Returns the latest response in a conversation if it exists.
    Returns None if a matching conversation cannot be found.

    The statements linked to the conversation are ordered by id. The
    second-to-last one is returned, or the only one when the
    conversation holds a single statement.

    :param conversation_id: The id of the conversation to inspect.
    :returns: The result of ``get_statement()`` on the selected record,
        or None when the conversation has no statements.
    """
    Statement = self.get_model('statement')

    session = self.Session()
    statement = None

    try:
        # Load the ordered records once. This replaces two COUNT queries
        # followed by negative indexing on the query object, which newer
        # SQLAlchemy releases no longer accept.
        records = session.query(Statement).filter(
            Statement.conversations.any(id=conversation_id)
        ).order_by(Statement.id).all()

        if len(records) >= 2:
            statement = records[-2].get_statement()

        # Handle the case of the first statement in the list
        elif len(records) == 1:
            statement = records[0].get_statement()
    finally:
        # Close the session even if the query or conversion fails
        session.close()

    return statement
|
||||||
|
|
||||||
|
def get_random(self):
    """
    Returns a random statement from the database

    :returns: The result of ``get_statement()`` on a randomly indexed
        record of the statement model.
    :raises: ``self.EmptyDatabaseException`` when ``self.count()``
        reports fewer than one entry.
    """
    import random

    Statement = self.get_model('statement')

    # Check for an empty database before opening a session, so the
    # early exit below cannot leave a session open.
    count = self.count()
    if count < 1:
        raise self.EmptyDatabaseException()

    session = self.Session()

    try:
        rand = random.randrange(0, count)
        stmt = session.query(Statement)[rand]

        statement = stmt.get_statement()
    finally:
        # Close the session even if the lookup fails
        session.close()

    return statement
|
||||||
|
|
||||||
|
def drop(self):
    """
    Drop the database attached to a given adapter.

    Calls ``drop_all`` on the metadata of the ``Base`` class from
    ``chatterbot.ext.sqlalchemy_app.models`` using ``self.engine``.
    """
    from chatterbot.ext.sqlalchemy_app.models import Base

    metadata = Base.metadata
    metadata.drop_all(self.engine)
|
||||||
|
|
||||||
|
def create(self):
    """
    Populate the database with the tables.

    Calls ``create_all`` on the metadata of the ``Base`` class from
    ``chatterbot.ext.sqlalchemy_app.models`` using ``self.engine``.
    """
    from chatterbot.ext.sqlalchemy_app.models import Base

    metadata = Base.metadata
    metadata.create_all(self.engine)
|
||||||
|
|
||||||
|
def _session_finish(self, session, statement_text=None):
    """
    Finish working with a session and close it.

    The session is rolled back when the adapter is read only and
    committed otherwise. An ``InvalidRequestError`` raised while doing
    so is logged together with ``statement_text`` instead of being
    propagated. The session is closed in every case.

    :param session: The session to finish.
    :param statement_text: Text logged if the commit or rollback fails.
    """
    from sqlalchemy.exc import InvalidRequestError

    try:
        if self.read_only:
            session.rollback()
        else:
            session.commit()
    except InvalidRequestError:
        # Log the statement text and the exception
        self.logger.exception(statement_text)
    finally:
        session.close()
|
@ -0,0 +1,171 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
class StorageAdapter(object):
    """
    This is an abstract class that represents the interface
    that all storage adapters should implement.
    """

    def __init__(self, base_query=None, *args, **kwargs):
        """
        Initialize common attributes shared by all storage adapters.

        All keyword arguments are kept on ``self.kwargs``, which is where
        ``get_model`` looks for ``<name>_model`` overrides.

        :param base_query: The initial value of ``self.base_query``.
        :param logger: Optional keyword argument. The logger stored on
            ``self.logger``; defaults to this module's logger.
        """
        self.kwargs = kwargs
        self.logger = kwargs.get('logger', logging.getLogger(__name__))
        self.adapter_supports_queries = True

        # Keep the value supplied by the caller instead of discarding it
        self.base_query = base_query

    def get_model(self, model_name):
        """
        Return the model class for a given model name.

        A ``<model_name>_model`` keyword argument given to the
        constructor takes precedence. Otherwise the adapter's own
        ``get_<model_name>_model`` method is called.

        :param model_name: The name of the model; case is ignored.
        :raises AttributeError: If no override was given and the adapter
            has no matching ``get_<model_name>_model`` method.
        """

        # The string must be lowercase
        model_name = model_name.lower()

        kwarg_model_key = '%s_model' % (model_name, )

        if kwarg_model_key in self.kwargs:
            return self.kwargs.get(kwarg_model_key)

        get_model_method = getattr(self, 'get_%s_model' % (model_name, ))

        return get_model_method()

    def generate_base_query(self, chatterbot, session_id):
        """
        Create a base query for the storage adapter.

        When the adapter supports queries, every filter in
        ``chatterbot.filters`` is asked for a selection and the result is
        stored on ``self.base_query``. Each filter overwrites the
        previous value, so the last filter's result is the one kept.
        """
        if self.adapter_supports_queries:
            for filter_instance in chatterbot.filters:
                self.base_query = filter_instance.filter_selection(chatterbot, session_id)

    def count(self):
        """
        Return the number of entries in the database.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `count` method is not implemented by this adapter.'
        )

    def find(self, statement_text):
        """
        Returns an object from the database if it exists
        """
        raise self.AdapterMethodNotImplementedError(
            'The `find` method is not implemented by this adapter.'
        )

    def remove(self, statement_text):
        """
        Removes the statement that matches the input text.
        Removes any responses from statements where the response text matches
        the input text.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `remove` method is not implemented by this adapter.'
        )

    def filter(self, **kwargs):
        """
        Returns a list of objects from the database.
        The kwargs parameter can contain any number
        of attributes. Only objects which contain
        all listed attributes and in which all values
        match for all listed attributes will be returned.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `filter` method is not implemented by this adapter.'
        )

    def update(self, statement):
        """
        Modifies an entry in the database.
        Creates an entry if one does not exist.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `update` method is not implemented by this adapter.'
        )

    def get_latest_response(self, conversation_id):
        """
        Returns the latest response in a conversation if it exists.
        Returns None if a matching conversation cannot be found.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `get_latest_response` method is not implemented by this adapter.'
        )

    def create_conversation(self):
        """
        Creates a new conversation.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `create_conversation` method is not implemented by this adapter.'
        )

    def add_to_conversation(self, conversation_id, statement, response):
        """
        Add the statement and response to the conversation.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `add_to_conversation` method is not implemented by this adapter.'
        )

    def get_random(self):
        """
        Returns a random statement from the database.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `get_random` method is not implemented by this adapter.'
        )

    def drop(self):
        """
        Drop the database attached to a given adapter.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `drop` method is not implemented by this adapter.'
        )

    def get_response_statements(self):
        """
        Return only statements that are in response to another statement.
        A statement must exist which lists the closest matching statement in the
        in_response_to field. Otherwise, the logic adapter may find a closest
        matching statement that does not have a known response.

        This method may be overridden by a child class to provide a more
        efficient method to get these results.

        :returns: A new list holding the statements from ``self.filter()``
            whose text appears in the ``in_response_to`` field of at
            least one statement.
        """
        statement_list = self.filter()

        # Gather the text of every response listed by any statement
        responses = set()

        for statement in statement_list:
            for response in statement.in_response_to:
                responses.add(response.text)

        # A single filtering pass replaces repeated list.remove() calls,
        # each of which had to scan the list again
        return [
            statement for statement in statement_list
            if statement.text in responses
        ]

    class EmptyDatabaseException(Exception):
        """
        An exception to be raised when the database is expected to
        contain at least one entry but contains none.
        """

        def __init__(self, value='The database currently contains no entries. At least one entry is expected. You may need to train your chat bot to populate your database.'):
            self.value = value

        def __str__(self):
            return repr(self.value)

    class AdapterMethodNotImplementedError(NotImplementedError):
        """
        An exception to be raised when a storage adapter method has not been implemented.
        Typically this indicates that the method should be implemented in a subclass.
        """
        pass
|
@ -0,0 +1,426 @@
|
|||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from .conversation import Statement, Response
|
||||||
|
from . import utils
|
||||||
|
|
||||||
|
|
||||||
|
class Trainer(object):
    """
    Base class for all other trainer classes.
    """

    def __init__(self, storage, **kwargs):
        """
        :param storage: The storage adapter that statements are read
            from and written to.
        :param chatbot: Optional keyword argument. A chat bot whose
            preprocessors are applied to training statements.
        :param show_training_progress: Optional keyword argument stored
            on the trainer; defaults to True.
        """
        self.chatbot = kwargs.get('chatbot')
        self.storage = storage
        self.logger = logging.getLogger(__name__)
        self.show_training_progress = kwargs.get('show_training_progress', True)

    def get_preprocessed_statement(self, input_statement):
        """
        Preprocess the input statement.

        :param input_statement: The statement to preprocess.
        :returns: The statement after every preprocessor of the chat bot
            has been applied, or the unchanged statement when the
            trainer has no chat bot.
        """

        # The chatbot is optional to prevent backwards-incompatible changes
        if not self.chatbot:
            return input_statement

        for preprocessor in self.chatbot.preprocessors:
            # NOTE(review): the trainer, not the chat bot, is passed as the
            # first argument - confirm this is what preprocessors expect
            input_statement = preprocessor(self, input_statement)

        return input_statement

    def train(self, *args, **kwargs):
        """
        This method must be overridden by a child class.

        :raises: ``Trainer.TrainerInitializationException`` always.
        """
        raise self.TrainerInitializationException()

    def get_or_create(self, statement_text):
        """
        Return a statement if it exists.
        Create and return the statement if it does not exist.

        The text is preprocessed before it is looked up in storage.
        """
        temp_statement = self.get_preprocessed_statement(
            Statement(text=statement_text)
        )

        statement = self.storage.find(temp_statement.text)

        if not statement:
            statement = Statement(temp_statement.text)

        return statement

    class TrainerInitializationException(Exception):
        """
        Exception raised when a base class has not overridden
        the required methods on the Trainer base class.
        """

        def __init__(self, value=None):
            default = (
                'A training class must be specified before calling train(). ' +
                'See http://chatterbot.readthedocs.io/en/stable/training.html'
            )
            self.value = value or default

        def __str__(self):
            return repr(self.value)

    def _generate_export_data(self):
        """
        Build a list of ``[response text, statement text]`` pairs, one
        for every response listed by every statement in storage.
        """
        result = []
        for statement in self.storage.filter():
            for response in statement.in_response_to:
                result.append([response.text, statement.text])

        return result

    def export_for_training(self, file_path='./export.json'):
        """
        Create a file from the database that can be used to
        train other chat bots.

        :param file_path: The path of the JSON file to write.
        """
        import json
        export = {'conversations': self._generate_export_data()}

        file_kwargs = {}

        if sys.version_info[0] > 2:
            # The data is dumped with ensure_ascii=False, so non-ASCII text
            # is written as-is. Specify the encoding in Python versions 3
            # and up instead of relying on the platform default.
            file_kwargs['encoding'] = 'utf-8'

        with open(file_path, 'w+', **file_kwargs) as jsonfile:
            json.dump(export, jsonfile, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
class ListTrainer(Trainer):
    """
    Allows a chat bot to be trained using a list of strings
    where the list represents a conversation.
    """

    def train(self, conversation):
        """
        Train the chat bot based on the provided list of
        statements that represents a single conversation.

        Every statement after the first is stored as being in response
        to the statement that precedes it.

        :param conversation: The texts of the conversation, in order.
        """
        last_text = None

        for position, text in enumerate(conversation, 1):
            if self.show_training_progress:
                utils.print_progress_bar(
                    'List Trainer', position, len(conversation)
                )

            current = self.get_or_create(text)

            if last_text:
                current.add_response(Response(last_text))

            last_text = current.text
            self.storage.update(current)
|
||||||
|
|
||||||
|
|
||||||
|
class ChatterBotCorpusTrainer(Trainer):
    """
    Allows the chat bot to be trained using data from the
    ChatterBot dialog corpus.
    """

    def __init__(self, storage, **kwargs):
        super(ChatterBotCorpusTrainer, self).__init__(storage, **kwargs)
        from .corpus import Corpus

        self.corpus = Corpus()

    def train(self, *corpus_paths):
        """
        Train the chat bot with every conversation of the given corpora.

        :param corpus_paths: The corpus paths to load, given either as
            separate arguments or as one list.
        """

        # Allow a list of corpora to be passed instead of arguments
        if len(corpus_paths) == 1 and isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

        # Train the chat bot with each statement and response pair
        for corpus_path in corpus_paths:
            corpora = self.corpus.load_corpus(corpus_path)
            corpus_files = self.corpus.list_corpus_files(corpus_path)

            for corpus_index, corpus in enumerate(corpora):
                for conversation_index, conversation in enumerate(corpus):

                    if self.show_training_progress:
                        file_name = os.path.basename(corpus_files[corpus_index])
                        utils.print_progress_bar(
                            str(file_name) + ' Training',
                            conversation_index + 1,
                            len(corpus)
                        )

                    # Each conversation starts without a previous statement
                    last_text = None

                    for text in conversation:
                        current = self.get_or_create(text)
                        current.add_tags(corpus.categories)

                        if last_text:
                            current.add_response(Response(last_text))

                        last_text = current.text
                        self.storage.update(current)
|
||||||
|
|
||||||
|
|
||||||
|
class TwitterTrainer(Trainer):
    """
    Allows the chat bot to be trained using data
    gathered from Twitter.

    :param random_seed_word: The seed word to be used to get random tweets from the Twitter API.
                             This parameter is optional. By default it is the word 'random'.
    :param twitter_lang: Language for results as ISO 639-1 code.
                         This parameter is optional. Default is None (all languages).
    """

    def __init__(self, storage, **kwargs):
        super(TwitterTrainer, self).__init__(storage, **kwargs)
        from twitter import Api as TwitterApi

        # The word to be used as the first search term when searching for tweets
        self.random_seed_word = kwargs.get('random_seed_word', 'random')
        self.lang = kwargs.get('twitter_lang')

        self.api = TwitterApi(
            consumer_key=kwargs.get('twitter_consumer_key'),
            consumer_secret=kwargs.get('twitter_consumer_secret'),
            access_token_key=kwargs.get('twitter_access_token_key'),
            access_token_secret=kwargs.get('twitter_access_token_secret')
        )

    def random_word(self, base_word, lang=None):
        """
        Generate a random word using the Twitter API.

        Search twitter for recent tweets containing the base word.
        Then randomly select one word from those tweets and do another
        search with that word. Return a randomly selected word from the
        new set of results.

        When a search yields no usable words, the term that was used for
        that search is returned instead.

        :param base_word: The term used for the first search.
        :param lang: Passed on to the search as the ``lang`` argument.
        """
        import random

        random_tweets = self.api.GetSearch(term=base_word, count=5, lang=lang)
        random_words = self.get_words_from_tweets(random_tweets)

        # random.choice raises IndexError on an empty sequence, so fall
        # back to the search term when no usable word was found
        if not random_words:
            return base_word

        random_word = random.choice(list(random_words))

        tweets = self.api.GetSearch(term=random_word, count=5, lang=lang)
        words = self.get_words_from_tweets(tweets)

        if not words:
            return random_word

        return random.choice(list(words))

    def get_words_from_tweets(self, tweets):
        """
        Given a list of tweets, return the set of
        words from the tweets.
        """
        words = set()

        for tweet in tweets:
            tweet_words = tweet.text.split()

            for word in tweet_words:
                # If the word contains only letters with a length from 4 to 9
                if word.isalpha() and len(word) > 3 and len(word) <= 9:
                    words.add(word)

        return words

    def get_statements(self):
        """
        Returns list of random statements from the API.

        Only tweets that are replies are included. Each one lists the
        text of the tweet it replies to as a response. Replies whose
        original tweet cannot be fetched are logged and skipped.
        """
        from twitter import TwitterError
        statements = []

        # Generate a random word
        random_word = self.random_word(self.random_seed_word, self.lang)

        self.logger.info(u'Requesting 50 random tweets containing the word {}'.format(random_word))
        tweets = self.api.GetSearch(term=random_word, count=50, lang=self.lang)
        for tweet in tweets:
            statement = Statement(tweet.text)

            if tweet.in_reply_to_status_id:
                try:
                    status = self.api.GetStatus(tweet.in_reply_to_status_id)
                    statement.add_response(Response(status.text))
                    statements.append(statement)
                except TwitterError as error:
                    self.logger.warning(str(error))

        self.logger.info('Adding {} tweets with responses'.format(len(statements)))

        return statements

    def train(self):
        """
        Fetch ten batches of statements and save each statement to storage.
        """
        for _ in range(0, 10):
            statements = self.get_statements()
            for statement in statements:
                self.storage.update(statement)
|
||||||
|
|
||||||
|
|
||||||
|
class UbuntuCorpusTrainer(Trainer):
    """
    Allow chatbots to be trained with the data from
    the Ubuntu Dialog Corpus.

    :param ubuntu_corpus_data_download_url: Optional keyword argument.
        The url the corpus archive is downloaded from.
    :param ubuntu_corpus_data_directory: Optional keyword argument. The
        directory the archive is stored in; defaults to './data/'.
    """

    def __init__(self, storage, **kwargs):
        super(UbuntuCorpusTrainer, self).__init__(storage, **kwargs)

        self.data_download_url = kwargs.get(
            'ubuntu_corpus_data_download_url',
            'http://cs.mcgill.ca/~jpineau/datasets/ubuntu-corpus-1.0/ubuntu_dialogs.tgz'
        )

        self.data_directory = kwargs.get(
            'ubuntu_corpus_data_directory',
            './data/'
        )

        self.extracted_data_directory = os.path.join(
            self.data_directory, 'ubuntu_dialogs'
        )

        # Create the data directory if it does not already exist
        if not os.path.exists(self.data_directory):
            os.makedirs(self.data_directory)

    def is_downloaded(self, file_path):
        """
        Check if the data file is already downloaded.

        :returns: True if ``file_path`` exists, otherwise False.
        """
        if os.path.exists(file_path):
            self.logger.info('File is already downloaded')
            return True

        return False

    def is_extracted(self, file_path):
        """
        Check if the data file is already extracted.

        :returns: True if ``file_path`` is a directory, otherwise False.
        """

        if os.path.isdir(file_path):
            self.logger.info('File is already extracted')
            return True
        return False

    def download(self, url, show_status=True):
        """
        Download a file from the given url.
        Show a progress indicator for the download status.
        Based on: http://stackoverflow.com/a/15645088/1547223

        :param url: The url of the file to download.
        :param show_status: Whether to draw the progress indicator.
        :returns: The path of the downloaded file.
        """
        import requests

        file_name = url.split('/')[-1]
        file_path = os.path.join(self.data_directory, file_name)

        # Do not download the data if it already exists
        if self.is_downloaded(file_path):
            return file_path

        print('Downloading %s' % url)

        # Make the request before creating the file, so that a failed
        # request does not leave an empty file behind, and refuse to save
        # an HTTP error response as if it were the archive
        response = requests.get(url, stream=True)
        response.raise_for_status()
        total_length = response.headers.get('content-length')

        completed = False

        try:
            with open(file_path, 'wb') as open_file:
                if total_length is None:
                    # No content length header
                    open_file.write(response.content)
                else:
                    download = 0
                    total_length = int(total_length)
                    for data in response.iter_content(chunk_size=4096):
                        download += len(data)
                        open_file.write(data)
                        if show_status:
                            done = int(50 * download / total_length)
                            sys.stdout.write('\r[%s%s]' % ('=' * done, ' ' * (50 - done)))
                            sys.stdout.flush()

                # Add a new line after the download bar
                sys.stdout.write('\n')

            completed = True
        finally:
            # is_downloaded() only checks that the file exists, so a
            # partially written file must not be left in place
            if not completed and os.path.exists(file_path):
                os.remove(file_path)

        print('Download location: %s' % file_path)
        return file_path

    def extract(self, file_path):
        """
        Extract a tar file at the specified file path.

        :returns: True once the archive has been extracted.
        """
        import tarfile

        print('Extracting {}'.format(file_path))

        if not os.path.exists(self.extracted_data_directory):
            os.makedirs(self.extracted_data_directory)

        def track_progress(members):
            sys.stdout.write('.')
            for member in members:
                # This will be the current file being extracted
                yield member

        # NOTE(review): extractall() trusts the member paths stored in the
        # archive - confirm the download source is trusted
        with tarfile.open(file_path) as tar:
            tar.extractall(path=self.extracted_data_directory, members=track_progress(tar))

        self.logger.info('File extracted to {}'.format(self.extracted_data_directory))

        return True

    def train(self):
        """
        Download and extract the corpus if needed, then train the chat
        bot from every ``.tsv`` file found two directory levels below
        the extracted data directory.

        Within a file, every statement after the first is stored as
        being in response to the statement on the preceding row.
        """
        import glob
        import csv

        # Download and extract the Ubuntu dialog corpus if needed
        corpus_download_path = self.download(self.data_download_url)

        # Extract if the directory does not already exist
        if not self.is_extracted(self.extracted_data_directory):
            self.extract(corpus_download_path)

        extracted_corpus_path = os.path.join(
            self.extracted_data_directory,
            '**', '**', '*.tsv'
        )

        file_kwargs = {}

        if sys.version_info[0] > 2:
            # Specify the encoding in Python versions 3 and up
            file_kwargs['encoding'] = 'utf-8'
            # WARNING: This might fail to read a unicode corpus file in Python 2.x

        for tsv_path in glob.iglob(extracted_corpus_path):
            self.logger.info('Training from: {}'.format(tsv_path))

            with open(tsv_path, 'r', **file_kwargs) as tsv:
                reader = csv.reader(tsv, delimiter='\t')

                previous_statement_text = None

                for row in reader:
                    # The text is read from the fourth column, so skip
                    # rows that are too short to contain it
                    if len(row) > 3:
                        text = row[3]
                        statement = self.get_or_create(text)

                        statement.add_extra_data('datetime', row[0])
                        statement.add_extra_data('speaker', row[1])

                        if row[2].strip():
                            statement.add_extra_data('addressing_speaker', row[2])

                        if previous_statement_text:
                            statement.add_response(
                                Response(previous_statement_text)
                            )

                        previous_statement_text = statement.text
                        self.storage.update(statement)
|
@ -0,0 +1,200 @@
|
|||||||
|
"""
|
||||||
|
ChatterBot utility functions
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def import_module(dotted_path):
    """
    Imports the specified module based on the
    dot notated import path for the module.

    Everything before the last dot is imported as a module and the part
    after it is looked up as an attribute of that module.

    :param dotted_path: A dot notated path such as ``package.module.Name``.
    :returns: The attribute named by the last part of the path.
    """
    import importlib

    module_path, _, attribute_name = dotted_path.rpartition('.')

    return getattr(importlib.import_module(module_path), attribute_name)
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_class(data, **kwargs):
    """
    Import a class and return a new instance of it.

    :param data: A string or dictionary containing a import_path attribute.
        When a dictionary is given, its entries are passed to the class
        as keyword arguments together with ``kwargs``, which take
        precedence. The dictionary itself is left unmodified.
    :returns: An instance of the imported class.
    """
    if isinstance(data, dict):
        import_path = data.get('import_path')

        # Merge into a copy so that the caller's dictionary is not changed
        options = dict(data)
        options.update(kwargs)
        Class = import_module(import_path)

        return Class(**options)
    else:
        Class = import_module(data)

        return Class(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_adapter_class(validate_class, adapter_class):
    """
    Raises an exception if validate_class is not a
    subclass of adapter_class.

    :param validate_class: The class to be validated, given as a dot
        notated import path or as a dictionary with an ``import_path``.
    :type validate_class: class

    :param adapter_class: The class type to check against.
    :type adapter_class: class

    :raises: Adapter.InvalidAdapterTypeException
    """
    from .adapters import Adapter

    target = validate_class

    # If a dictionary was passed in, check if it has an import_path attribute
    if isinstance(target, dict):

        if 'import_path' not in target:
            message = 'The dictionary {} must contain a value for "import_path"'.format(
                str(target)
            )
            raise Adapter.InvalidAdapterTypeException(message)

        # Set the class to the import path for the next check
        target = target.get('import_path')

    candidate = import_module(target)

    if issubclass(candidate, adapter_class):
        return

    raise Adapter.InvalidAdapterTypeException(
        '{} must be a subclass of {}'.format(
            target,
            adapter_class.__name__
        )
    )
|
||||||
|
|
||||||
|
|
||||||
|
def input_function():
    """
    Normalizes reading input between python 2 and 3.
    The function 'raw_input' becomes 'input' in Python 3.

    :returns: The line that was read. On Python 2 a non-empty line is
        decoded from UTF-8.
    """
    import sys

    if sys.version_info[0] >= 3:
        return input()  # NOQA

    user_input = str(raw_input())  # NOQA

    # Avoid problems using format strings with unicode characters
    if user_input:
        user_input = user_input.decode('utf-8')

    return user_input
|
||||||
|
|
||||||
|
|
||||||
|
def nltk_download_corpus(resource_path):
    """
    Download the specified NLTK corpus file
    unless it has already been downloaded.

    Returns True if the corpus needed to be downloaded.

    :param resource_path: The NLTK resource path, for example
        'corpora/stopwords'. A trailing separator is accepted.
    :raises BadZipfile: If the corpus file that was found is not a valid
        zipfile.
    """
    from nltk.data import find
    from nltk import download
    from os.path import split, sep
    from zipfile import BadZipfile

    # Download the NLTK data only if it is not already downloaded.
    # Ignore any trailing separator when taking the corpus name, because
    # splitting 'corpora/stopwords/' would otherwise yield an empty name.
    _, corpus_name = split(resource_path.rstrip('/' + sep))

    # From http://www.nltk.org/api/nltk.html
    # When using find() to locate a directory contained in a zipfile,
    # the resource name must end with the forward slash character.
    # Otherwise, find() will not locate the directory.
    #
    # Helps when resource_path=='sentiment/vader_lexicon'
    if not resource_path.endswith(sep):
        resource_path = resource_path + sep

    downloaded = False

    try:
        find(resource_path)
    except LookupError:
        download(corpus_name)
        downloaded = True
    except BadZipfile:
        raise BadZipfile(
            'The NLTK corpus file being opened is not a zipfile, '
            'or it has been corrupted and needs to be manually deleted.'
        )

    return downloaded
|
||||||
|
|
||||||
|
|
||||||
|
def remove_stopwords(tokens, language):
    """
    Takes a language (i.e. 'english'), and a set of word tokens.
    Returns the tokenized text with any stopwords removed.
    Stop words are words like "is, the, a, ..."

    Be sure to download the required NLTK corpus before calling this function:
    - from chatterbot.utils import nltk_download_corpus
    - nltk_download_corpus('corpora/stopwords')

    :returns: A set holding the tokens that are not stop words.
    """
    from nltk.corpus import stopwords

    # Look up the stop words of the requested language
    unwanted = stopwords.words(language)

    # Keep only the tokens that are not stop words
    return set(tokens).difference(unwanted)
|
||||||
|
|
||||||
|
|
||||||
|
def get_response_time(chatbot):
    """
    Returns the amount of time taken for a given
    chat bot to return a response.

    The time is measured around a single ``get_response('Hello')`` call.

    :param chatbot: A chat bot instance.
    :type chatbot: ChatBot

    :returns: The response time in seconds.
    :rtype: float
    """
    import time

    started_at = time.time()
    chatbot.get_response('Hello')
    finished_at = time.time()

    return finished_at - started_at
|
||||||
|
|
||||||
|
|
||||||
|
def print_progress_bar(description, iteration_counter, total_items, progress_bar_length=20):
    """
    Print progress bar

    The bar is redrawn on the current line of standard output. A line
    break is written once the counter equals the total.

    :param description: Training description
    :type description: str

    :param iteration_counter: Incremental counter
    :type iteration_counter: int

    :param total_items: total number items. A total of zero is reported
        as complete.
    :type total_items: int

    :param progress_bar_length: Progress bar length
    :type progress_bar_length: int

    :returns: None
    """
    import sys

    if total_items:
        percent = float(iteration_counter) / total_items
    else:
        # Nothing to process: report completion instead of dividing by zero
        percent = 1.0

    hashes = '#' * int(round(percent * progress_bar_length))
    spaces = ' ' * (progress_bar_length - len(hashes))
    sys.stdout.write("\r{0}: [{1}] {2}%".format(description, hashes + spaces, int(round(percent * 100))))
    sys.stdout.flush()
    if total_items == iteration_counter:
        print("\r")
|
Loading…
Reference in new issue