include chatterbot

fight-fixes
bobloy 7 years ago
parent 0b6164e93e
commit 3bf5f24a01

@ -7,8 +7,8 @@ from discord.ext import commands
from redbot.core import Config
from redbot.core.bot import Red
from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer
from .chatterbot import ChatBot
from .chatterbot.trainers import ListTrainer
from datetime import datetime,timedelta

@ -0,0 +1,13 @@
"""
ChatterBot is a machine learning, conversational dialog engine.
"""
from .chatterbot import ChatBot
# Package metadata describing the bundled ChatterBot release.
__version__ = '0.8.4'
__author__ = 'Gunther Cox'
__email__ = 'gunthercx@gmail.com'
__url__ = 'https://github.com/gunthercox/ChatterBot'
# Names exported by a star-import of this package.
__all__ = (
    'ChatBot',
)

@ -0,0 +1,23 @@
import sys
if __name__ == '__main__':
    import importlib

    # ``--version``: print the version string of the importable
    # ``chatterbot`` package.
    if '--version' in sys.argv:
        package = importlib.import_module('chatterbot')
        print(package.__version__)

    # ``list_nltk_data``: print every directory on the NLTK search path
    # that exists and is not empty, one per line.
    if 'list_nltk_data' in sys.argv:
        import os
        import nltk.data

        populated = [
            location for location in nltk.data.path
            if os.path.exists(location) and os.listdir(location)
        ]

        print(os.linesep.join(populated))

@ -0,0 +1,47 @@
import logging
class Adapter(object):
    """
    A superclass for all adapter classes.

    :param logger: A python logger. Defaults to the logger of this module.
    :param chatbot: The chat bot instance the adapter belongs to (optional).
    """

    def __init__(self, **kwargs):
        self.logger = kwargs.get('logger', logging.getLogger(__name__))
        self.chatbot = kwargs.get('chatbot')

    def set_chatbot(self, chatbot):
        """
        Gives the adapter access to an instance of the ChatBot class.

        :param chatbot: A chat bot instance.
        :type chatbot: ChatBot
        """
        self.chatbot = chatbot

    class AdapterMethodNotImplementedError(NotImplementedError):
        """
        An exception to be raised when an adapter method has not been implemented.
        Typically this indicates that the developer is expected to implement the
        method in a subclass.
        """

        def __init__(self, message=None):
            """
            Set the message for the exception.

            :param message: Optional text; a generic hint is used when omitted.
            """
            if not message:
                message = 'This method must be overridden in a subclass method.'
            # Initialize the base class with the resolved message so that
            # ``args`` (and therefore repr/pickling) carries it as well.
            NotImplementedError.__init__(self, message)
            self.message = message

        def __str__(self):
            return self.message

    class InvalidAdapterTypeException(Exception):
        """
        An exception to be raised when an adapter
        of an unexpected class type is received.
        """
        pass

@ -0,0 +1,173 @@
from __future__ import unicode_literals
import logging
from .storage import StorageAdapter
from .input import InputAdapter
from .output import OutputAdapter
from . import utils
class ChatBot(object):
    """
    A conversational dialog chat bot.
    """

    def __init__(self, name, **kwargs):
        """
        Build the bot and wire up its storage, input, output and logic adapters.

        :param name: The name of the chat bot.
        :param \\**kwargs: Optional settings. The keys read here are
            ``storage_adapter``, ``logic_adapters``, ``input_adapter``,
            ``output_adapter``, ``filters``, ``preprocessors``, ``trainer``,
            ``training_data``, ``logger``, ``read_only`` and ``initialize``.
            The full dictionary (plus ``name`` and ``chatbot``) is also
            forwarded to every adapter and to the trainer.
        """
        from .logic import MultiLogicAdapter

        self.name = name
        kwargs['name'] = name
        kwargs['chatbot'] = self

        self.default_session = None

        storage_adapter = kwargs.get('storage_adapter', 'chatterbot.storage.SQLStorageAdapter')

        logic_adapters = kwargs.get('logic_adapters', [
            'chatterbot.logic.BestMatch'
        ])

        input_adapter = kwargs.get('input_adapter', 'chatterbot.input.VariableInputTypeAdapter')

        output_adapter = kwargs.get('output_adapter', 'chatterbot.output.OutputAdapter')

        # Check that each adapter is a valid subclass of its respective parent
        utils.validate_adapter_class(storage_adapter, StorageAdapter)
        utils.validate_adapter_class(input_adapter, InputAdapter)
        utils.validate_adapter_class(output_adapter, OutputAdapter)

        self.logic = MultiLogicAdapter(**kwargs)
        self.storage = utils.initialize_class(storage_adapter, **kwargs)
        self.input = utils.initialize_class(input_adapter, **kwargs)
        self.output = utils.initialize_class(output_adapter, **kwargs)

        # Each filter is given as an import path and instantiated without arguments
        filters = kwargs.get('filters', tuple())
        self.filters = tuple(utils.import_module(F)() for F in filters)

        # Add required system logic adapter
        self.logic.system_adapters.append(
            utils.initialize_class('chatterbot.logic.NoKnowledgeAdapter', **kwargs)
        )

        for adapter in logic_adapters:
            self.logic.add_adapter(adapter, **kwargs)

        # Add the chatbot instance to each adapter to share information such as
        # the name, the current conversation, or other adapters
        self.logic.set_chatbot(self)
        self.input.set_chatbot(self)
        self.output.set_chatbot(self)

        preprocessors = kwargs.get(
            'preprocessors', [
                'chatterbot.preprocessors.clean_whitespace'
            ]
        )

        # Preprocessors are import paths resolved to callables
        self.preprocessors = []

        for preprocessor in preprocessors:
            self.preprocessors.append(utils.import_module(preprocessor))

        # Use specified trainer or fall back to the default
        trainer = kwargs.get('trainer', 'chatterbot.trainers.Trainer')
        TrainerClass = utils.import_module(trainer)
        self.trainer = TrainerClass(self.storage, **kwargs)
        self.training_data = kwargs.get('training_data')

        # Created lazily by get_response() when no conversation id is supplied
        self.default_conversation_id = None

        self.logger = kwargs.get('logger', logging.getLogger(__name__))

        # Allow the bot to save input it receives so that it can learn
        self.read_only = kwargs.get('read_only', False)

        if kwargs.get('initialize', True):
            self.initialize()

    def initialize(self):
        """
        Do any work that needs to be done before the responses can be returned.
        """
        self.logic.initialize()

    def get_response(self, input_item, conversation_id=None):
        """
        Return the bot's response based on the input.

        :param input_item: An input value.
        :param conversation_id: The id of a conversation. When falsy, a default
            conversation is created on first use and reused afterwards.
        :returns: A response to the input.
        :rtype: Statement
        """
        if not conversation_id:
            if not self.default_conversation_id:
                self.default_conversation_id = self.storage.create_conversation()
            conversation_id = self.default_conversation_id

        input_statement = self.input.process_input_statement(input_item)

        # Preprocess the input statement
        for preprocessor in self.preprocessors:
            input_statement = preprocessor(self, input_statement)

        statement, response = self.generate_response(input_statement, conversation_id)

        # Learn that the user's input was a valid response to the chat bot's previous output
        previous_statement = self.storage.get_latest_response(conversation_id)

        if not self.read_only:
            self.learn_response(statement, previous_statement)
            self.storage.add_to_conversation(conversation_id, statement, response)

        # Process the response output with the output adapter
        return self.output.process_response(response, conversation_id)

    def generate_response(self, input_statement, conversation_id):
        """
        Return a response based on a given input statement.

        :param input_statement: The (preprocessed) input statement.
        :param conversation_id: The id of the current conversation.
        :returns: A tuple of the input statement and the selected response.
        """
        self.storage.generate_base_query(self, conversation_id)

        # Select a response to the input statement
        response = self.logic.process(input_statement)

        return input_statement, response

    def learn_response(self, statement, previous_statement):
        """
        Learn that the statement provided is a valid response.

        :param statement: The statement to save.
        :param previous_statement: The statement it responds to; may be falsy,
            in which case the statement is saved without a new response link.
        """
        from .conversation import Response

        if previous_statement:
            statement.add_response(
                Response(previous_statement.text)
            )
            self.logger.info('Adding "{}" as a response to "{}"'.format(
                statement.text,
                previous_statement.text
            ))

        # Save the statement after selecting a response
        self.storage.update(statement)

    def set_trainer(self, training_class, **kwargs):
        r"""
        Set the module used to train the chatbot.

        :param training_class: The training class to use for the chat bot.
        :type training_class: `Trainer`

        :param \**kwargs: Any parameters that should be passed to the training class.
        """
        if 'chatbot' not in kwargs:
            kwargs['chatbot'] = self

        self.trainer = training_class(self.storage, **kwargs)

    @property
    def train(self):
        """
        Proxy method to the chat bot's trainer class.
        """
        return self.trainer.train

@ -0,0 +1,331 @@
# -*- coding: utf-8 -*-
import sys
"""
This module contains various text-comparison algorithms
designed to compare one statement to another.
"""
# Use python-Levenshtein if available
try:
from Levenshtein.StringMatcher import StringMatcher as SequenceMatcher
except ImportError:
from difflib import SequenceMatcher
class Comparator:
    """
    Base class for statement comparison algorithms.

    Instances are callable; calling one is the same as calling ``compare``.
    """

    def __call__(self, statement_a, statement_b):
        return self.compare(statement_a, statement_b)

    def compare(self, statement_a, statement_b):
        """
        Baseline comparison that always reports a similarity of 0.
        """
        return 0

    def get_initialization_functions(self):
        """
        Return all initialization methods for the comparison algorithm.

        Initialization methods must start with 'initialize_' and
        take no parameters.

        :returns: A dictionary mapping each attribute name that starts with
            'initialize_' to the attribute itself.
        :rtype: dict
        """
        return {
            name: getattr(self, name)
            for name in dir(self)
            if name.startswith('initialize_')
        }
class LevenshteinDistance(Comparator):
    """
    Compare two statements based on the Levenshtein distance
    of each statement's text.

    For example, there is a 65% similarity between the statements
    "where is the post office?" and "looking for the post office"
    based on the Levenshtein distance algorithm.
    """

    def compare(self, statement, other_statement):
        """
        Compare the two input statements.

        :return: The percent of similarity between the text of the statements,
            rounded to two decimal places; 0 when either text is falsy.
        :rtype: float
        """
        major_version = sys.version_info[0]

        # Nothing to compare when either statement has no text
        if not (statement.text and other_statement.text):
            return 0

        # Lowercase both texts, coercing to the native text type of the
        # running interpreter
        if major_version >= 3:
            first_text = str(statement.text.lower())
            second_text = str(other_statement.text.lower())
        else:
            first_text = unicode(statement.text.lower())  # NOQA
            second_text = unicode(other_statement.text.lower())  # NOQA

        matcher = SequenceMatcher(None, first_text, second_text)

        # Express the similarity ratio with two decimal places
        return round(matcher.ratio(), 2)
class SynsetDistance(Comparator):
    """
    Calculate the similarity of two statements.
    This is based on the total maximum synset similarity between each word in each sentence.

    This algorithm uses the `wordnet`_ functionality of `NLTK`_ to determine the similarity
    of two statements based on the path similarity between each token of each statement.
    This is essentially an evaluation of the closeness of synonyms.
    """

    def initialize_nltk_wordnet(self):
        """
        Download required NLTK corpora if they have not already been downloaded.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('corpora/wordnet')

    def initialize_nltk_punkt(self):
        """
        Download required NLTK corpora if they have not already been downloaded.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('tokenizers/punkt')

    def initialize_nltk_stopwords(self):
        """
        Download required NLTK corpora if they have not already been downloaded.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('corpora/stopwords')

    def compare(self, statement, other_statement):
        """
        Compare the two input statements.

        :return: The percent of similarity between the closest synset distance.
        :rtype: float

        .. _wordnet: http://www.nltk.org/howto/wordnet.html
        .. _NLTK: http://www.nltk.org/
        """
        from nltk.corpus import wordnet
        from nltk import word_tokenize
        # Relative import, matching the ``.utils`` imports used by the
        # initialize_* methods, so the module also works when the package
        # is bundled inside another package.
        from . import utils
        import itertools

        tokens1 = word_tokenize(statement.text.lower())
        tokens2 = word_tokenize(other_statement.text.lower())

        # Remove all stop words from the list of word tokens
        tokens1 = utils.remove_stopwords(tokens1, language='english')
        tokens2 = utils.remove_stopwords(tokens2, language='english')

        # The maximum possible similarity is an exact match
        # Because path_similarity returns a value between 0 and 1,
        # max_possible_similarity is the number of words in the longer
        # of the two input statements.
        max_possible_similarity = max(
            len(statement.text.split()),
            len(other_statement.text.split())
        )

        max_similarity = 0.0

        # Get the highest matching value for each possible combination of words
        for word1, word2 in itertools.product(tokens1, tokens2):

            synset1 = wordnet.synsets(word1)
            synset2 = wordnet.synsets(word2)

            if synset1 and synset2:

                # Get the highest similarity for each combination of synsets
                for sense1, sense2 in itertools.product(synset1, synset2):
                    similarity = sense1.path_similarity(sense2)

                    # path_similarity may yield a falsy value; skip those
                    if similarity and (similarity > max_similarity):
                        max_similarity = similarity

        # Avoid dividing by zero when both statements contain no words
        if max_possible_similarity == 0:
            return 0

        return max_similarity / max_possible_similarity
class SentimentComparison(Comparator):
    """
    Calculate the similarity of two statements based on the closeness of
    the sentiment value calculated for each statement.
    """

    def initialize_nltk_vader_lexicon(self):
        """
        Download the NLTK vader lexicon for sentiment analysis
        that is required for this algorithm to run.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('sentiment/vader_lexicon')

    def compare(self, statement, other_statement):
        """
        Return the similarity of two statements based on
        their calculated sentiment values.

        :return: The percent of similarity between the sentiment value;
            0 when the strongest polarity of the two statements differs.
        :rtype: float
        """
        from nltk.sentiment.vader import SentimentIntensityAnalyzer

        def strongest(scores):
            # Walk the keys in sorted order and keep the first one holding
            # the highest score; 'neu' / -1 is the starting point.
            label, peak = 'neu', -1
            for name in sorted(scores):
                if scores[name] > peak:
                    label, peak = name, scores[name]
            return label, peak

        analyzer = SentimentIntensityAnalyzer()
        first_scores = analyzer.polarity_scores(statement.text.lower())
        second_scores = analyzer.polarity_scores(other_statement.text.lower())

        first_label, first_peak = strongest(first_scores)
        second_label, second_peak = strongest(second_scores)

        # Statements whose dominant polarity differs are not similar at all
        if first_label != second_label:
            return 0

        peaks = [first_peak, second_peak]
        return 1.0 - (max(peaks) - min(peaks))
class JaccardSimilarity(Comparator):
    """
    Calculates the similarity of two statements based on the Jaccard index.

    The Jaccard index is composed of a numerator and denominator.
    In the numerator, we count the number of items that are shared between the sets.
    In the denominator, we count the total number of items across both sets.
    Let's say we define sentences to be equivalent if 50% or more of their tokens are equivalent.
    Here are two sample sentences:

        The young cat is hungry.
        The cat is very hungry.

    When we parse these sentences to remove stopwords, we end up with the following two sets:

        {young, cat, hungry}
        {cat, very, hungry}

    In our example above, our intersection is {cat, hungry}, which has count of two.
    The union of the sets is {young, cat, very, hungry}, which has a count of four.
    Therefore, our `Jaccard similarity index`_ is two divided by four, or 50%.
    Given our similarity threshold above, we would consider this to be a match.

    .. _`Jaccard similarity index`: https://en.wikipedia.org/wiki/Jaccard_index
    """

    SIMILARITY_THRESHOLD = 0.5

    def initialize_nltk_wordnet(self):
        """
        Download the NLTK wordnet corpora that is required for this algorithm
        to run only if the corpora has not already been downloaded.
        """
        from .utils import nltk_download_corpus

        nltk_download_corpus('corpora/wordnet')

    def compare(self, statement, other_statement):
        """
        Return the calculated similarity of two
        statements based on the Jaccard index.

        :return: True when the Jaccard index of the noun lemmas of both
            statements reaches ``SIMILARITY_THRESHOLD``, otherwise False.
        :rtype: bool
        """
        from nltk.corpus import wordnet
        import nltk
        import string

        # Get default English stopwords and extend with punctuation.
        # A set keeps the per-token membership test cheap.
        stopwords = set(nltk.corpus.stopwords.words('english'))
        stopwords.update(string.punctuation)
        stopwords.add('')

        lemmatizer = nltk.stem.wordnet.WordNetLemmatizer()

        def get_wordnet_pos(pos_tag):
            # Map the first letter of the tag onto a wordnet part of
            # speech; anything unrecognised is treated as a noun.
            token, tag = pos_tag[0], pos_tag[1]
            if tag.startswith('J'):
                return (token, wordnet.ADJ)
            if tag.startswith('V'):
                return (token, wordnet.VERB)
            if tag.startswith('R'):
                return (token, wordnet.ADV)
            return (token, wordnet.NOUN)

        def noun_lemmas(text):
            # Lemmatize every noun token of the text that is not a stop word
            tagged = nltk.pos_tag(nltk.tokenize.word_tokenize(text))
            lemmas = set()
            for token, pos in map(get_wordnet_pos, tagged):
                stripped = token.strip(string.punctuation)
                if pos == wordnet.NOUN and stripped not in stopwords:
                    lemmas.add(lemmatizer.lemmatize(stripped, pos))
            return lemmas

        lemma_a = noun_lemmas(statement.text.lower())
        lemma_b = noun_lemmas(other_statement.text.lower())

        # Calculate Jaccard similarity. When neither statement yields a
        # usable lemma the union is empty and the index is defined as 0
        # here instead of dividing by zero.
        union = lemma_a | lemma_b
        ratio = 0
        if union:
            ratio = len(lemma_a & lemma_b) / float(len(union))

        return ratio >= self.SIMILARITY_THRESHOLD
# ---------------------------------------- #
# Ready-made instances of the comparison classes defined in this module.
# Each one can be called with two statements, because Comparator.__call__
# forwards to compare().
levenshtein_distance = LevenshteinDistance()
synset_distance = SynsetDistance()
sentiment_comparison = SentimentComparison()
jaccard_similarity = JaccardSimilarity()

@ -0,0 +1,15 @@
"""
ChatterBot constants
"""
'''
The maximum length of characters that the text of a statement can contain.
This should be enforced on a per-model basis by the data model for each
storage adapter.
'''
STATEMENT_TEXT_MAX_LENGTH = 400
# The maximum length of characters that the name of a tag can contain
TAG_NAME_MAX_LENGTH = 50
# The name of the Django app used by default for the ChatterBot models
DEFAULT_DJANGO_APP_NAME = 'django_chatterbot'

@ -0,0 +1,229 @@
class StatementMixin(object):
    """
    Shared helpers that give the different statement
    models a common interface for working with tags.
    """

    def get_tags(self):
        """
        Return the tags stored on this statement.
        """
        return self.tags

    def add_tags(self, tags):
        """
        Append every string in ``tags`` to the statement's tags.
        """
        for label in tags:
            self.tags.append(label)
class Statement(StatementMixin):
    """
    A statement represents a single spoken entity, sentence or
    phrase that someone can say.
    """

    def __init__(self, text, **kwargs):
        """
        :param text: The text of the statement; converted to a string.
        :param \\**kwargs: Optional ``tags`` (list), ``in_response_to``
            (list of responses) and ``extra_data`` (dict).
        """
        import sys

        # Try not to allow non-string types to be passed to statements
        try:
            text = str(text)
        except UnicodeEncodeError:
            pass

        # Prefer decoded utf8-strings in Python 2.7
        if sys.version_info[0] < 3:
            try:
                text = text.decode('utf-8')
            except UnicodeEncodeError:
                pass

        self.text = text
        self.tags = kwargs.pop('tags', [])
        self.in_response_to = kwargs.pop('in_response_to', [])

        self.extra_data = kwargs.pop('extra_data', {})

        # This is the confidence with which the chat bot believes
        # this is an accurate response. This value is set when the
        # statement is returned by the chat bot.
        self.confidence = 0

        self.storage = None

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<Statement text:%s>' % (self.text)

    def __hash__(self):
        return hash(self.text)

    def __eq__(self, other):
        # Any falsy value (None, '', 0, ...) never equals a statement
        if not other:
            return False

        if isinstance(other, Statement):
            return self.text == other.text

        # Otherwise compare the text against the raw value
        return self.text == other

    def save(self):
        """
        Save the statement in the database.
        """
        self.storage.update(self)

    def add_extra_data(self, key, value):
        """
        This method allows additional data to be stored on the statement object.

        Typically this data is something that pertains just to this statement.
        For example, a value stored here might be the tagged parts of speech for
        each word in the statement text.

            - key = 'pos_tags'
            - value = [('Now', 'RB'), ('for', 'IN'), ('something', 'NN'), ('different', 'JJ')]

        :param key: The key to use in the dictionary of extra data.
        :type key: str

        :param value: The value to set for the specified key.
        """
        self.extra_data[key] = value

    def add_response(self, response):
        """
        Add the response to the list of statements that this statement is in response to.
        If the response is already in the list, increment the occurrence count of that response.

        :param response: The response to add.
        :type response: `Response`

        :raises Statement.InvalidTypeException: If ``response`` is not a
            `Response` instance.
        """
        if not isinstance(response, Response):
            # Name the expected class directly; there is no need to build
            # a throwaway Response just to look up its type.
            raise Statement.InvalidTypeException(
                'A {} was received when a {} instance was expected'.format(
                    type(response),
                    Response
                )
            )

        updated = False
        for existing in self.in_response_to:
            if response.text == existing.text:
                existing.occurrence += 1
                updated = True

        if not updated:
            self.in_response_to.append(response)

    def remove_response(self, response_text):
        """
        Removes a response from the statement's response list based
        on the value of the response text.

        :param response_text: The text of the response to be removed.
        :type response_text: str

        :returns: True if a response was removed, otherwise False.
        :rtype: bool
        """
        for response in self.in_response_to:
            if response_text == response.text:
                # Returning right away keeps the removal from disturbing
                # the iteration over the same list.
                self.in_response_to.remove(response)
                return True
        return False

    def get_response_count(self, statement):
        """
        Find the number of times that the statement has been used
        as a response to the current statement.

        :param statement: The statement object to get the count for.
        :type statement: `Statement`

        :returns: Return the number of times the statement has been used as a response.
        :rtype: int
        """
        for response in self.in_response_to:
            if statement.text == response.text:
                return response.occurrence

        return 0

    def serialize(self):
        """
        :returns: A dictionary representation of the statement object.
        :rtype: dict
        """
        data = {}

        data['text'] = self.text
        data['in_response_to'] = []
        data['extra_data'] = self.extra_data

        for response in self.in_response_to:
            data['in_response_to'].append(response.serialize())

        return data

    @property
    def response_statement_cache(self):
        """
        This property is to allow ChatterBot Statement objects to
        be swappable with Django Statement models.
        """
        return self.in_response_to

    class InvalidTypeException(Exception):
        """
        Raised when a value of an unexpected type is passed to a statement.
        """

        def __init__(self, value='Received an unexpected value type.'):
            self.value = value

        def __str__(self):
            return repr(self.value)
class Response(object):
    """
    A response represents an entity which responds to a statement.
    """

    def __init__(self, text, **kwargs):
        """
        :param text: The text of the response.
        :param \\**kwargs: Optional ``created_at`` (a datetime, or a value
            that ``dateutil`` can parse into one; defaults to now) and
            ``occurrence`` (defaults to 1).
        """
        from datetime import datetime

        self.text = text
        self.created_at = kwargs.get('created_at', datetime.now())
        self.occurrence = kwargs.get('occurrence', 1)

        if not isinstance(self.created_at, datetime):
            # dateutil is only needed to parse non-datetime values, so it
            # is imported on demand rather than on every construction.
            from dateutil import parser as date_parser
            self.created_at = date_parser.parse(self.created_at)

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<Response text:%s>' % (self.text)

    def __hash__(self):
        return hash(self.text)

    def __eq__(self, other):
        # Any falsy value (None, '', 0, ...) never equals a response
        if not other:
            return False

        if isinstance(other, Response):
            return self.text == other.text

        # Otherwise compare the text against the raw value
        return self.text == other

    def serialize(self):
        """
        :returns: A dictionary with the text, the ISO formatted creation
            time and the occurrence count of the response.
        :rtype: dict
        """
        data = {}

        data['text'] = self.text
        data['created_at'] = self.created_at.isoformat()
        data['occurrence'] = self.occurrence

        return data

@ -0,0 +1,11 @@
"""
Seamlessly import the external chatterbot corpus module.
View the corpus on GitHub at https://github.com/gunthercox/chatterbot-corpus
"""
from chatterbot_corpus import Corpus
__all__ = (
'Corpus',
)

@ -0,0 +1,3 @@
# Dotted path naming the DjangoChatterBotConfig class for this app.
default_app_config = (
    'chatterbot.ext.django_chatterbot.apps.DjangoChatterBotConfig'
)

@ -0,0 +1,261 @@
from chatterbot.conversation import StatementMixin
from chatterbot import constants
from django.db import models
from django.apps import apps
from django.utils import timezone
from django.conf import settings
# Defaults used when the project settings do not override them.
DJANGO_APP_NAME = constants.DEFAULT_DJANGO_APP_NAME
STATEMENT_MODEL = 'Statement'
RESPONSE_MODEL = 'Response'

# Allow related models to be overridden in the project settings.
# Default to the original settings if one is not defined.
if hasattr(settings, 'CHATTERBOT'):
    DJANGO_APP_NAME = settings.CHATTERBOT.get('django_app_name', DJANGO_APP_NAME)
    STATEMENT_MODEL = settings.CHATTERBOT.get('statement_model', STATEMENT_MODEL)
    RESPONSE_MODEL = settings.CHATTERBOT.get('response_model', RESPONSE_MODEL)
class AbstractBaseStatement(models.Model, StatementMixin):
    """
    The abstract base statement allows other models to
    be created using the attributes that exist on the
    default models.
    """

    text = models.CharField(
        unique=True,
        blank=False,
        null=False,
        max_length=constants.STATEMENT_TEXT_MAX_LENGTH
    )

    # JSON encoded dictionary of additional data (see add_extra_data)
    extra_data = models.CharField(
        max_length=500,
        blank=True
    )

    # This is the confidence with which the chat bot believes
    # this is an accurate response. This value is set when the
    # statement is returned by the chat bot.
    confidence = 0

    class Meta:
        abstract = True

    def __str__(self):
        # Long texts are cut to 57 characters followed by an ellipsis
        if len(self.text.strip()) > 60:
            return '{}...'.format(self.text[:57])
        elif len(self.text.strip()) > 0:
            return self.text
        return '<empty>'

    def __init__(self, *args, **kwargs):
        super(AbstractBaseStatement, self).__init__(*args, **kwargs)

        # Responses to be saved if the statement is updated with the storage adapter
        self.response_statement_cache = []

    @property
    def in_response_to(self):
        """
        Return the response objects that are for this statement.
        """
        ResponseModel = apps.get_model(DJANGO_APP_NAME, RESPONSE_MODEL)
        return ResponseModel.objects.filter(statement=self)

    def add_extra_data(self, key, value):
        """
        Add extra data to the extra_data field.

        :param key: The key to set in the JSON encoded dictionary.
        :param value: The value to store; it must be JSON serializable.
        """
        import json

        if not self.extra_data:
            self.extra_data = '{}'

        extra_data = json.loads(self.extra_data)
        extra_data[key] = value

        self.extra_data = json.dumps(extra_data)

    def add_tags(self, tags):
        """
        Add a list of strings to the statement as tags.
        (Overrides the method from StatementMixin)
        """
        for tag in tags:
            self.tags.create(
                name=tag
            )

    def add_response(self, statement):
        """
        Add a response to this statement.

        The statement is only appended to ``response_statement_cache`` here.
        """
        self.response_statement_cache.append(statement)

    def remove_response(self, response_text):
        """
        Removes a response from the statement's response list based
        on the value of the response text.

        :param response_text: The text of the response to be removed.
        :type response_text: str

        :returns: True if at least one matching response existed and was
            deleted, otherwise False.
        :rtype: bool
        """
        matches = self.in_response.filter(response__text=response_text)

        if not matches.exists():
            return False

        # Actually delete the matching rows instead of only reporting them
        matches.delete()
        return True

    def get_response_count(self, statement):
        """
        Find the number of times that the statement has been used
        as a response to the current statement.

        :param statement: The statement object to get the count for.
        :type statement: chatterbot.conversation.Statement

        :returns: Return the number of times the statement has been used as a response.
        :rtype: int
        """
        return self.in_response.filter(response__text=statement.text).count()

    def serialize(self):
        """
        :returns: A dictionary representation of the statement object.
        :rtype: dict
        """
        import json
        data = {}

        if not self.extra_data:
            self.extra_data = '{}'

        data['text'] = self.text
        data['in_response_to'] = []
        data['extra_data'] = json.loads(self.extra_data)

        for response in self.in_response.all():
            data['in_response_to'].append(response.serialize())

        return data
class AbstractBaseResponse(models.Model):
    """
    Abstract model linking a statement to a statement that responds
    to it, so that concrete models can reuse the default attributes.
    """

    statement = models.ForeignKey(
        STATEMENT_MODEL,
        related_name='in_response',
        on_delete=models.CASCADE
    )

    response = models.ForeignKey(
        STATEMENT_MODEL,
        related_name='responses',
        on_delete=models.CASCADE
    )

    created_at = models.DateTimeField(
        default=timezone.now,
        help_text='The date and time that this response was created at.'
    )

    class Meta:
        abstract = True

    @property
    def occurrence(self):
        """
        Count the stored responses that share both the statement text
        and the response text of this response.
        """
        response_model = apps.get_model(DJANGO_APP_NAME, RESPONSE_MODEL)
        matching = response_model.objects.filter(
            statement__text=self.statement.text,
            response__text=self.response.text
        )
        return matching.count()

    def __str__(self):
        source_text = self.statement.text
        reply_text = self.response.text

        # Abbreviate long texts: 20 characters at most for the statement
        # and 40 at most for the response, ellipsis included.
        if len(source_text) > 20:
            source_text = source_text[:17] + '...'
        if len(reply_text) > 40:
            reply_text = reply_text[:37] + '...'

        return '{} => {}'.format(source_text, reply_text)

    def serialize(self):
        """
        :returns: A dictionary with the response text, the ISO formatted
            creation time and the occurrence count.
        :rtype: dict
        """
        return {
            'text': self.response.text,
            'created_at': self.created_at.isoformat(),
            'occurrence': self.occurrence,
        }
class AbstractBaseConversation(models.Model):
    """
    Abstract model for a conversation, so that concrete
    models can reuse the default attributes.
    """

    responses = models.ManyToManyField(
        RESPONSE_MODEL,
        related_name='conversations',
        help_text='The responses in this conversation.'
    )

    class Meta:
        abstract = True

    def __str__(self):
        # A conversation is represented by its id
        return str(self.id)
class AbstractBaseTag(models.Model):
    """
    Abstract model for a tag attached to statements, so that
    concrete models can reuse the default attributes.
    """

    name = models.SlugField(
        max_length=constants.TAG_NAME_MAX_LENGTH
    )

    statements = models.ManyToManyField(
        STATEMENT_MODEL,
        related_name='tags'
    )

    class Meta:
        abstract = True

    def __str__(self):
        # A tag is represented by its name
        return self.name

@ -0,0 +1,31 @@
from django.contrib import admin
from chatterbot.ext.django_chatterbot.models import (
Statement, Response, Conversation, Tag
)
class StatementAdmin(admin.ModelAdmin):
    """Admin options for statements: list, filter and search by text."""

    list_display = ('text', )
    list_filter = ('text', )
    search_fields = ('text', )
class ResponseAdmin(admin.ModelAdmin):
    """Admin options for responses, searchable by either statement text."""

    list_display = ('statement', 'response', 'occurrence', )
    search_fields = ['statement__text', 'response__text']
class ConversationAdmin(admin.ModelAdmin):
    """Admin options for conversations; only the id is listed."""

    list_display = ('id', )
class TagAdmin(admin.ModelAdmin):
    """Admin options for tags: list, filter and search by name."""

    list_display = ('name', )
    list_filter = ('name', )
    search_fields = ('name', )
# Register each model with the admin site using its options class.
admin.site.register(Statement, StatementAdmin)
admin.site.register(Response, ResponseAdmin)
admin.site.register(Conversation, ConversationAdmin)
admin.site.register(Tag, TagAdmin)

@ -0,0 +1,8 @@
from django.apps import AppConfig
class DjangoChatterBotConfig(AppConfig):
    """Application configuration for the Django ChatterBot app."""

    name = 'chatterbot.ext.django_chatterbot'
    label = 'django_chatterbot'
    verbose_name = 'Django ChatterBot'

@ -0,0 +1,42 @@
"""
These factories are used to generate fake data for testing.
"""
import factory
from chatterbot.ext.django_chatterbot import models
from chatterbot import constants
from factory.django import DjangoModelFactory
class StatementFactory(DjangoModelFactory):
    """Factory for Statement models filled with fake text."""

    # The fake text is capped at the maximum statement length
    text = factory.Faker(
        'text',
        max_nb_chars=constants.STATEMENT_TEXT_MAX_LENGTH
    )

    class Meta:
        model = models.Statement
class ResponseFactory(DjangoModelFactory):
    """Factory for Response models; both ends come from StatementFactory."""

    statement = factory.SubFactory(StatementFactory)
    response = factory.SubFactory(StatementFactory)

    class Meta:
        model = models.Response
class ConversationFactory(DjangoModelFactory):
    """Factory for Conversation models."""

    class Meta:
        model = models.Conversation
class TagFactory(DjangoModelFactory):
    """Factory for Tag models named with a fake word."""

    name = factory.Faker('word')

    class Meta:
        model = models.Tag

@ -0,0 +1,29 @@
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """
    A Django management command for calling a
    chat bot's training method.
    """

    help = 'Trains the database used by the chat bot'
    can_import_settings = True

    def handle(self, *args, **options):
        """
        Build a chat bot from ``settings.CHATTERBOT``, train it with its
        configured training data and report progress on stdout.
        """
        from chatterbot import ChatBot
        from chatterbot.ext.django_chatterbot import settings

        # Django 1.8 does not define SUCCESS
        if hasattr(self.style, 'SUCCESS'):
            style = self.style.SUCCESS
        else:
            style = self.style.NOTICE

        chatterbot = ChatBot(**settings.CHATTERBOT)

        # Announce the training before it runs, not after it has finished
        self.stdout.write(style('Starting training...'))

        chatterbot.train(chatterbot.training_data)

        training_class = chatterbot.trainer.__class__.__name__
        self.stdout.write(style('ChatterBot trained using "%s"' % training_class))

@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """
    Initial migration: creates the Response and Statement models and adds
    the two foreign keys from Response to Statement.
    """

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Response',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('occurrence', models.PositiveIntegerField(default=0)),
            ],
        ),
        migrations.CreateModel(
            name='Statement',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('text', models.CharField(max_length=255, unique=True)),
            ],
        ),
        # The foreign keys are added after both models have been created
        migrations.AddField(
            model_name='response',
            name='response',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='+', to='django_chatterbot.Statement'),
        ),
        migrations.AddField(
            model_name='response',
            name='statement',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='in_response_to', to='django_chatterbot.Statement'),
        ),
    ]

@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.2 on 2016-10-30 12:13
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Adds the ``extra_data`` character field to the Statement model.
    """

    dependencies = [
        ('django_chatterbot', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='statement',
            name='extra_data',
            field=models.CharField(default='{}', max_length=500),
            preserve_default=False,
        ),
    ]

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2016-12-12 00:06
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Changes the default of ``Response.occurrence`` to 1
    (the initial migration declared it with a default of 0).
    """

    dependencies = [
        ('django_chatterbot', '0002_statement_extra_data'),
    ]

    operations = [
        migrations.AlterField(
            model_name='response',
            name='occurrence',
            field=models.PositiveIntegerField(default=1),
        ),
    ]

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.3 on 2016-12-04 23:52
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """
    Renames the reverse accessors of the two ``Response`` foreign keys:
    ``statement`` now uses ``in_response`` and ``response`` uses ``responses``.
    """

    dependencies = [
        ('django_chatterbot', '0003_change_occurrence_default'),
    ]

    operations = [
        migrations.AlterField(
            model_name='response',
            name='statement',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='in_response', to='django_chatterbot.Statement'),
        ),
        migrations.AlterField(
            model_name='response',
            name='response',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='responses', to='django_chatterbot.Statement'),
        ),
    ]

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.1 on 2016-12-29 19:20
from __future__ import unicode_literals
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
    """
    Adds a ``created_at`` timestamp field to the ``Statement`` model.
    """

    dependencies = [
        ('django_chatterbot', '0004_rename_in_response_to'),
    ]

    operations = [
        migrations.AddField(
            model_name='statement',
            name='created_at',
            field=models.DateTimeField(
                # The callable is passed uncalled as the field default.
                default=django.utils.timezone.now,
                help_text='The date and time that this statement was created at.'
            ),
        ),
    ]

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9 on 2017-01-17 07:02
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
class Migration(migrations.Migration):
    """
    Creates the ``Conversation`` model and links it to ``Statement``
    through a many-to-many ``statements`` field.
    """

    dependencies = [
        ('django_chatterbot', '0005_statement_created_at'),
    ]

    operations = [
        migrations.CreateModel(
            name='Conversation',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
            ],
        ),
        # Re-declares Statement.created_at with the same default and help text
        # as the previous migration.
        migrations.AlterField(
            model_name='statement',
            name='created_at',
            field=models.DateTimeField(default=django.utils.timezone.now, help_text='The date and time that this statement was created at.'),
        ),
        migrations.AddField(
            model_name='conversation',
            name='statements',
            field=models.ManyToManyField(help_text='The statements in this conversation.', related_name='conversation', to='django_chatterbot.Statement'),
        ),
    ]

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11 on 2017-07-18 00:16
from __future__ import unicode_literals
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
    """
    Adds a ``created_at`` timestamp field to the ``Response`` model.
    """

    dependencies = [
        ('django_chatterbot', '0006_create_conversation'),
    ]

    operations = [
        migrations.AddField(
            model_name='response',
            name='created_at',
            field=models.DateTimeField(
                # The callable is passed uncalled as the field default.
                default=django.utils.timezone.now,
                help_text='The date and time that this response was created at.'
            ),
        ),
    ]

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11 on 2017-07-18 11:25
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Reworks conversations to reference responses instead of statements.

    Removes ``Conversation.statements``, ``Response.occurrence`` and
    ``Statement.created_at``, then adds a many-to-many
    ``Conversation.responses`` field.
    """

    dependencies = [
        ('django_chatterbot', '0007_response_created_at'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='conversation',
            name='statements',
        ),
        migrations.RemoveField(
            model_name='response',
            name='occurrence',
        ),
        migrations.RemoveField(
            model_name='statement',
            name='created_at',
        ),
        migrations.AddField(
            model_name='conversation',
            name='responses',
            field=models.ManyToManyField(help_text='The responses in this conversation.', related_name='conversations', to='django_chatterbot.Response'),
        ),
    ]

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11a1 on 2017-07-07 00:12
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Creates the ``Tag`` model and links it to ``Statement`` through a
    many-to-many ``statements`` field (reverse accessor ``tags``).
    """

    dependencies = [
        ('django_chatterbot', '0008_update_conversations'),
    ]

    operations = [
        migrations.CreateModel(
            name='Tag',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.SlugField()),
            ],
            options={
                'abstract': False,
            },
        ),
        # Re-declares Statement.text with the same arguments as the
        # initial migration (max_length=255, unique=True).
        migrations.AlterField(
            model_name='statement',
            name='text',
            field=models.CharField(max_length=255, unique=True),
        ),
        migrations.AddField(
            model_name='tag',
            name='statements',
            field=models.ManyToManyField(related_name='tags', to='django_chatterbot.Statement'),
        ),
    ]

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.4 on 2017-08-16 00:56
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Raises the maximum length of ``Statement.text`` from 255 to 400.
    """

    dependencies = [
        ('django_chatterbot', '0009_tags'),
    ]

    operations = [
        migrations.AlterField(
            model_name='statement',
            name='text',
            field=models.CharField(max_length=400, unique=True),
        ),
    ]

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.4 on 2017-08-20 13:55
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Re-declares ``Statement.extra_data`` with ``blank=True``.
    """

    dependencies = [
        ('django_chatterbot', '0010_statement_text'),
    ]

    operations = [
        migrations.AlterField(
            model_name='statement',
            name='extra_data',
            field=models.CharField(blank=True, max_length=500),
        ),
    ]

@ -0,0 +1,34 @@
from chatterbot.ext.django_chatterbot.abstract_models import (
AbstractBaseConversation, AbstractBaseResponse,
AbstractBaseStatement, AbstractBaseTag
)
class Statement(AbstractBaseStatement):
    """
    A single spoken entity, sentence or phrase that someone can say.

    This class adds nothing of its own; everything is inherited from
    ``AbstractBaseStatement``.
    """
class Response(AbstractBaseResponse):
    """
    A connection between a statement and another statement
    that responds to it.

    This class adds nothing of its own; everything is inherited from
    ``AbstractBaseResponse``.
    """
class Conversation(AbstractBaseConversation):
    """
    A sequence of statements representing a conversation.

    This class adds nothing of its own; everything is inherited from
    ``AbstractBaseConversation``.
    """
class Tag(AbstractBaseTag):
    """
    A label that categorizes a statement.

    This class adds nothing of its own; everything is inherited from
    ``AbstractBaseTag``.
    """

@ -0,0 +1,19 @@
"""
Default ChatterBot settings for Django.
"""
from django.conf import settings
from chatterbot import constants
# Project-level overrides; an empty dict when the Django settings module
# defines no CHATTERBOT attribute.
CHATTERBOT_SETTINGS = getattr(settings, 'CHATTERBOT', {})

# Values used for every key the project does not override.
CHATTERBOT_DEFAULTS = dict(
    name='ChatterBot',
    storage_adapter='chatterbot.storage.DjangoStorageAdapter',
    input_adapter='chatterbot.input.VariableInputTypeAdapter',
    output_adapter='chatterbot.output.OutputAdapter',
    django_app_name=constants.DEFAULT_DJANGO_APP_NAME,
)

# Effective settings: start from a copy of the defaults, then let the
# project-level values win.
CHATTERBOT = dict(CHATTERBOT_DEFAULTS)
CHATTERBOT.update(CHATTERBOT_SETTINGS)

@ -0,0 +1,11 @@
from django.conf.urls import url
from .views import ChatterBotView
# Route the root of this URLconf to the ChatterBot API view.
_chatterbot_route = url(r'^$', ChatterBotView.as_view(), name='chatterbot')

urlpatterns = [_chatterbot_route]

@ -0,0 +1,118 @@
import json
from django.views.generic import View
from django.http import JsonResponse
from chatterbot import ChatBot
from chatterbot.ext.django_chatterbot import settings
class ChatterBotViewMixin(object):
    """
    Mixin that exposes a ``chatterbot`` attribute to the views that
    subclass it.
    """

    # Created once, when this class body is executed, from the merged settings.
    chatterbot = ChatBot(**settings.CHATTERBOT)

    def validate(self, data):
        """
        Validate the data received from the client.

        :param data: The decoded request payload.
        :raises ValidationError: If ``data`` has no "text" entry.
        """
        from django.core.exceptions import ValidationError

        if 'text' in data:
            return

        raise ValidationError('The attribute "text" is required.')

    def get_conversation(self, request):
        """
        Return the conversation stored in the session, creating a new
        one (and recording its id in the session) when none exists.

        :param request: The current request; its ``session`` is read and
            may be written.
        :returns: An object with an ``id`` and a ``statements`` list of
            serialized statement/response pairs.
        """
        from chatterbot.ext.django_chatterbot.models import Conversation, Response

        class Obj(object):
            def __init__(self):
                self.id = None
                self.statements = []

        conversation = Obj()
        conversation.id = request.session.get('conversation_id', 0)

        try:
            Conversation.objects.get(id=conversation.id)
        except Conversation.DoesNotExist:
            # No stored conversation: start a fresh one with no history.
            new_id = self.chatterbot.storage.create_conversation()
            request.session['conversation_id'] = new_id
            conversation.id = new_id
            return conversation

        history = Response.objects.filter(conversations__id=conversation.id)
        for entry in history:
            conversation.statements.append(entry.statement.serialize())
            conversation.statements.append(entry.response.serialize())

        return conversation
class ChatterBotView(ChatterBotViewMixin, View):
    """
    Provide an API endpoint to interact with ChatterBot.
    """

    def post(self, request, *args, **kwargs):
        """
        Return a response to the statement in the posted data.

        The request body must be UTF-8 encoded JSON containing a "text"
        attribute. A body that cannot be decoded, or that fails
        validation, is answered with a 400 response instead of letting
        the exception surface as a server error.
        """
        from django.core.exceptions import ValidationError

        try:
            input_data = json.loads(request.read().decode('utf-8'))
        except ValueError:
            # Both undecodable bytes and malformed JSON raise ValueError
            # subclasses; either way the client sent a bad request.
            return JsonResponse(
                {'detail': 'The request body must be valid JSON.'},
                status=400
            )

        try:
            self.validate(input_data)
        except ValidationError as error:
            return JsonResponse(
                {'detail': ' '.join(error.messages)},
                status=400
            )

        conversation = self.get_conversation(request)

        response = self.chatterbot.get_response(input_data, conversation.id)
        response_data = response.serialize()

        return JsonResponse(response_data, status=200)

    def get(self, request, *args, **kwargs):
        """
        Return data corresponding to the current conversation.

        The payload is sent with a 405 status because clients are
        expected to POST to this endpoint.
        """
        conversation = self.get_conversation(request)

        data = {
            'detail': 'You should make a POST request to this endpoint.',
            'name': self.chatterbot.name,
            'conversation': conversation.statements
        }

        # Return a method not allowed response
        return JsonResponse(data, status=405)

    def patch(self, request, *args, **kwargs):
        """
        The patch method is not allowed for this endpoint.
        """
        data = {
            'detail': 'You should make a POST request to this endpoint.'
        }

        # Return a method not allowed response
        return JsonResponse(data, status=405)

    def delete(self, request, *args, **kwargs):
        """
        The delete method is not allowed for this endpoint.
        """
        data = {
            'detail': 'You should make a POST request to this endpoint.'
        }

        # Return a method not allowed response
        return JsonResponse(data, status=405)

@ -0,0 +1,132 @@
from sqlalchemy import Table, Column, Integer, DateTime, ForeignKey, PickleType
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from sqlalchemy.ext.declarative import declared_attr, declarative_base
from chatterbot.constants import TAG_NAME_MAX_LENGTH, STATEMENT_TEXT_MAX_LENGTH
from chatterbot.ext.sqlalchemy_app.types import UnicodeString
from chatterbot.conversation import StatementMixin
class ModelBase(object):
    """
    An augmented base class for SqlAlchemy models.

    It is passed to ``declarative_base`` as ``cls``, so the attributes
    declared here are shared by the models built on that base.
    """

    @declared_attr
    def __tablename__(cls):
        """
        Return the lowercase class name as the name of the table.
        """
        return cls.__name__.lower()

    # Auto-incrementing integer primary key.
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True
    )
# Declarative base for the models in this module, built on ModelBase.
Base = declarative_base(cls=ModelBase)

# Association table pairing tag ids with statement ids; used as the
# ``secondary`` table of the Statement.tags relationship.
tag_association_table = Table(
    'tag_association',
    Base.metadata,
    Column('tag_id', Integer, ForeignKey('tag.id')),
    Column('statement_id', Integer, ForeignKey('statement.id'))
)
class Tag(Base):
    """
    A tag that describes a statement.
    """

    # Tag label, limited to TAG_NAME_MAX_LENGTH characters.
    name = Column(UnicodeString(TAG_NAME_MAX_LENGTH))
class Statement(Base, StatementMixin):
    """
    Database model for a sentence or phrase.
    """

    # Statement text; declared unique.
    text = Column(UnicodeString(STATEMENT_TEXT_MAX_LENGTH), unique=True)

    # Tags attached through the tag association table.
    tags = relationship('Tag', secondary=lambda: tag_association_table, backref='statements')

    # Arbitrary extra data, stored pickled.
    extra_data = Column(PickleType)

    # Response rows pointing at this statement.
    in_response_to = relationship('Response', back_populates='statement_table')

    def get_tags(self):
        """
        Return the names of the tags attached to this statement, as a list.
        """
        names = []
        for tag in self.tags:
            names.append(tag.name)
        return names

    def get_statement(self):
        """
        Build a ``chatterbot.conversation.Statement`` from this row,
        copying its text, tag names, extra data and known responses.
        """
        from chatterbot.conversation import Statement as StatementObject
        from chatterbot.conversation import Response as ResponseObject

        tag_names = [tag.name for tag in self.tags]
        plain_statement = StatementObject(
            self.text,
            tags=tag_names,
            extra_data=self.extra_data
        )

        for row in self.in_response_to:
            known_response = ResponseObject(text=row.text, occurrence=row.occurrence)
            plain_statement.add_response(known_response)

        return plain_statement
class Response(Base):
    """
    Response, contains responses related to a given statement.
    """

    # Text of the response.
    text = Column(UnicodeString(STATEMENT_TEXT_MAX_LENGTH))

    # Creation timestamp; the default is produced by the database (func.now()).
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now()
    )

    # Occurrence counter, starting at 1.
    occurrence = Column(Integer, default=1)

    # Foreign key to the text (not the id) of the related statement row.
    statement_text = Column(UnicodeString(STATEMENT_TEXT_MAX_LENGTH), ForeignKey('statement.text'))

    # Single related Statement; the other side is Statement.in_response_to.
    statement_table = relationship(
        'Statement',
        back_populates='in_response_to',
        cascade='all',
        uselist=False
    )
# Association table pairing conversation ids with statement ids; used as
# the ``secondary`` table of the Conversation.statements relationship.
conversation_association_table = Table(
    'conversation_association',
    Base.metadata,
    Column('conversation_id', Integer, ForeignKey('conversation.id')),
    Column('statement_id', Integer, ForeignKey('statement.id'))
)
class Conversation(Base):
    """
    A conversation.
    """

    # Statements linked through the conversation association table;
    # the backref adds a ``conversations`` attribute on Statement.
    statements = relationship(
        'Statement',
        secondary=lambda: conversation_association_table,
        backref='conversations'
    )

@ -0,0 +1,21 @@
from sqlalchemy.types import TypeDecorator, Unicode
class UnicodeString(TypeDecorator):
    """
    Column type for unicode strings, implemented on top of ``Unicode``.
    """

    impl = Unicode

    def process_bind_param(self, value, dialect):
        """
        Coerce Python 2 bytestrings to unicode before they are saved
        to the database. On Python 3 the value is returned untouched.
        """
        import sys

        running_python_2 = sys.version_info[0] < 3
        if running_python_2 and isinstance(value, str):
            return value.decode('utf-8')

        return value

@ -0,0 +1,47 @@
"""
Filters set the base query that gets passed to the storage adapter.
"""
class Filter(object):
    """
    Base class that every other filter should subclass.
    """

    def filter_selection(self, chatterbot, conversation_id):
        """
        Return the storage adapter's base query unchanged.

        Subclasses are expected to override this method;
        ``conversation_id`` is not used by this base implementation.
        """
        storage = chatterbot.storage
        return storage.base_query
class RepetitiveResponseFilter(Filter):
    """
    A filter that excludes the most recent response so that the chat bot
    does not repeat a statement it has just said.
    """

    def filter_selection(self, chatterbot, conversation_id):
        """
        Return a query that excludes the text of the latest response in
        the conversation, or the unmodified base query when the
        conversation has no latest response.
        """
        # TODO: Add a larger quantity of response history
        latest_response = chatterbot.storage.get_latest_response(conversation_id)

        # Return the query with no changes if there are no statements to exclude
        if not latest_response:
            return super(RepetitiveResponseFilter, self).filter_selection(
                chatterbot,
                conversation_id
            )

        excluded_texts = [latest_response.text]
        return chatterbot.storage.base_query.statement_text_not_in(excluded_texts)

@ -0,0 +1,18 @@
from .input_adapter import InputAdapter
from .microsoft import Microsoft
from .gitter import Gitter
from .hipchat import HipChat
from .mailgun import Mailgun
from .terminal import TerminalAdapter
from .variable_input_type_adapter import VariableInputTypeAdapter
# Public API of the input package: the base adapter plus each adapter
# imported above.
__all__ = (
    'InputAdapter',
    'Microsoft',
    'Gitter',
    'HipChat',
    'Mailgun',
    'TerminalAdapter',
    'VariableInputTypeAdapter',
)

@ -0,0 +1,176 @@
from __future__ import unicode_literals
from time import sleep
from chatterbot.input import InputAdapter
from chatterbot.conversation import Statement
class Gitter(InputAdapter):
    """
    An input adapter that allows a ChatterBot instance to get
    input statements from a Gitter room.

    :kwargs:
        * *gitter_host* -- Base URL of the API.
          Defaults to ``https://api.gitter.im/v1/``.
        * *gitter_room* -- The room to join.
        * *gitter_api_token* -- Token sent as a Bearer authorization header.
        * *gitter_only_respond_to_mentions* -- Only answer unread messages
          that mention the bot. Defaults to ``True``.
        * *gitter_sleep_time* -- Seconds to wait between polls. Defaults to 4.
    """

    def __init__(self, **kwargs):
        super(Gitter, self).__init__(**kwargs)

        self.gitter_host = kwargs.get('gitter_host', 'https://api.gitter.im/v1/')
        self.gitter_room = kwargs.get('gitter_room')
        self.gitter_api_token = kwargs.get('gitter_api_token')
        self.only_respond_to_mentions = kwargs.get('gitter_only_respond_to_mentions', True)
        self.sleep_time = kwargs.get('gitter_sleep_time', 4)

        authorization_header = 'Bearer {}'.format(self.gitter_api_token)

        self.headers = {
            'Authorization': authorization_header,
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }

        # Join the Gitter room
        room_data = self.join_room(self.gitter_room)
        self.room_id = room_data.get('id')

        # The user endpoint answers with a list; the first entry is used.
        user_data = self.get_user_data()
        self.user_id = user_data[0].get('id')
        self.username = user_data[0].get('username')

    def _validate_status_code(self, response):
        """
        Raise ``HTTPStatusException`` unless the status code is 200 or 201.
        """
        code = response.status_code
        if code not in [200, 201]:
            raise self.HTTPStatusException('{} status code recieved'.format(code))

    def join_room(self, room_name):
        """
        Join the specified Gitter room and return the decoded JSON reply.
        """
        import requests

        endpoint = '{}rooms'.format(self.gitter_host)
        response = requests.post(
            endpoint,
            headers=self.headers,
            json={'uri': room_name}
        )
        self.logger.info('{} joining room {}'.format(
            response.status_code, endpoint
        ))
        self._validate_status_code(response)
        return response.json()

    def get_user_data(self):
        """
        Return the decoded JSON reply of the ``user`` endpoint.
        """
        import requests

        endpoint = '{}user'.format(self.gitter_host)
        response = requests.get(
            endpoint,
            headers=self.headers
        )
        self.logger.info('{} retrieving user data {}'.format(
            response.status_code, endpoint
        ))
        self._validate_status_code(response)
        return response.json()

    def mark_messages_as_read(self, message_ids):
        """
        Mark the specified message ids as read.
        """
        import requests

        endpoint = '{}user/{}/rooms/{}/unreadItems'.format(
            self.gitter_host, self.user_id, self.room_id
        )
        response = requests.post(
            endpoint,
            headers=self.headers,
            json={'chat': message_ids}
        )
        self.logger.info('{} marking messages as read {}'.format(
            response.status_code, endpoint
        ))
        self._validate_status_code(response)
        return response.json()

    def get_most_recent_message(self):
        """
        Get the most recent message from the Gitter room.

        Returns ``None`` when the room reports no messages.
        """
        import requests

        endpoint = '{}rooms/{}/chatMessages?limit=1'.format(self.gitter_host, self.room_id)
        response = requests.get(
            endpoint,
            headers=self.headers
        )
        self.logger.info('{} getting most recent message'.format(
            response.status_code
        ))
        self._validate_status_code(response)
        data = response.json()
        if data:
            return data[0]
        return None

    def _contains_mention(self, mentions):
        """
        Return True if any mention's ``screenName`` equals the bot's username.
        """
        for mention in mentions:
            if self.username == mention.get('screenName'):
                return True
        return False

    def should_respond(self, data):
        """
        Takes the API response data from a single message.
        Returns true if the chat bot should respond.
        """
        if not data:
            return False

        unread = data.get('unread', False)

        if self.only_respond_to_mentions:
            return bool(unread and self._contains_mention(data['mentions']))

        return bool(unread)

    def remove_mentions(self, text):
        """
        Return the text with every ``@mention`` removed and runs of
        spaces collapsed.
        """
        import re

        text_without_mentions = re.sub(r'@\S+', '', text)

        # Remove consecutive spaces
        text_without_mentions = re.sub(' +', ' ', text_without_mentions.strip())

        return text_without_mentions

    def process_input(self, statement):
        """
        Poll the room until a message that should be answered arrives,
        mark it as read and return it as a statement.
        """
        while True:
            data = self.get_most_recent_message()

            if self.should_respond(data):
                self.mark_messages_as_read([data['id']])
                break

            # Only wait between unsuccessful polls, so that a message
            # that was found is answered without an extra delay.
            sleep(self.sleep_time)

        text = self.remove_mentions(data['text'])
        statement = Statement(text)

        return statement

    class HTTPStatusException(Exception):
        """
        Exception raised when unexpected non-success HTTP
        status codes are returned in a response.
        """

        def __init__(self, value):
            self.value = value

        def __str__(self):
            return repr(self.value)

@ -0,0 +1,113 @@
from __future__ import unicode_literals
from time import sleep
from chatterbot.input import InputAdapter
from chatterbot.conversation import Statement
class HipChat(InputAdapter):
    """
    An input adapter that allows a ChatterBot instance to get
    input statements from a HipChat room.

    :kwargs:
        * *hipchat_host* -- Base URL of the HipChat server.
        * *hipchat_access_token* -- Token sent as a Bearer authorization header.
        * *hipchat_room* -- Id or name of the room to read from.
        * *ssl_verify* -- Passed to the requests session as ``verify``.
          Defaults to ``True``.
    """

    def __init__(self, **kwargs):
        super(HipChat, self).__init__(**kwargs)

        self.hipchat_host = kwargs.get('hipchat_host')
        self.hipchat_access_token = kwargs.get('hipchat_access_token')
        self.hipchat_room = kwargs.get('hipchat_room')
        self.session_id = str(self.chatbot.default_session.uuid)

        import requests
        self.session = requests.Session()
        self.session.verify = kwargs.get('ssl_verify', True)

        authorization_header = 'Bearer {}'.format(self.hipchat_access_token)

        self.headers = {
            'Authorization': authorization_header,
            'Content-Type': 'application/json'
        }

        # This is a set of the ids of messages that must not be answered
        self.recent_message_ids = self.get_initial_ids()

    def get_initial_ids(self):
        """
        Returns a set of the most recent message ids (up to 75 messages).
        """
        data = self.view_recent_room_history(
            self.hipchat_room,
            max_results=75
        )

        return {item['id'] for item in data['items']}

    def view_recent_room_history(self, room_id_or_name, max_results=1):
        """
        https://www.hipchat.com/docs/apiv2/method/view_recent_room_history
        """
        recent_history_url = '{}/v2/room/{}/history?max-results={}'.format(
            self.hipchat_host,
            room_id_or_name,
            max_results
        )

        response = self.session.get(
            recent_history_url,
            headers=self.headers
        )

        return response.json()

    def get_most_recent_message(self, room_id_or_name):
        """
        Return the most recent message from the HipChat room,
        or ``None`` when the history is empty.
        """
        data = self.view_recent_room_history(room_id_or_name)

        items = data['items']

        if not items:
            return None
        return items[-1]

    def process_input(self, statement):
        """
        Process input from the HipChat room.

        Polls the room until a message with an unseen id arrives and
        returns it as a statement carrying its ``hipchat_message_id``.
        """
        response_statement = self.chatbot.storage.get_latest_response(
            self.session_id
        )

        if response_statement:
            last_message_id = response_statement.extra_data.get(
                'hipchat_message_id', None
            )
            if last_message_id:
                self.recent_message_ids.add(last_message_id)

        while True:
            data = self.get_most_recent_message(self.hipchat_room)

            if data and data['id'] not in self.recent_message_ids:
                self.recent_message_ids.add(data['id'])
                break

            # Only wait between unsuccessful polls, so that a message
            # that was found is answered without an extra delay.
            sleep(3.5)

        text = data['message']

        statement = Statement(text)
        statement.add_extra_data('hipchat_message_id', data['id'])

        return statement

@ -0,0 +1,33 @@
from __future__ import unicode_literals
from chatterbot.adapters import Adapter
class InputAdapter(Adapter):
    """
    Abstract interface that every input adapter should implement.
    """

    def process_input(self, *args, **kwargs):
        """
        Return a statement object based on the input source.
        Must be overridden; this base version always raises.
        """
        raise self.AdapterMethodNotImplementedError()

    def process_input_statement(self, *args, **kwargs):
        """
        Process the input and return the matching statement from storage
        when one exists, otherwise the newly processed statement.
        """
        input_statement = self.process_input(*args, **kwargs)
        self.logger.info('Received input statement: {}'.format(input_statement.text))

        existing_statement = self.chatbot.storage.find(input_statement.text)

        if not existing_statement:
            self.logger.info('"{}" is not a known statement'.format(input_statement.text))
            return input_statement

        self.logger.info('"{}" is a known statement'.format(input_statement.text))
        return existing_statement

@ -0,0 +1,61 @@
from __future__ import unicode_literals
import datetime
from chatterbot.input import InputAdapter
from chatterbot.conversation import Statement
class Mailgun(InputAdapter):
    """
    Input adapter that reads its input from Mailgun stored messages.
    """

    def __init__(self, **kwargs):
        super(Mailgun, self).__init__(**kwargs)

        # Use the bot's name for the name of the sender
        self.name = kwargs.get('name')
        self.from_address = kwargs.get('mailgun_from_address')
        self.api_key = kwargs.get('mailgun_api_key')
        self.endpoint = kwargs.get('mailgun_api_endpoint')

    def get_email_stored_events(self):
        """
        Request the events endpoint for events since 24 hours ago,
        in ascending order and limited to one, and return the response.
        """
        import requests

        one_day_ago = datetime.datetime.now() - datetime.timedelta(days=1)
        query = {
            'begin': one_day_ago.isoformat(),
            'ascending': 'yes',
            'limit': 1
        }
        events_url = '{}/events'.format(self.endpoint)
        return requests.get(events_url, auth=('api', self.api_key), params=query)

    def get_stored_email_urls(self):
        """
        Yield the storage url of every event item that has one.
        """
        payload = self.get_email_stored_events().json()

        for item in payload.get('items', []):
            if 'storage' in item and 'url' in item['storage']:
                yield item['storage']['url']

    def get_message(self, url):
        """
        Fetch the stored message at ``url`` and return the response.
        """
        import requests

        credentials = ('api', self.api_key)
        return requests.get(url, auth=credentials)

    def process_input(self, statement):
        """
        Return a statement built from the ``stripped-text`` of the first
        stored message.
        """
        first_url = list(self.get_stored_email_urls())[0]

        message = self.get_message(first_url).json()

        return Statement(message.get('stripped-text'))

@ -0,0 +1,115 @@
from __future__ import unicode_literals
from time import sleep
from chatterbot.input import InputAdapter
from chatterbot.conversation import Statement
class Microsoft(InputAdapter):
    """
    An input adapter that allows a ChatterBot instance to get
    input statements from a Microsoft Bot using *Directline client protocol*.
    https://docs.botframework.com/en-us/restapi/directline/#navtitle

    :kwargs:
        * *directline_host* -- Base URL of the Direct Line service.
          Defaults to ``https://directline.botframework.com``.
        * *direct_line_token_or_secret* -- Direct Line client credential.
        * *ssl_verify* -- Passed to requests as ``verify``. Defaults to
          ``False`` to keep the previous behavior of this adapter.
    """

    def __init__(self, **kwargs):
        super(Microsoft, self).__init__(**kwargs)
        import requests
        from requests.packages.urllib3.exceptions import InsecureRequestWarning

        # NOTE(review): certificate verification is off by default, which
        # sends the Direct Line credential over an unverified connection.
        # Pass ssl_verify=True to enable it.
        self.ssl_verify = kwargs.get('ssl_verify', False)

        # Only silence the warning when verification is actually disabled.
        if not self.ssl_verify:
            requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

        self.directline_host = kwargs.get('directline_host', 'https://directline.botframework.com')

        # NOTE: Direct Line client credentials are different from your bot's
        # credentials
        self.direct_line_token_or_secret = kwargs.\
            get('direct_line_token_or_secret')

        authorization_header = 'BotConnector  {}'.\
            format(self.direct_line_token_or_secret)

        self.headers = {
            'Authorization': authorization_header,
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'charset': 'utf-8'
        }

        conversation_data = self.start_conversation()
        self.conversation_id = conversation_data.get('conversationId')
        self.conversation_token = conversation_data.get('token')

    def _validate_status_code(self, response):
        """
        Raise ``HTTPStatusException`` unless the status code is 200.
        """
        code = response.status_code
        if not code == 200:
            raise self.HTTPStatusException('{} status code recieved'.
                                           format(code))

    def start_conversation(self):
        """
        Start a conversation and return the decoded JSON reply.
        """
        import requests

        endpoint = '{host}/api/conversations'.format(host=self.directline_host)
        response = requests.post(
            endpoint,
            headers=self.headers,
            verify=self.ssl_verify
        )
        self.logger.info('{} starting conversation {}'.format(
            response.status_code, endpoint
        ))
        self._validate_status_code(response)
        return response.json()

    def get_most_recent_message(self):
        """
        Return the message selected by the reply's watermark, or ``None``
        when the conversation has no messages.
        """
        import requests

        endpoint = '{host}/api/conversations/{id}/messages'\
            .format(host=self.directline_host,
                    id=self.conversation_id)

        response = requests.get(
            endpoint,
            headers=self.headers,
            verify=self.ssl_verify
        )

        self.logger.info('{} retrieving most recent messages {}'.format(
            response.status_code, endpoint
        ))

        self._validate_status_code(response)

        data = response.json()

        if data['messages']:
            last_msg = int(data['watermark'])
            return data['messages'][last_msg - 1]
        return None

    def process_input(self, statement):
        """
        Poll the conversation until a message with an id is available
        and return its text as a statement.
        """
        new_message = False
        data = None
        while not new_message:
            data = self.get_most_recent_message()
            if data and data['id']:
                new_message = True
            # The pause runs on every pass: this adapter does not track
            # which messages it has already returned, so the delay is the
            # only thing pacing repeated calls.
            sleep(3.5)

        text = data['text']
        statement = Statement(text)
        self.logger.info('processing user statement {}'.format(statement))

        return statement

    class HTTPStatusException(Exception):
        """
        Exception raised when unexpected non-success HTTP
        status codes are returned in a response.
        """

        def __init__(self, value):
            self.value = value

        def __str__(self):
            return repr(self.value)

@ -0,0 +1,18 @@
from __future__ import unicode_literals
from chatterbot.input import InputAdapter
from chatterbot.conversation import Statement
from chatterbot.utils import input_function
class TerminalAdapter(InputAdapter):
    """
    Minimal adapter that lets ChatterBot communicate through the terminal.
    """

    def process_input(self, *args, **kwargs):
        """
        Read one entry from ``input_function`` and wrap it in a statement.
        """
        return Statement(input_function())

@ -0,0 +1,65 @@
from __future__ import unicode_literals
from chatterbot.input import InputAdapter
from chatterbot.conversation import Statement
class VariableInputTypeAdapter(InputAdapter):
    """
    Input adapter that accepts a statement object, a string or a
    dictionary and turns it into a statement.
    """

    JSON = 'json'
    TEXT = 'text'
    OBJECT = 'object'

    VALID_FORMATS = (JSON, TEXT, OBJECT, )

    def detect_type(self, statement):
        """
        Return the format constant matching ``statement``.

        Anything with a ``text`` attribute is an object, strings are
        text and dictionaries are JSON, checked in that order.

        :raises UnrecognizedInputFormatException: For any other type.
        """
        import sys

        # Python 2 strings are matched through basestring.
        string_types = basestring if sys.version_info[0] < 3 else str  # NOQA

        if hasattr(statement, 'text'):
            return self.OBJECT
        if isinstance(statement, string_types):
            return self.TEXT
        if isinstance(statement, dict):
            return self.JSON

        message = 'The type {} is not recognized as a valid input type.'.format(
            type(statement)
        )
        raise self.UnrecognizedInputFormatException(message)

    def process_input(self, statement):
        """
        Convert the input into a statement object according to its
        detected type.
        """
        input_type = self.detect_type(statement)

        # Return the statement object without modification
        if input_type == self.OBJECT:
            return statement

        # Convert the input string into a statement object
        if input_type == self.TEXT:
            return Statement(statement)

        # Convert input dictionary into a statement object
        if input_type == self.JSON:
            # Work on a copy so the caller's dictionary is left intact;
            # the remaining entries become keyword arguments.
            extra_fields = dict(statement)
            text = extra_fields.pop('text')

            return Statement(text, **extra_fields)

    class UnrecognizedInputFormatException(Exception):
        """
        Exception raised when an input format is specified that is
        not in the VariableInputTypeAdapter.VALID_FORMATS variable.
        """

        def __init__(self, value='The input format was not recognized.'):
            self.value = value

        def __str__(self):
            return repr(self.value)

@ -0,0 +1,20 @@
from .logic_adapter import LogicAdapter
from .best_match import BestMatch
from .low_confidence import LowConfidenceAdapter
from .mathematical_evaluation import MathematicalEvaluation
from .multi_adapter import MultiLogicAdapter
from .no_knowledge_adapter import NoKnowledgeAdapter
from .specific_response import SpecificResponseAdapter
from .time_adapter import TimeLogicAdapter
# Public API of the logic package: the base adapter plus each adapter
# imported above.
__all__ = (
    'LogicAdapter',
    'BestMatch',
    'LowConfidenceAdapter',
    'MathematicalEvaluation',
    'MultiLogicAdapter',
    'NoKnowledgeAdapter',
    'SpecificResponseAdapter',
    'TimeLogicAdapter',
)

@ -0,0 +1,84 @@
from __future__ import unicode_literals
from .logic_adapter import LogicAdapter
class BestMatch(LogicAdapter):
    """
    A logic adapter that returns a response based on known responses to
    the closest matches to the input statement.
    """

    def get(self, input_statement):
        """
        Return the known statement that is the closest match to the
        input statement.

        When no statement has a known response, a random statement with
        a confidence of 0 is returned instead. When no known statement
        scores above 0, the input statement itself is returned.

        :raises EmptyDatasetException: If the storage holds no statements.
        """
        statement_list = self.chatbot.storage.get_response_statements()

        if not statement_list:
            if self.chatbot.storage.count():
                # Use a randomly picked statement
                self.logger.info(
                    'No statements have known responses. ' +
                    'Choosing a random response to return.'
                )
                random_response = self.chatbot.storage.get_random()
                random_response.confidence = 0
                return random_response
            else:
                raise self.EmptyDatasetException()

        # The input statement is the baseline; note that this sets its
        # confidence attribute to 0.
        closest_match = input_statement
        closest_match.confidence = 0

        # Find the closest matching known statement
        for statement in statement_list:
            confidence = self.compare_statements(input_statement, statement)

            if confidence > closest_match.confidence:
                statement.confidence = confidence
                closest_match = statement

        return closest_match

    def can_process(self, statement):
        """
        Check that the chatbot's storage adapter is available to the logic
        adapter and there is at least one statement in the database.

        Returns the storage count, which is truthy when it is non-zero.
        """
        return self.chatbot.storage.count()

    def process(self, input_statement):
        """
        Select a response to the closest known match of the input.

        The response carries the confidence of the match; a random
        response with a confidence of 0 is used when the match has no
        known responses.
        """
        # Select the closest match to the input statement
        closest_match = self.get(input_statement)
        # The match comes first in the message, then the input it matches.
        self.logger.info('Using "{}" as a close match to "{}"'.format(
            closest_match.text, input_statement.text
        ))

        # Get all statements that are in response to the closest match
        response_list = self.chatbot.storage.filter(
            in_response_to__contains=closest_match.text
        )

        if response_list:
            self.logger.info(
                'Selecting response from {} optimal responses.'.format(
                    len(response_list)
                )
            )
            response = self.select_response(input_statement, response_list)
            response.confidence = closest_match.confidence
            self.logger.info('Response selected. Using "{}"'.format(response.text))
        else:
            response = self.chatbot.storage.get_random()
            self.logger.info(
                'No response to "{}" found. Selecting a random response.'.format(
                    closest_match.text
                )
            )

            # Set confidence to zero because a random response is selected
            response.confidence = 0

        return response

@ -0,0 +1,100 @@
from __future__ import unicode_literals
from chatterbot.adapters import Adapter
from chatterbot.utils import import_module
class LogicAdapter(Adapter):
    """
    This is an abstract class that represents the interface
    that all logic adapters should implement.

    :param statement_comparison_function: The dot-notated import path to a statement comparison function.
                                          Defaults to ``levenshtein_distance``.

    :param response_selection_method: The a response selection method.
                                      Defaults to ``get_first_response``.
    """

    def __init__(self, **kwargs):
        super(LogicAdapter, self).__init__(**kwargs)
        from chatterbot.comparisons import levenshtein_distance
        from chatterbot.response_selection import get_first_response

        # Import string module parameters
        for option in ('statement_comparison_function', 'response_selection_method'):
            if isinstance(kwargs.get(option), str):
                kwargs[option] = import_module(kwargs[option])

        # By default, compare statements using Levenshtein distance
        self.compare_statements = kwargs.get(
            'statement_comparison_function',
            levenshtein_distance
        )

        # By default, select the first available response
        self.select_response = kwargs.get(
            'response_selection_method',
            get_first_response
        )

    def get_initialization_functions(self):
        """
        Return a dictionary of functions to be run once when the chat bot is instantiated.

        The functions come from the comparison function. A comparison
        function that does not provide ``get_initialization_functions``
        (for example a plain function) has nothing to initialize, so an
        empty dictionary is returned for it.
        """
        getter = getattr(self.compare_statements, 'get_initialization_functions', None)

        if getter is None:
            return {}

        return getter()

    def initialize(self):
        """
        Run every initialization function once.
        """
        for function in self.get_initialization_functions().values():
            function()

    def can_process(self, statement):
        """
        A preliminary check that is called to determine if a
        logic adapter can process a given statement. By default,
        this method returns true but it can be overridden in
        child classes as needed.

        :rtype: bool
        """
        return True

    def process(self, statement):
        """
        Override this method and implement your logic for selecting a response to an input statement.

        A confidence value and the selected response statement should be returned.
        The confidence value represents a rating of how accurate the logic adapter
        expects the selected response to be. Confidence scores are used to select
        the best response from multiple logic adapters.

        The confidence value should be a number between 0 and 1 where 0 is the
        lowest confidence level and 1 is the highest.

        :param statement: An input statement to be processed by the logic adapter.
        :type statement: Statement

        :rtype: Statement
        """
        raise self.AdapterMethodNotImplementedError()

    @property
    def class_name(self):
        """
        Return the name of the current logic adapter class.
        This is typically used for logging and debugging.
        """
        return str(self.__class__.__name__)

    class EmptyDatasetException(Exception):
        """
        Raised when at least one statement was expected but none was found.
        """

        def __init__(self, value='An empty set was received when at least one statement was expected.'):
            self.value = value

        def __str__(self):
            return repr(self.value)

@ -0,0 +1,58 @@
from __future__ import unicode_literals
from chatterbot.conversation import Statement
from .best_match import BestMatch
class LowConfidenceAdapter(BestMatch):
    """
    Answers with a default response, at high confidence, whenever the
    closest known match falls below a confidence threshold.

    :kwargs:
        * *threshold* (``float``) --
          Matches with a confidence below this value trigger the adapter.
          Defaults to 0.65.
        * *default_response* (``str``) or (``iterable``) --
          The response, or responses, this logic adapter may return.
        * *response_selection_method* (``str``) or (``callable``) --
          The response selection method.
          Defaults to ``get_first_response``.
    """

    def __init__(self, **kwargs):
        super(LowConfidenceAdapter, self).__init__(**kwargs)

        self.confidence_threshold = kwargs.get('threshold', 0.65)

        configured = kwargs.get(
            'default_response', "I'm sorry, I do not understand."
        )

        # A lone string is treated as a one-element collection
        texts = [configured] if isinstance(configured, str) else configured

        self.default_responses = [Statement(text=text) for text in texts]

    def process(self, input_statement):
        """
        Pick one of the default responses and rate it 1 when the closest
        known match is below the threshold, otherwise 0.
        """
        # Find how well the input matches something already known
        closest_match = self.get(input_statement)

        # Pick the default response that would be given
        response = self.select_response(input_statement, self.default_responses)

        # The default is only a good answer when the real match is weak
        is_weak_match = closest_match.confidence < self.confidence_threshold
        response.confidence = 1 if is_weak_match else 0

        return response

@ -0,0 +1,67 @@
from __future__ import unicode_literals
from chatterbot.logic import LogicAdapter
from chatterbot.conversation import Statement
class MathematicalEvaluation(LogicAdapter):
    """
    Logic adapter that looks for a mathematical expression in the input
    and, when one can be evaluated, answers with the expression followed
    by its result.

    For example:
        User: 'What is three plus five?'
        Bot: 'Three plus five equals eight'

    :kwargs:
        * *language* (``str``) --
          The language is set to 'ENG' for English by default.
    """

    def __init__(self, **kwargs):
        super(MathematicalEvaluation, self).__init__(**kwargs)

        self.language = kwargs.get('language', 'ENG')

        # Holds the response computed by can_process so that the following
        # call to process does not have to evaluate the input again.
        self.cache = {}

    def can_process(self, statement):
        """
        Report whether the input could be evaluated, remembering the
        computed response for the upcoming ``process`` call.
        """
        outcome = self.process(statement)
        self.cache[statement.text] = outcome
        return outcome.confidence == 1

    def process(self, statement):
        """
        Build a response consisting of the expression found in the
        statement text and, if it can be solved, its value.

        The response confidence is 1 when evaluation succeeded and 0
        when it did not.
        """
        from mathparse import mathparse

        input_text = statement.text

        # Reuse (and then discard) a result stored by can_process
        try:
            remembered = self.cache[input_text]
        except KeyError:
            pass
        else:
            self.cache = {}
            return remembered

        # Pull the mathematical part out of the input text
        expression = mathparse.extract_expression(input_text, language=self.language)

        response = Statement(text=expression)

        try:
            response.text += ' = ' + str(
                mathparse.parse(expression, language=self.language)
            )
        except mathparse.PostfixTokenEvaluationException:
            response.confidence = 0
        else:
            # Evaluation worked, so the adapter is fully confident
            response.confidence = 1

        return response

@ -0,0 +1,153 @@
from __future__ import unicode_literals
from collections import Counter
from chatterbot import utils
from .logic_adapter import LogicAdapter
class MultiLogicAdapter(LogicAdapter):
    """
    A logic adapter that holds a collection of other logic adapters,
    passes each input statement to them and returns the response with
    the best confidence. It also provides the methods used to add,
    insert and remove adapters and to hand them the chat bot instance.
    """

    def __init__(self, **kwargs):
        super(MultiLogicAdapter, self).__init__(**kwargs)

        # Adapters registered by the chat bot
        self.adapters = []

        # Adapters that must always take part
        self.system_adapters = []

    def get_initialization_functions(self):
        """
        Merge the initialization functions of every contained adapter
        into a single dictionary.
        """
        merged = {}

        for member in self.get_adapters():
            merged.update(member.get_initialization_functions())

        return merged

    def process(self, statement):
        """
        Ask each willing adapter for a response and return the best one.

        :param statement: The input statement to be processed.
        """
        candidates = []
        best = None
        best_confidence = -1

        for adapter in self.get_adapters():
            if not adapter.can_process(statement):
                self.logger.info(
                    'Not processing the statement using {}'.format(adapter.class_name)
                )
                continue

            output = adapter.process(statement)
            candidates.append((output.confidence, output))
            self.logger.info(
                '{} selected "{}" as a response with a confidence of {}'.format(
                    adapter.class_name, output.text, output.confidence
                )
            )

            if output.confidence > best_confidence:
                best, best_confidence = output, output.confidence

        # With three or more answers, a statement that several adapters
        # agree on is preferred over the single most confident one.
        if len(candidates) >= 3:
            tally = Counter([pair[1] for pair in candidates])
            favourite, occurrences = tally.most_common()[0]
            if occurrences > 1:
                best = favourite
                best_confidence = self.get_greatest_confidence(best, candidates)

        best.confidence = best_confidence
        return best

    def get_greatest_confidence(self, statement, options):
        """
        Return the highest confidence attached to ``statement`` among
        the given options.

        :param statement: A statement object.
        :param options: A sequence of (confidence, statement) tuples.
        """
        return max([
            confidence_and_statement[0]
            for confidence_and_statement in options
            if confidence_and_statement[1] == statement
        ])

    def get_adapters(self):
        """
        Return a new list with the regular adapters followed by the
        system adapters.
        """
        combined = list(self.adapters)
        combined += self.system_adapters
        return combined

    def add_adapter(self, adapter, **kwargs):
        """
        Validate, instantiate and append a logic adapter.

        :param adapter: The logic adapter to be added.
        :type adapter: `LogicAdapter`
        """
        utils.validate_adapter_class(adapter, LogicAdapter)
        instance = utils.initialize_class(adapter, **kwargs)
        self.adapters.append(instance)

    def insert_logic_adapter(self, logic_adapter, insert_index, **kwargs):
        """
        Validate, instantiate and insert a logic adapter at a given position.

        :param logic_adapter: The string path to the logic adapter to add.
        :type logic_adapter: str

        :param insert_index: The index to insert the logic adapter into the list at.
        :type insert_index: int
        """
        utils.validate_adapter_class(logic_adapter, LogicAdapter)

        adapter_class = utils.import_module(logic_adapter)
        self.adapters.insert(insert_index, adapter_class(**kwargs))

    def remove_logic_adapter(self, adapter_name):
        """
        Remove the first regular adapter whose class has the given name.

        :param adapter_name: The class name of the adapter to remove.
        :type adapter_name: str

        :returns: True if an adapter was removed, False otherwise.
        """
        for position, candidate in enumerate(self.adapters):
            if type(candidate).__name__ == adapter_name:
                self.adapters.pop(position)
                return True
        return False

    def set_chatbot(self, chatbot):
        """
        Store the chat bot on this adapter and on every contained adapter.
        """
        super(MultiLogicAdapter, self).set_chatbot(chatbot)

        for member in self.get_adapters():
            member.set_chatbot(chatbot)

@ -0,0 +1,26 @@
from __future__ import unicode_literals
from .logic_adapter import LogicAdapter
class NoKnowledgeAdapter(LogicAdapter):
    """
    A system adapter that is automatically added to the list of
    logic adapters during initialization. It is placed at the
    beginning of the list so that it has the highest priority.
    """

    def process(self, statement):
        """
        Echo the input statement back, rated by how empty the storage is.

        The statement gets a confidence of 1 when the storage reports
        no entries at all, and a confidence of 0 otherwise.
        """
        storage_has_entries = self.chatbot.storage.count()
        statement.confidence = 0 if storage_has_entries else 1
        return statement

@ -0,0 +1,38 @@
from __future__ import unicode_literals
from .logic_adapter import LogicAdapter
class SpecificResponseAdapter(LogicAdapter):
    """
    Answers one particular input with one particular, preconfigured output.

    :kwargs:
        * *input_text* (``str``) --
          The input text that triggers this logic adapter.
        * *output_text* (``str``) --
          The output text returned by this logic adapter.
    """

    def __init__(self, **kwargs):
        super(SpecificResponseAdapter, self).__init__(**kwargs)
        from chatterbot.conversation import Statement

        self.input_text = kwargs.get('input_text')

        # The same statement object is handed out on every call to process
        self.response_statement = Statement(kwargs.get('output_text'))

    def can_process(self, statement):
        """
        Return True only when the statement equals the configured input text.
        """
        return bool(statement == self.input_text)

    def process(self, statement):
        """
        Return the configured response, rated 1 when the statement equals
        the configured input text and 0 when it does not.
        """
        is_trigger = statement == self.input_text
        self.response_statement.confidence = 1 if is_trigger else 0
        return self.response_statement

@ -0,0 +1,91 @@
from __future__ import unicode_literals
from datetime import datetime
from .logic_adapter import LogicAdapter
class TimeLogicAdapter(LogicAdapter):
    """
    The TimeLogicAdapter returns the current time.

    A naive Bayes classifier is trained on example sentences at
    construction time and is later used to rate whether an input
    looks like a question about the time.

    :kwargs:
        * *positive* (``list``) --
          The time-related questions used to identify time questions.
          Defaults to a list of English sentences.
        * *negative* (``list``) --
          The non-time-related questions used to identify time questions.
          Defaults to a list of English sentences.
    """

    def __init__(self, **kwargs):
        super(TimeLogicAdapter, self).__init__(**kwargs)
        from nltk import NaiveBayesClassifier

        self.positive = kwargs.get('positive', [
            'what time is it',
            'hey what time is it',
            'do you have the time',
            'do you know the time',
            'do you know what time it is',
            'what is the time'
        ])

        # Every entry needs its own trailing comma: adjacent string
        # literals are silently concatenated into a single sentence.
        self.negative = kwargs.get('negative', [
            'it is time to go to sleep',
            'what is your favorite color',
            'i had a great time',
            'thyme is my favorite herb',
            'do you have time to look at my essay',
            'how do you have the time to do all this',
            'what is it'
        ])

        # Label negative examples 0 and positive examples 1
        labeled_data = (
            [(name, 0) for name in self.negative] +
            [(name, 1) for name in self.positive]
        )

        train_set = [
            (self.time_question_features(text), n) for (text, n) in labeled_data
        ]

        self.classifier = NaiveBayesClassifier.train(train_set)

    def time_question_features(self, text):
        """
        Provide an analysis of significant features in the string.

        :param text: The sentence to analyse.
        :returns: A dictionary of features: for every word of ``text``
            whether it is a known first word and whether it is a known
            word, and for every lowercase ASCII letter its count and
            presence in the lowercased text.
        """
        features = {}

        known_sentences = self.positive + self.negative

        # A list of all words from the known sentences
        all_words = " ".join(known_sentences).split()

        # A list of the first word in each of the known sentences
        all_first_words = [
            sentence.split(' ', 1)[0] for sentence in known_sentences
        ]

        words = text.split()

        for word in words:
            features['first_word({})'.format(word)] = (word in all_first_words)

        for word in words:
            features['contains({})'.format(word)] = (word in all_words)

        # Lowercase once instead of once per letter
        lowered = text.lower()

        for letter in 'abcdefghijklmnopqrstuvwxyz':
            features['count({})'.format(letter)] = lowered.count(letter)
            features['has({})'.format(letter)] = (letter in lowered)

        return features

    def process(self, statement):
        """
        Return a statement announcing the current local time.

        The confidence of the response is the label (0 or 1) that the
        classifier assigns to the lowercased input text.
        """
        from chatterbot.conversation import Statement

        now = datetime.now()

        time_features = self.time_question_features(statement.text.lower())
        confidence = self.classifier.classify(time_features)
        response = Statement('The current time is ' + now.strftime('%I:%M %p'))

        response.confidence = confidence
        return response

@ -0,0 +1,15 @@
from .output_adapter import OutputAdapter
from .microsoft import Microsoft
from .terminal import TerminalAdapter
from .mailgun import Mailgun
from .gitter import Gitter
from .hipchat import HipChat
# Names exported by this package; each one is imported from a submodule above.
__all__ = (
'OutputAdapter',
'Microsoft',
'TerminalAdapter',
'Mailgun',
'Gitter',
'HipChat',
)

@ -0,0 +1,85 @@
from __future__ import unicode_literals
from .output_adapter import OutputAdapter
class Gitter(OutputAdapter):
    """
    An output adapter that posts a ChatterBot instance's responses
    to a Gitter room.

    :kwargs:
        * *gitter_host* -- Base URL of the API, ending in a slash.
          Defaults to ``https://api.gitter.im/v1/``.
        * *gitter_room* -- The room to join.
        * *gitter_api_token* -- The token sent as a bearer credential.
    """

    def __init__(self, **kwargs):
        super(Gitter, self).__init__(**kwargs)

        self.gitter_host = kwargs.get('gitter_host', 'https://api.gitter.im/v1/')
        self.gitter_room = kwargs.get('gitter_room')
        self.gitter_api_token = kwargs.get('gitter_api_token')

        self.headers = {
            'Authorization': 'Bearer {}'.format(self.gitter_api_token),
            'Content-Type': 'application/json; charset=utf-8',
            'Accept': 'application/json'
        }

        # Joining happens during construction; the id from the reply is
        # what later message posts are addressed to.
        joined = self.join_room(self.gitter_room)
        self.room_id = joined.get('id')

    def _validate_status_code(self, response):
        """
        Raise ``HTTPStatusException`` unless the response status is 200 or 201.
        """
        status = response.status_code
        if status in [200, 201]:
            return
        raise self.HTTPStatusException('{} status code recieved'.format(status))

    def join_room(self, room_name):
        """
        Join the specified Gitter room and return the decoded JSON reply.
        """
        import requests

        url = '{}rooms'.format(self.gitter_host)
        reply = requests.post(
            url,
            headers=self.headers,
            json={'uri': room_name}
        )

        self.logger.info('{} status joining room {}'.format(
            reply.status_code, url
        ))

        self._validate_status_code(reply)
        return reply.json()

    def send_message(self, text):
        """
        Post a message to the joined Gitter room and return the decoded
        JSON reply.
        """
        import requests

        url = '{}rooms/{}/chatMessages'.format(self.gitter_host, self.room_id)
        reply = requests.post(
            url,
            headers=self.headers,
            json={'text': text}
        )

        self.logger.info('{} sending message to {}'.format(
            reply.status_code, url
        ))

        self._validate_status_code(reply)
        return reply.json()

    def process_response(self, statement, session_id=None):
        """
        Send the statement text to the room and return the statement.
        """
        self.send_message(statement.text)
        return statement

    class HTTPStatusException(Exception):
        """
        Exception raised when unexpected non-success HTTP
        status codes are returned in a response.
        """

        def __init__(self, value):
            self.value = value

        def __str__(self):
            return repr(self.value)

@ -0,0 +1,67 @@
from __future__ import unicode_literals
import json
from .output_adapter import OutputAdapter
class HipChat(OutputAdapter):
    """
    An output adapter that allows a ChatterBot instance to send
    responses to a HipChat room.

    :kwargs:
        * *hipchat_host* -- Base URL that ``/v2/room/...`` is appended to.
        * *hipchat_access_token* -- Token sent as a bearer credential.
        * *hipchat_room* -- Id or name of the room responses are sent to.
        * *ssl_verify* -- Value assigned to the HTTP session's ``verify``
          setting. Defaults to ``True``.
    """

    def __init__(self, **kwargs):
        super(HipChat, self).__init__(**kwargs)

        self.hipchat_host = kwargs.get("hipchat_host")
        self.hipchat_access_token = kwargs.get("hipchat_access_token")
        self.hipchat_room = kwargs.get("hipchat_room")

        authorization_header = "Bearer {}".format(self.hipchat_access_token)

        self.headers = {
            'Authorization': authorization_header,
            'Content-Type': 'application/json'
        }

        import requests
        self.session = requests.Session()
        self.session.verify = kwargs.get('ssl_verify', True)

    def send_message(self, room_id_or_name, message):
        """
        Send a message to a HipChat room.
        https://www.hipchat.com/docs/apiv2/method/send_message

        :param room_id_or_name: The room to post to.
        :param message: The message text.
        :returns: The decoded JSON body of the reply.
        """
        message_url = "{}/v2/room/{}/message".format(
            self.hipchat_host,
            room_id_or_name
        )

        response = self.session.post(
            message_url,
            headers=self.headers,
            data=json.dumps({
                'message': message
            })
        )

        return response.json()

    def reply_to_message(self):
        """
        The HipChat api supports responding to a given message.
        This may be a good feature to implement in the future to
        help with multi-user conversations.
        https://www.hipchat.com/docs/apiv2/method/reply_to_message

        :raises AdapterMethodNotImplementedError: always; not implemented yet.
        """
        raise self.AdapterMethodNotImplementedError()

    def process_response(self, statement, session_id=None):
        """
        Send the statement to the configured room, record the id of the
        sent message on the statement and persist the statement.

        :returns: The statement that was sent.
        """
        data = self.send_message(self.hipchat_room, statement.text)

        # Record the message id first, then store the statement itself.
        # add_extra_data is used only for its side effect here, so the
        # object handed to storage never depends on what it returns.
        statement.add_extra_data('hipchat_message_id', data['id'])
        self.chatbot.storage.update(statement)

        return statement

@ -0,0 +1,49 @@
from __future__ import unicode_literals
from .output_adapter import OutputAdapter
class Mailgun(OutputAdapter):
    """
    An output adapter that delivers each response as an email by
    posting it to a Mailgun API endpoint.

    :kwargs:
        * *name* -- Display name of the sender (the bot's name).
        * *mailgun_from_address* -- Address the mail is sent from.
        * *mailgun_api_key* -- Key used for HTTP basic authentication.
        * *mailgun_api_endpoint* -- URL the message is posted to.
        * *mailgun_recipients* -- Recipient address(es).
    """

    def __init__(self, **kwargs):
        super(Mailgun, self).__init__(**kwargs)

        # The sender is shown under the bot's name
        self.name = kwargs.get('name')

        self.from_address = kwargs.get('mailgun_from_address')
        self.api_key = kwargs.get('mailgun_api_key')
        self.endpoint = kwargs.get('mailgun_api_endpoint')
        self.recipients = kwargs.get('mailgun_recipients')

    def send_message(self, subject, text, from_address, recipients):
        """
        Post one email to the configured endpoint.

        * subject: Subject of the email.
        * text: Text body of the email.
        * from_address: The email address that the message will be sent from.
        * recipients: A list of recipient email addresses.

        Returns the response object of the HTTP request.
        """
        import requests

        form_fields = {
            'from': '%s <%s>' % (self.name, from_address),
            'to': recipients,
            'subject': subject,
            'text': text
        }

        return requests.post(
            self.endpoint,
            auth=('api', self.api_key),
            data=form_fields
        )

    def process_response(self, statement, session_id=None):
        """
        Email the text of the response statement and return the statement.
        """
        self.send_message(
            'Message from %s' % (self.name),
            statement.text,
            self.from_address,
            self.recipients
        )

        return statement

@ -0,0 +1,109 @@
from __future__ import unicode_literals
import json
from .output_adapter import OutputAdapter
class Microsoft(OutputAdapter):
    """
    An output adapter that allows a ChatterBot instance to send
    responses to a Microsoft bot using *Direct Line client protocol*.

    :kwargs:
        * *directline_host* -- Base URL of the Direct Line service.
          Defaults to ``https://directline.botframework.com``.
        * *direct_line_token_or_secret* -- Credential placed in the
          ``Authorization`` header.
        * *conversation_id* -- The conversation messages are sent to.
    """

    def __init__(self, **kwargs):
        super(Microsoft, self).__init__(**kwargs)

        self.directline_host = kwargs.get(
            'directline_host',
            'https://directline.botframework.com'
        )
        self.direct_line_token_or_secret = kwargs.get(
            'direct_line_token_or_secret'
        )
        self.conversation_id = kwargs.get('conversation_id')

        self.headers = {
            'Authorization': 'BotConnector {}'.format(
                self.direct_line_token_or_secret
            ),
            'Content-Type': 'application/json'
        }

    def _validate_status_code(self, response):
        """
        Raise ``HTTPStatusException`` unless the response status is 200 or 204.
        """
        code = response.status_code
        if code in [200, 204]:
            return
        raise self.HTTPStatusException('{} status code recieved'.format(code))

    def get_most_recent_message(self):
        """
        Fetch the conversation's messages and return the one indicated
        by the reply's watermark, or None when there are no messages.
        """
        import requests

        url = '{host}/api/conversations/{id}/messages'.format(
            host=self.directline_host,
            id=self.conversation_id
        )

        # NOTE(review): verify=False turns off TLS certificate checking
        # for this request only -- confirm that this is intentional.
        reply = requests.get(
            url,
            headers=self.headers,
            verify=False
        )

        self.logger.info('{} retrieving most recent messages {}'.format(
            reply.status_code, url
        ))

        self._validate_status_code(reply)

        payload = reply.json()

        if not payload['messages']:
            return None

        newest_position = int(payload['watermark']) - 1
        return payload['messages'][newest_position]

    def send_message(self, conversation_id, message):
        """
        Post a message to a Direct Line conversation.

        :param conversation_id: The conversation to post to.
        :param message: The message text.
        :returns: The result of ``get_most_recent_message`` after sending.
        """
        import requests

        url = "{host}/api/conversations/{conversationId}/messages".format(
            host=self.directline_host,
            conversationId=conversation_id
        )

        body = json.dumps({
            'message': message
        })

        reply = requests.post(
            url,
            headers=self.headers,
            data=body
        )

        self.logger.info('{} sending message {}'.format(
            reply.status_code, url
        ))
        self._validate_status_code(reply)

        # A successful post may carry no content (204), so the message
        # list is queried to obtain something to return.
        return self.get_most_recent_message()

    def process_response(self, statement, session_id=None):
        """
        Send the statement text to the configured conversation, log the
        outcome and return the statement.
        """
        outcome = self.send_message(self.conversation_id, statement.text)
        self.logger.info('processing user response {}'.format(outcome))
        return statement

    class HTTPStatusException(Exception):
        """
        Exception raised when unexpected non-success HTTP
        status codes are returned in a response.
        """

        def __init__(self, value):
            self.value = value

        def __str__(self):
            return repr(self.value)

@ -0,0 +1,20 @@
from chatterbot.adapters import Adapter
class OutputAdapter(Adapter):
    """
    Base class for output adapters. Subclasses override
    ``process_response`` to do something with a response, such as
    delivering it to an API endpoint.
    """

    def process_response(self, statement, session_id=None):
        """
        Hook for subclasses; the base implementation hands the statement
        back untouched.

        :param statement: The statement that the chat bot has produced in response to some input.

        :param session_id: The unique id of the current chat session.

        :returns: The response statement.
        """
        return statement

@ -0,0 +1,16 @@
from __future__ import unicode_literals
from .output_adapter import OutputAdapter
class TerminalAdapter(OutputAdapter):
    """
    A minimal output adapter that writes ChatterBot's
    responses to the terminal.
    """

    def process_response(self, statement, session_id=None):
        """
        Print the text of the response and return that text.

        Note that the text, not the statement object, is returned.
        """
        text = statement.text
        print(text)
        return text

@ -0,0 +1,751 @@
# -*- coding: utf-8 -*-
import re
from datetime import timedelta, datetime
import calendar
# Spellings of each time unit that the parser can capture
year_variations = ['year', 'years', 'yrs']
day_variations = ['days', 'day']
minute_variations = ['minute', 'minutes', 'mins']
hour_variations = ['hrs', 'hours', 'hour']
week_variations = ['weeks', 'week', 'wks']
month_variations = ['month', 'months']

# Regular expression fragments that are combined into the patterns below.
# Every fragment containing a backslash is a raw string, so that sequences
# such as \s and \d reach the regex engine as written instead of being
# treated as (invalid) string escape sequences.
day_names = 'monday|tuesday|wednesday|thursday|friday|saturday|sunday'
month_names_long = (
    'january|february|march|april|may|june|july|august|september|october|november|december'
)
month_names = month_names_long + '|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec'
day_nearest_names = 'today|yesterday|tomorrow|tonight|tonite'
numbers = (
    r'(^a(?=\s)|one|two|three|four|five|six|seven|eight|nine|ten|'
    'eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|'
    'eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|'
    'eighty|ninety|hundred|thousand)'
)
# Note: hour_variations is not part of this group
re_dmy = '(' + '|'.join(day_variations + minute_variations + year_variations + week_variations + month_variations) + ')'
re_duration = r'(before|after|earlier|later|ago|from\snow)'
re_year = r'(19|20)\d{2}|^(19|20)\d{2}'
re_timeframe = r'this|coming|next|following|previous|last|end\sof\sthe'
re_ordinal = 'st|nd|rd|th|first|second|third|fourth|fourth|' + re_timeframe
# Either "H:MM" (hour and minute) or "Ham"/"Hpm" (hour and convention)
re_time = r'(?P<hour>\d{1,2})(\:(?P<minute>\d{1,2})|(?P<convention>am|pm))'
re_separator = 'of|at|on'
# A list of (compiled regular expression, parser function) tuples.
# Each parser function is called with the match object and a base date.
# Start with the widest match and narrow it down because the order of the match in this list matters
# NOTE(review): HASHMONTHS, HASHORDINALS, HASHWEEKDAYS and several helper
# functions used by the lambdas are defined outside this part of the file.
regex = [
# Day first: [weekday] day[ordinal] month year [time]
(
re.compile(
r'''
(
((?P<dow>%s)[,\s]\s*)? #Matches Monday, 12 Jan 2012, 12 Jan 2012 etc
(?P<day>\d{1,2}) # Matches a digit
(%s)?
[-\s] # One or more space
(?P<month>%s) # Matches any month name
[-\s] # Space
(?P<year>%s) # Year
((\s|,\s|\s(%s))?\s*(%s))?
)
''' % (day_names, re_ordinal, month_names, re_year, re_separator, re_time),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
int(m.group('year') if m.group('year') else base_date.year),
HASHMONTHS[m.group('month').strip().lower()],
int(m.group('day') if m.group('day') else 1),
) + timedelta(**convert_time_to_hour_minute(
m.group('hour'),
m.group('minute'),
m.group('convention')
))
),
# Month first: [weekday] month day[ordinal] [year] [time]
(
re.compile(
r'''
(
((?P<dow>%s)[,\s][-\s]*)? #Matches Monday, Jan 12 2012, Jan 12 2012 etc
(?P<month>%s) # Matches any month name
[-\s] # Space
((?P<day>\d{1,2})) # Matches a digit
(%s)?
([-\s](?P<year>%s))? # Year
((\s|,\s|\s(%s))?\s*(%s))?
)
''' % (day_names, month_names, re_ordinal, re_year, re_separator, re_time),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
int(m.group('year') if m.group('year') else base_date.year),
HASHMONTHS[m.group('month').strip().lower()],
int(m.group('day') if m.group('day') else 1)
) + timedelta(**convert_time_to_hour_minute(
m.group('hour'),
m.group('minute'),
m.group('convention')
))
),
# Month first with a mandatory year: month day[ordinal] year [time]
(
re.compile(
r'''
(
(?P<month>%s) # Matches any month name
[-\s] # One or more space
(?P<day>\d{1,2}) # Matches a digit
(%s)?
[-\s]\s*?
(?P<year>%s) # Year
((\s|,\s|\s(%s))?\s*(%s))?
)
''' % (month_names, re_ordinal, re_year, re_separator, re_time),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
int(m.group('year') if m.group('year') else base_date.year),
HASHMONTHS[m.group('month').strip().lower()],
int(m.group('day') if m.group('day') else 1),
) + timedelta(**convert_time_to_hour_minute(
m.group('hour'),
m.group('minute'),
m.group('convention')
))
),
# Relative duration: [number] unit duration-word [base day] [time]
(
re.compile(
r'''
(
((?P<number>\d+|(%s[-\s]?)+)\s)? # Matches any number or string 25 or twenty five
(?P<unit>%s)s?\s # Matches days, months, years, weeks, minutes
(?P<duration>%s) # before, after, earlier, later, ago, from now
(\s*(?P<base_time>(%s)))?
((\s|,\s|\s(%s))?\s*(%s))?
)
''' % (numbers, re_dmy, re_duration, day_nearest_names, re_separator, re_time),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: date_from_duration(
base_date,
m.group('number'),
m.group('unit').lower(),
m.group('duration').lower(),
m.group('base_time')
) + timedelta(**convert_time_to_hour_minute(
m.group('hour'),
m.group('minute'),
m.group('convention')
))
),
# Quarter of a year: ordinal "quarter of" year (handled by date_from_quarter)
(
re.compile(
r'''
(
(?P<ordinal>%s) # First quarter of 2014
\s+
quarter\sof
\s+
(?P<year>%s)
)
''' % (re_ordinal, re_year),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: date_from_quarter(
base_date,
HASHORDINALS[m.group('ordinal').lower()],
int(m.group('year') if m.group('year') else base_date.year)
)
),
# Ordinal day before the month: number+ordinal [separator] month [year]
(
re.compile(
r'''
(
(?P<ordinal_value>\d+)
(?P<ordinal>%s) # 1st January 2012
((\s|,\s|\s(%s))?\s*)?
(?P<month>%s)
([,\s]\s*(?P<year>%s))?
)
''' % (re_ordinal, re_separator, month_names, re_year),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
int(m.group('year') if m.group('year') else base_date.year),
int(HASHMONTHS[m.group('month').lower()] if m.group('month') else 1),
int(m.group('ordinal_value') if m.group('ordinal_value') else 1),
)
),
# Ordinal day after the month: month number+ordinal [year]
(
re.compile(
r'''
(
(?P<month>%s)
\s+
(?P<ordinal_value>\d+)
(?P<ordinal>%s) # January 1st 2012
([,\s]\s*(?P<year>%s))?
)
''' % (month_names, re_ordinal, re_year),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
int(m.group('year') if m.group('year') else base_date.year),
int(HASHMONTHS[m.group('month').lower()] if m.group('month') else 1),
int(m.group('ordinal_value') if m.group('ordinal_value') else 1),
)
),
# Relative unit: timeframe word [number] unit [time]
(
re.compile(
r'''
(?P<time>%s) # this, next, following, previous, last
\s+
((?P<number>\d+|(%s[-\s]?)+)\s)?
(?P<dmy>%s) # year, day, week, month, night, minute, min
((\s|,\s|\s(%s))?\s*(%s))?
''' % (re_timeframe, numbers, re_dmy, re_separator, re_time),
(re.VERBOSE | re.IGNORECASE),
),
lambda m, base_date: date_from_relative_week_year(
base_date,
m.group('time'),
m.group('dmy'),
m.group('number')
) + timedelta(**convert_time_to_hour_minute(
m.group('hour'),
m.group('minute'),
m.group('convention')
))
),
# Relative weekday: timeframe word weekday [time]
(
re.compile(
r'''
(?P<time>%s) # this, next, following, previous, last
\s+
(?P<dow>%s) # mon - fri
((\s|,\s|\s(%s))?\s*(%s))?
''' % (re_timeframe, day_names, re_separator, re_time),
(re.VERBOSE | re.IGNORECASE),
),
lambda m, base_date: date_from_relative_day(
base_date,
m.group('time'),
m.group('dow')
) + timedelta(**convert_time_to_hour_minute(
m.group('hour'),
m.group('minute'),
m.group('convention')
))
),
# Day with ordinal and month, in the base date's year
(
re.compile(
r'''
(
(?P<day>\d{1,2}) # Day, Month
(%s)
[-\s] # One or more space
(?P<month>%s)
)
''' % (re_ordinal, month_names),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
base_date.year,
HASHMONTHS[m.group('month').strip().lower()],
int(m.group('day') if m.group('day') else 1)
)
),
# Month and day, in the base date's year
(
re.compile(
r'''
(
(?P<month>%s) # Month, day
[-\s] # One or more space
((?P<day>\d{1,2})\b) # Matches a digit January 12
(%s)?
)
''' % (month_names, re_ordinal),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
base_date.year,
HASHMONTHS[m.group('month').strip().lower()],
int(m.group('day') if m.group('day') else 1)
)
),
# Month and year; resolves to the first day of that month
(
re.compile(
r'''
(
(?P<month>%s) # Month, year
[-\s] # One or more space
((?P<year>\d{1,4})\b) # Matches a digit January 12
)
''' % (month_names),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
int(m.group('year')),
HASHMONTHS[m.group('month').strip().lower()],
1
)
),
# Numeric date with slashes; the first number is read as the month
(
re.compile(
r'''
(
(?P<month>\d{1,2}) # MM/DD or MM/DD/YYYY
/
((?P<day>\d{1,2}))
(/(?P<year>%s))?
)
''' % (re_year),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
int(m.group('year') if m.group('year') else base_date.year),
int(m.group('month').strip()),
int(m.group('day'))
)
),
# Day adverb with an optional time (handled by date_from_adverb)
(
re.compile(
r'''
(?P<adverb>%s) # today, yesterday, tomorrow, tonight
((\s|,\s|\s(%s))?\s*(%s))?
''' % (day_nearest_names, re_separator, re_time),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: date_from_adverb(
base_date,
m.group('adverb')
) + timedelta(**convert_time_to_hour_minute(
m.group('hour'),
m.group('minute'),
m.group('convention')
))
),
# A bare weekday name (handled by this_week_day)
(
re.compile(
r'''
(?P<named_day>%s) # Mon - Sun
''' % (day_names),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: this_week_day(
base_date,
HASHWEEKDAYS[m.group('named_day').lower()]
)
),
# A bare year; resolves to January 1st of that year
(
re.compile(
r'''
(?P<year>%s) # Year
''' % (re_year),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(int(m.group('year')), 1, 1)
),
# A bare long month name; resolves to the 1st of that month in the base year
(
re.compile(
r'''
(?P<month>%s) # Month
''' % (month_names_long),
(re.VERBOSE | re.IGNORECASE)
),
lambda m, base_date: datetime(
base_date.year,
HASHMONTHS[m.group('month').lower()],
1
)
),
# A bare time; applied as an offset from midnight of the base date
(
re.compile(
r'''
(%s) # Matches time 12:00
''' % (re_time),
(re.VERBOSE | re.IGNORECASE),
),
lambda m, base_date: datetime(
base_date.year,
base_date.month,
base_date.day
) + timedelta(**convert_time_to_hour_minute(
m.group('hour'),
m.group('minute'),
m.group('convention')
))
),
# A number followed by an hour word; the number is used as the hour of the base date
(
re.compile(
r'''
(
(?P<hour>\d+) # Matches 12 hours, 2 hrs
\s+
(%s)
)
''' % ('|'.join(hour_variations)),
(re.VERBOSE | re.IGNORECASE),
),
lambda m, base_date: datetime(
base_date.year,
base_date.month,
base_date.day,
int(m.group('hour'))
)
)
]
# (pattern, value) pairs tried in order by hashnum(). Patterns are matched
# at the start of the input, so every word that begins with a shorter
# number word ("sixteen"/"sixty" vs. "six") must be listed before it.
_HASHNUM_PATTERNS = (
    (r'eleven', 11),
    (r'twelve', 12),
    (r'thirteen', 13),
    (r'fourteen', 14),
    (r'fifteen', 15),
    (r'sixteen', 16),
    (r'seventeen', 17),
    (r'eighteen', 18),
    (r'nineteen', 19),
    (r'twenty', 20),
    (r'thirty', 30),
    (r'forty', 40),
    (r'fifty', 50),
    (r'sixty', 60),
    (r'seventy', 70),
    (r'eighty', 80),
    (r'ninety', 90),
    (r'hundred', 100),
    (r'thousand', 1000),
    (r'one|^a\b', 1),
    (r'two', 2),
    (r'three', 3),
    (r'four', 4),
    (r'five', 5),
    (r'six', 6),
    (r'seven', 7),
    (r'eight', 8),
    (r'nine', 9),
    (r'ten', 10),
)


def hashnum(number):
    """
    Hash of numbers
    Append more number to modify your match

    Translate a spelled-out number at the start of ``number`` into its
    integer value. Matching is case-insensitive and the article "a"
    counts as 1.

    :param number: The text to translate, e.g. ``'sixteen'``.
    :returns: The integer value, or None when the text does not start
        with a known number word.
    """
    for pattern, value in _HASHNUM_PATTERNS:
        if re.match(pattern, number, re.IGNORECASE):
            return value
    return None
def convert_string_to_number(value):
    """
    Turn a numeric string or spelled-out number into an integer.

    ``None`` is treated as 1, integers are returned unchanged and digit
    strings are parsed directly. Any other text is scanned with the
    module-level ``numbers`` pattern and the value of every word found
    (as given by ``hashnum``) is added up.
    """
    if value is None:
        return 1

    if isinstance(value, int):
        return value

    if value.isdigit():
        return int(value)

    words = re.findall(numbers + '+', value, re.IGNORECASE)
    return sum(hashnum(word) for word in words)
def convert_time_to_hour_minute(hour, minute, convention):
    """
    Convert a clock time to 24-hour ``hours`` / ``minutes`` values.

    :param hour: Hour as a string or int; None means 0.
    :param minute: Minute as a string or int; None means 0.
    :param convention: 'am' or 'pm' in any letter case, or None when
        the text carried no marker (the hour is then used as given).

    :returns: A dict with the keys ``'hours'`` and ``'minutes'``.
    :rtype: dict
    """
    hour = 0 if hour is None else int(hour)
    minute = 0 if minute is None else int(minute)

    meridiem = None if convention is None else convention.strip().lower()

    if meridiem == 'pm' and hour < 12:
        # 1 pm - 11 pm map to 13 - 23; 12 pm is already noon
        hour += 12
    elif meridiem == 'am' and hour == 12:
        # 12 am is midnight
        hour = 0

    return {'hours': hour, 'minutes': minute}
def date_from_quarter(base_date, ordinal, year):
    """
    Return the first and last day of a quarter of ``year``.

    :param base_date: Unused; kept for a uniform handler signature.
    :param ordinal: Quarter number 1-4. Values below 1 (such as the -1
        used for "last") select the fourth quarter.
    :type ordinal: int
    :param year: Calendar year of the quarter.
    :type year: int

    :returns: ``[start, end]`` datetimes, both at midnight.
    :rtype: list
    """
    quarter_length = 3
    quarter = ordinal if ordinal >= 1 else 4

    # Quarters start in January, April, July and October
    month_start = quarter_length * (quarter - 1) + 1
    month_end = month_start + quarter_length - 1

    last_day = calendar.monthrange(year, month_end)[1]

    return [
        datetime(year, month_start, 1),
        datetime(year, month_end, last_day)
    ]
def date_from_relative_day(base_date, time, dow):
    """
    Resolve a relative weekday phrase to a date.

    Examples: "this tuesday", "last tuesday", "next friday".
    Returns None when ``time`` is not a recognised qualifier.
    """
    # Work from midnight of the base date
    midnight = datetime(base_date.year, base_date.month, base_date.day)
    qualifier = time.lower()
    day_name = dow.lower()

    if qualifier in ('this', 'coming'):
        return this_week_day(midnight, HASHWEEKDAYS[day_name])

    if qualifier in ('last', 'previous'):
        return previous_week_day(midnight, HASHWEEKDAYS[day_name])

    if qualifier in ('next', 'following'):
        return next_week_day(midnight, HASHWEEKDAYS[day_name])

    return None
def date_from_relative_week_year(base_date, time, dow, ordinal=1):
    """
    Resolve a relative year / month / week / day phrase to a date.

    Examples: "this year", "last month", "next week", "end of the day".

    :param base_date: Date the phrase is relative to.
    :param time: Qualifier: 'this', 'coming', 'last', 'previous',
        'next', 'following' or 'end of the'.
    :param dow: The unit word; compared against the module-level
        ``year_variations``, ``month_variations``, ``week_variations``
        and ``day_variations`` collections.
    :param ordinal: Currently unused.

    :returns: A datetime, or None when the combination is not handled.
    """
    # Reset date to start of the day
    relative_date = datetime(base_date.year, base_date.month, base_date.day)

    def shift_month(months):
        # Move whole months, rolling over year boundaries and clamping
        # the day to the length of the target month (Jan 31 -> Feb 28/29).
        index = relative_date.year * 12 + (relative_date.month - 1) + months
        year, month_index = divmod(index, 12)
        month = month_index + 1
        day = min(relative_date.day, calendar.monthrange(year, month)[1])
        return datetime(year, month, day)

    if dow in year_variations:
        if time == 'this' or time == 'coming':
            return datetime(relative_date.year, 1, 1)
        elif time == 'last' or time == 'previous':
            return datetime(relative_date.year - 1, relative_date.month, 1)
        elif time == 'next' or time == 'following':
            # Mirrors the "last year" branch; adding timedelta(year + 1)
            # would move the date forward by roughly 2000 *days*.
            return datetime(relative_date.year + 1, relative_date.month, 1)
        elif time == 'end of the':
            return datetime(relative_date.year, 12, 31)
    elif dow in month_variations:
        if time == 'this':
            return relative_date
        elif time == 'last' or time == 'previous':
            return shift_month(-1)
        elif time == 'next' or time == 'following':
            return shift_month(1)
        elif time == 'end of the':
            return datetime(
                relative_date.year,
                relative_date.month,
                calendar.monthrange(relative_date.year, relative_date.month)[1]
            )
    elif dow in week_variations:
        if time == 'this':
            # Monday of the current week
            return relative_date - timedelta(days=relative_date.weekday())
        elif time == 'last' or time == 'previous':
            return relative_date - timedelta(weeks=1)
        elif time == 'next' or time == 'following':
            return relative_date + timedelta(weeks=1)
        elif time == 'end of the':
            # Sunday of the current week
            return relative_date + timedelta(days=6 - relative_date.weekday())
    elif dow in day_variations:
        if time == 'this':
            return relative_date
        elif time == 'last' or time == 'previous':
            return relative_date - timedelta(days=1)
        elif time == 'next' or time == 'following':
            return relative_date + timedelta(days=1)
        elif time == 'end of the':
            return datetime(relative_date.year, relative_date.month, relative_date.day, 23, 59, 59)

    return None
def date_from_adverb(base_date, name):
    """
    Convert a day adverb to a date relative to ``base_date``.

    "today" / "tonight" / "tonite" give the base day, "yesterday" the
    day before and "tomorrow" / "tom" the day after, all at midnight.

    :param base_date: Date the adverb is relative to.
    :param name: The adverb, in any letter case.

    :returns: A datetime at midnight, or None for an unknown adverb.
    """
    # Reset date to start of the day
    adverb_date = datetime(base_date.year, base_date.month, base_date.day)
    adverb = name.lower()

    if adverb in ('today', 'tonite', 'tonight'):
        # Return the base day itself: datetime.today() would ignore
        # base_date and carry the current wall-clock time along.
        return adverb_date
    elif adverb == 'yesterday':
        return adverb_date - timedelta(days=1)
    elif adverb in ('tomorrow', 'tom'):
        return adverb_date + timedelta(days=1)

    return None
def date_from_duration(base_date, number_as_string, unit, duration, base_time=None):
    """
    Find a date from a duration phrase, e.g. "20 days from now".

    Currently does not support strings like "20 days from last monday".

    :param base_date: Date the duration is relative to.
    :param number_as_string: The amount, as digits or number words.
    :param unit: Unit word, compared against the module-level
        ``*_variations`` collections.
    :param duration: Direction: 'ago', 'before', 'earlier', 'after',
        'later' or 'from now'.
    :param base_time: Optional day adverb ("yesterday") that replaces
        ``base_date`` as the reference point.

    :returns: A datetime, or None when the unit or the direction is
        not recognised.
    """
    # Check if query is `2 days before yesterday` or `day before yesterday`
    if base_time is not None:
        base_date = date_from_adverb(base_date, base_time)

    num = convert_string_to_number(number_as_string)

    if unit in day_variations:
        args = {'days': num}
    elif unit in minute_variations:
        args = {'minutes': num}
    elif unit in hour_variations:
        args = {'hours': num}
    elif unit in week_variations:
        args = {'weeks': num}
    elif unit in month_variations:
        # Months are approximated as one twelfth of a 365-day year
        args = {'days': 365 * num / 12}
    elif unit in year_variations:
        args = {'years': num}
    else:
        # Unknown unit: nothing to compute
        return None

    if duration in ('ago', 'before', 'earlier'):
        sign = -1
    elif duration in ('after', 'later', 'from now'):
        sign = 1
    else:
        return None

    if 'years' in args:
        # timedelta has no notion of years, so shift the calendar year
        target_year = base_date.year + sign * args['years']
        try:
            return datetime(target_year, base_date.month, base_date.day)
        except ValueError:
            # Feb 29 does not exist in a non-leap target year
            if (base_date.month, base_date.day) != (2, 29):
                raise
            return datetime(target_year, 2, 28)

    return base_date + sign * timedelta(**args)
def this_week_day(base_date, weekday):
    """
    Find the requested weekday in the current week.

    When that weekday has already passed this week (today is Tuesday
    and the query is "this monday"), the one in the next week is
    returned instead.
    """
    today_index = base_date.weekday()

    if today_index > weekday:
        return next_week_day(base_date, weekday)

    # Start at Monday of this week and walk forward to the target
    candidate = base_date - timedelta(days=today_index)
    while candidate.weekday() != weekday:
        candidate += timedelta(days=1)

    return candidate
def previous_week_day(base_date, weekday):
    """
    Find the most recent occurrence of a weekday strictly before
    ``base_date``.
    """
    one_day = timedelta(days=1)

    candidate = base_date - one_day
    while candidate.weekday() != weekday:
        candidate -= one_day

    return candidate
def next_week_day(base_date, weekday):
    """
    Find the requested weekday in the week after ``base_date``.
    """
    one_day = timedelta(days=1)

    # Monday of the following week
    candidate = base_date + timedelta(days=7 - base_date.weekday())
    while candidate.weekday() != weekday:
        candidate += one_day

    return candidate
# Month names and their abbreviations mapped to the month number (1-12)
HASHMONTHS = {
    alias: month_number
    for month_number, aliases in enumerate((
        ('january', 'jan'),
        ('february', 'feb'),
        ('march', 'mar'),
        ('april', 'apr'),
        ('may',),
        ('june', 'jun'),
        ('july', 'jul'),
        ('august', 'aug'),
        ('september', 'sep'),
        ('october', 'oct'),
        ('november', 'nov'),
        ('december', 'dec'),
    ), start=1)
    for alias in aliases
}

# Weekday names and three-letter abbreviations mapped to the value
# returned by datetime.weekday() (Monday is 0)
HASHWEEKDAYS = {
    alias: day_index
    for day_index, day_name in enumerate((
        'monday', 'tuesday', 'wednesday', 'thursday',
        'friday', 'saturday', 'sunday',
    ))
    for alias in (day_name, day_name[:3])
}

# Ordinal words mapped to numbers; "last" is represented by -1
HASHORDINALS = dict(
    first=1,
    second=2,
    third=3,
    fourth=4,
    forth=4,
    last=-1
)
def datetime_parsing(text, base_date=None):
    """
    Extract datetime objects from a string of text.

    :param text: The text to scan.
    :param base_date: Reference date for relative expressions.
        Defaults to the current time *at the moment of the call*.

    :returns: A list of ``(matched_text, value, (start, end))`` tuples
        ordered by the start position of the match.
    :rtype: list
    """
    # A default of datetime.now() in the signature would be evaluated
    # once at import time and go stale in a long-running process.
    if base_date is None:
        base_date = datetime.now()

    matches = []
    found_array = []

    # Find the position in the string
    for expression, function in regex:
        for match in expression.finditer(text):
            matches.append((match.group(), function(match, base_date), match.span()))

    # Wrap the matched text with TAG element to prevent nested selections
    for match, value, spans in matches:
        tagged = '<TAG>' + match + '</TAG>'
        # The matched text is literal: escape it for the pattern and use
        # a callable replacement so backslashes are not interpreted.
        text, substitutions = re.subn(
            '(?!<TAG[^>]*?>)' + re.escape(match) + '(?![^<]*?</TAG>)',
            lambda _found: tagged,
            text
        )

        if substitutions != 0:
            found_array.append((match, value, spans))

    # To preserve order of the match, sort based on the start position
    return sorted(found_array, key=lambda found: found and found[2][0])

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
"""
Statement pre-processors.
"""
def clean_whitespace(chatbot, statement):
    """
    Normalise the whitespace of the statement text: line breaks and
    tabs become spaces, the ends are trimmed and runs of spaces are
    collapsed into a single space.
    """
    import re

    # Line breaks and tabs turn into plain spaces
    text = re.sub('[\n\r\t]', ' ', statement.text)

    # Drop leading and trailing whitespace
    text = text.strip()

    # Squeeze repeated spaces
    statement.text = re.sub(' +', ' ', text)

    return statement
def unescape_html(chatbot, statement):
    """
    Replace HTML character references in the statement text with the
    characters they stand for.
    For example: "&lt;b&gt;" becomes "<b>".
    """
    import sys

    # Pick the unescape implementation for the running Python version
    if sys.version_info[0] >= 3:
        import html as unescaper
    else:
        from HTMLParser import HTMLParser
        unescaper = HTMLParser()

    statement.text = unescaper.unescape(statement.text)

    return statement
def convert_to_ascii(chatbot, statement):
    """
    Replace accented unicode characters with their closest ASCII
    equivalents; characters without one are dropped.
    For example: "på fédéral" becomes "pa federal".
    """
    import sys
    import unicodedata

    # Python 2 needs an explicit unicode object before normalising
    if sys.version_info[0] < 3:
        statement.text = unicode(statement.text) # NOQA

    # NFKD splits accented characters into base character + combining mark;
    # encoding to ASCII with 'ignore' then discards the marks
    decomposed = unicodedata.normalize('NFKD', statement.text)
    ascii_bytes = decomposed.encode('ascii', 'ignore')

    statement.text = str(ascii_bytes.decode('utf-8'))
    return statement

@ -0,0 +1,71 @@
"""
Response selection methods determine which response should be used in
the event that multiple responses are generated within a logic adapter.
"""
import logging
def get_most_frequent_response(input_statement, response_list):
    """
    :param input_statement: A statement, that closely matches an input to the chat bot.
    :type input_statement: Statement

    :param response_list: A list of statement options to choose a response from.
    :type response_list: list

    :return: The response statement that occurs most often as a response
        to the input statement; the later one wins a tie, and None is
        returned for an empty list.
    :rtype: Statement
    """
    logger = logging.getLogger(__name__)
    logger.info(u'Selecting response with greatest number of occurrences.')

    best_statement, best_count = None, -1

    for candidate in response_list:
        candidate_count = candidate.get_response_count(input_statement)

        # ">=" lets a later statement replace an equally common earlier one
        if candidate_count >= best_count:
            best_statement, best_count = candidate, candidate_count

    return best_statement
def get_first_response(input_statement, response_list):
    """
    :param input_statement: A statement, that closely matches an input to the chat bot.
    :type input_statement: Statement

    :param response_list: A list of statement options to choose a response from.
    :type response_list: list

    :return: The statement at the front of the response list.
    :rtype: Statement
    """
    option_count = len(response_list)

    logger = logging.getLogger(__name__)
    logger.info(u'Selecting first response from list of {} options.'.format(option_count))

    return response_list[0]
def get_random_response(input_statement, response_list):
    """
    :param input_statement: A statement, that closely matches an input to the chat bot.
    :type input_statement: Statement

    :param response_list: A list of statement options to choose a response from.
    :type response_list: list

    :return: A randomly chosen statement from the response list.
    :rtype: Statement
    """
    from random import choice

    option_count = len(response_list)

    logger = logging.getLogger(__name__)
    logger.info(u'Selecting a response from list of {} options.'.format(option_count))

    return choice(response_list)

@ -0,0 +1,12 @@
from .storage_adapter import StorageAdapter
from .django_storage import DjangoStorageAdapter
from .mongodb import MongoDatabaseAdapter
from .sql_storage import SQLStorageAdapter
__all__ = (
'StorageAdapter',
'DjangoStorageAdapter',
'MongoDatabaseAdapter',
'SQLStorageAdapter',
)

@ -0,0 +1,220 @@
from chatterbot.storage import StorageAdapter
from chatterbot import constants
class DjangoStorageAdapter(StorageAdapter):
    """
    Storage adapter that allows ChatterBot to interact with
    Django storage backends.

    :keyword django_app_name: Name of the Django app that provides the
        Statement, Response, Conversation and Tag models.
    """

    def __init__(self, **kwargs):
        super(DjangoStorageAdapter, self).__init__(**kwargs)

        self.adapter_supports_queries = False
        self.django_app_name = kwargs.get(
            'django_app_name',
            constants.DEFAULT_DJANGO_APP_NAME
        )

    def get_statement_model(self):
        """
        Return the Django model class for statements.
        """
        from django.apps import apps
        return apps.get_model(self.django_app_name, 'Statement')

    def get_response_model(self):
        """
        Return the Django model class for responses.
        """
        from django.apps import apps
        return apps.get_model(self.django_app_name, 'Response')

    def get_conversation_model(self):
        """
        Return the Django model class for conversations.
        """
        from django.apps import apps
        return apps.get_model(self.django_app_name, 'Conversation')

    def get_tag_model(self):
        """
        Return the Django model class for tags.
        """
        from django.apps import apps
        return apps.get_model(self.django_app_name, 'Tag')

    def count(self):
        """
        Return the number of statements in the database.
        """
        Statement = self.get_model('statement')
        return Statement.objects.count()

    def find(self, statement_text):
        """
        Return the statement with the given text, or None if there
        is no such statement.
        """
        Statement = self.get_model('statement')
        try:
            return Statement.objects.get(text=statement_text)
        except Statement.DoesNotExist as e:
            self.logger.info(str(e))
            return None

    def filter(self, **kwargs):
        """
        Returns a list of statements in the database
        that match the parameters specified.
        """
        from django.db.models import Q
        Statement = self.get_model('statement')

        order = kwargs.pop('order_by', None)

        RESPONSE_CONTAINS = 'in_response_to__contains'

        if RESPONSE_CONTAINS in kwargs:
            value = kwargs[RESPONSE_CONTAINS]
            del kwargs[RESPONSE_CONTAINS]
            kwargs['in_response__response__text'] = value

        # Translate the storage-agnostic "in_response_to" key prefix into
        # the "in_response" name used in the ORM lookups below
        kwargs_copy = kwargs.copy()

        for kwarg in kwargs_copy:
            value = kwargs[kwarg]
            del kwargs[kwarg]
            kwarg = kwarg.replace('in_response_to', 'in_response')
            kwargs[kwarg] = value

        if 'in_response' in kwargs:
            responses = kwargs['in_response']
            del kwargs['in_response']

            if responses:
                kwargs['in_response__response__text__in'] = []
                for response in responses:
                    kwargs['in_response__response__text__in'].append(response)
            else:
                # An empty list means "statements that respond to nothing"
                kwargs['in_response'] = None

        parameters = {}
        if 'in_response__response__text' in kwargs:
            value = kwargs['in_response__response__text']
            parameters['responses__statement__text'] = value

        statements = Statement.objects.filter(Q(**kwargs) | Q(**parameters))

        if order:
            statements = statements.order_by(order)

        return statements

    def update(self, statement):
        """
        Update the provided statement.

        Creates the statement row and the rows of the statements it
        responds to when they do not exist yet, and records one Response
        link per entry in ``statement.response_statement_cache``.

        :returns: The saved statement model instance.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')

        response_statement_cache = statement.response_statement_cache

        # Read the extra data from the *input* statement before the name
        # is rebound to the database row below; otherwise the row would
        # only ever be assigned its own existing value.
        extra_data = getattr(statement, 'extra_data', '')

        statement, created = Statement.objects.get_or_create(text=statement.text)
        statement.extra_data = extra_data
        statement.save()

        for _response_statement in response_statement_cache:

            response_statement, created = Statement.objects.get_or_create(
                text=_response_statement.text
            )
            response_statement.extra_data = getattr(_response_statement, 'extra_data', '')
            response_statement.save()

            Response.objects.create(
                statement=response_statement,
                response=statement
            )

        return statement

    def get_random(self):
        """
        Returns a random statement from the database
        """
        Statement = self.get_model('statement')
        return Statement.objects.order_by('?').first()

    def remove(self, statement_text):
        """
        Removes the statement that matches the input text.
        Removes any responses from statements if the response text matches the
        input text.
        """
        from django.db.models import Q

        Statement = self.get_model('statement')
        Response = self.get_model('response')

        statements = Statement.objects.filter(text=statement_text)

        responses = Response.objects.filter(
            Q(statement__text=statement_text) | Q(response__text=statement_text)
        )

        responses.delete()
        statements.delete()

    def get_latest_response(self, conversation_id):
        """
        Returns the latest response in a conversation if it exists.
        Returns None if a matching conversation cannot be found.
        """
        Response = self.get_model('response')

        response = Response.objects.filter(
            conversations__id=conversation_id
        ).order_by(
            'created_at'
        ).last()

        if not response:
            return None

        return response.response

    def create_conversation(self):
        """
        Create a new conversation and return its id.
        """
        Conversation = self.get_model('conversation')
        conversation = Conversation.objects.create()
        return conversation.id

    def add_to_conversation(self, conversation_id, statement, response):
        """
        Add the statement and response to the conversation.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')

        first_statement, created = Statement.objects.get_or_create(text=statement.text)
        first_response, created = Statement.objects.get_or_create(text=response.text)

        response = Response.objects.create(
            statement=first_statement,
            response=first_response
        )

        response.conversations.add(conversation_id)

    def drop(self):
        """
        Remove all data from the database.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')
        Conversation = self.get_model('conversation')
        Tag = self.get_model('tag')

        Statement.objects.all().delete()
        Response.objects.all().delete()
        Conversation.objects.all().delete()
        Tag.objects.all().delete()

    def get_response_statements(self):
        """
        Return only statements that are in response to another statement.
        A statement must exist which lists the closest matching statement in the
        in_response_to field. Otherwise, the logic adapter may find a closest
        matching statement that does not have a known response.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')

        responses = Response.objects.all()

        return Statement.objects.filter(in_response__in=responses)

@ -0,0 +1,394 @@
from chatterbot.storage import StorageAdapter
class Query(object):
    """
    Builder for MongoDB filter documents.

    Every builder method returns a new Query and leaves the query it
    was called on unchanged, including its nested dicts and lists.
    """

    def __init__(self, query=None):
        # Create a fresh dict per instance instead of sharing one
        # mutable default between all Query objects
        self.query = {} if query is None else query

    def _clone(self):
        """
        Return a deep copy of the filter document.
        """
        # A shallow copy would share nested clauses such as
        # {'text': {'$nin': [...]}} with the query being derived from.
        import copy
        return copy.deepcopy(self.query)

    def value(self):
        """
        Return a copy of the filter document.
        """
        return self._clone()

    def raw(self, data):
        """
        Return a new query with the key/value pairs of ``data`` merged in.
        """
        query = self._clone()

        query.update(data)

        return Query(query)

    def statement_text_equals(self, statement_text):
        """
        Return a new query requiring ``text`` to equal ``statement_text``.
        """
        query = self._clone()

        query['text'] = statement_text

        return Query(query)

    def statement_text_not_in(self, statements):
        """
        Return a new query that also excludes the given statement texts.
        """
        query = self._clone()

        if 'text' not in query:
            query['text'] = {}

        if '$nin' not in query['text']:
            query['text']['$nin'] = []

        query['text']['$nin'].extend(statements)

        return Query(query)

    def statement_response_list_contains(self, statement_text):
        """
        Return a new query matching statements whose ``in_response_to``
        list has an element with the given text.
        """
        query = self._clone()

        if 'in_response_to' not in query:
            query['in_response_to'] = {}

        if '$elemMatch' not in query['in_response_to']:
            query['in_response_to']['$elemMatch'] = {}

        query['in_response_to']['$elemMatch']['text'] = statement_text

        return Query(query)

    def statement_response_list_equals(self, response_list):
        """
        Return a new query requiring ``in_response_to`` to equal
        ``response_list``.
        """
        query = self._clone()

        query['in_response_to'] = response_list

        return Query(query)
class MongoDatabaseAdapter(StorageAdapter):
    """
    The MongoDatabaseAdapter is an interface that allows
    ChatterBot to store statements in a MongoDB database.

    :keyword database: The name of the database you wish to connect to.
    :type database: str

    .. code-block:: python

       database='chatterbot-database'

    :keyword database_uri: The URI of a remote instance of MongoDB.
    :type database_uri: str

    .. code-block:: python

       database_uri='mongodb://example.com:8100/'
    """

    def __init__(self, **kwargs):
        super(MongoDatabaseAdapter, self).__init__(**kwargs)
        from pymongo import MongoClient
        from pymongo.errors import OperationFailure

        self.database_name = self.kwargs.get(
            'database', 'chatterbot-database'
        )
        self.database_uri = self.kwargs.get(
            'database_uri', 'mongodb://localhost:27017/'
        )

        # Use the default host and port
        self.client = MongoClient(self.database_uri)

        # Increase the sort buffer to 42M if possible
        try:
            self.client.admin.command({'setParameter': 1, 'internalQueryExecMaxBlockingSortBytes': 44040192})
        except OperationFailure:
            # Best effort: the parameter may not be settable on this server
            pass

        # Specify the name of the database
        self.database = self.client[self.database_name]

        # The mongo collection of statement documents
        self.statements = self.database['statements']

        # The mongo collection of conversation documents
        self.conversations = self.database['conversations']

        # Set a requirement for the text attribute to be unique
        self.statements.create_index('text', unique=True)

        self.base_query = Query()

    def get_statement_model(self):
        """
        Return the class for the statement model.
        """
        from chatterbot.conversation import Statement

        # Create a storage-aware statement
        statement = Statement
        statement.storage = self

        return statement

    def get_response_model(self):
        """
        Return the class for the response model.
        """
        from chatterbot.conversation import Response

        # Create a storage-aware response
        response = Response
        response.storage = self

        return response

    def count(self):
        """
        Return the number of statement documents.
        """
        return self.statements.count()

    def find(self, statement_text):
        """
        Return the statement with the given text, or None if there
        is no such statement.
        """
        Statement = self.get_model('statement')
        query = self.base_query.statement_text_equals(statement_text)

        values = self.statements.find_one(query.value())

        if not values:
            return None

        # The text is passed positionally below
        del values['text']

        # Build the objects for the response list
        values['in_response_to'] = self.deserialize_responses(
            values.get('in_response_to', [])
        )

        return Statement(statement_text, **values)

    def deserialize_responses(self, response_list):
        """
        Takes the list of response items and returns
        the list converted to Response objects.
        """
        Statement = self.get_model('statement')
        Response = self.get_model('response')
        proxy_statement = Statement('')

        for response in response_list:
            text = response['text']
            del response['text']

            proxy_statement.add_response(
                Response(text, **response)
            )

        return proxy_statement.in_response_to

    def mongo_to_object(self, statement_data):
        """
        Return Statement object when given data
        returned from Mongo DB.
        """
        Statement = self.get_model('statement')
        statement_text = statement_data['text']
        del statement_data['text']

        statement_data['in_response_to'] = self.deserialize_responses(
            statement_data.get('in_response_to', [])
        )

        return Statement(statement_text, **statement_data)

    def filter(self, **kwargs):
        """
        Returns a list of statements in the database
        that match the parameters specified.
        """
        import pymongo

        query = self.base_query

        order_by = kwargs.pop('order_by', None)

        # Convert Response objects to data
        if 'in_response_to' in kwargs:
            serialized_responses = []
            for response in kwargs['in_response_to']:
                serialized_responses.append({'text': response})

            query = query.statement_response_list_equals(serialized_responses)
            del kwargs['in_response_to']

        if 'in_response_to__contains' in kwargs:
            query = query.statement_response_list_contains(
                kwargs['in_response_to__contains']
            )
            del kwargs['in_response_to__contains']

        # Whatever is left is passed to MongoDB unchanged
        query = query.raw(kwargs)

        matches = self.statements.find(query.value())

        if order_by:

            direction = pymongo.ASCENDING

            # Sort so that newer datetimes appear first
            if order_by == 'created_at':
                direction = pymongo.DESCENDING

            matches = matches.sort(order_by, direction)

        return [self.mongo_to_object(match) for match in matches]

    def update(self, statement):
        """
        Save the statement, creating it if necessary, and make sure a
        document exists for every statement it is in response to.

        :returns: The statement that was passed in.
        """
        from pymongo import UpdateOne
        from pymongo.errors import BulkWriteError

        data = statement.serialize()

        operations = []

        update_operation = UpdateOne(
            {'text': statement.text},
            {'$set': data},
            upsert=True
        )
        operations.append(update_operation)

        # Make sure that an entry for each response is saved
        for response_dict in data.get('in_response_to', []):
            response_text = response_dict.get('text')

            # NOTE(review): the previous comment here described
            # $setOnInsert, but the operation uses $set, so the fields of
            # an existing response document are overwritten - confirm
            # which behaviour is intended.
            update_operation = UpdateOne(
                {'text': response_text},
                {'$set': response_dict},
                upsert=True
            )
            operations.append(update_operation)

        try:
            self.statements.bulk_write(operations, ordered=False)
        except BulkWriteError as bwe:
            # Log the details of a bulk write error
            self.logger.error(str(bwe.details))

        return statement

    def create_conversation(self):
        """
        Create a new conversation and return its id.
        """
        conversation_id = self.conversations.insert_one({}).inserted_id
        return conversation_id

    def get_latest_response(self, conversation_id):
        """
        Returns the latest response in a conversation if it exists.
        Returns None if a matching conversation cannot be found.
        """
        from pymongo import DESCENDING

        statements = list(self.statements.find({
            'conversations.id': conversation_id
        }).sort('conversations.created_at', DESCENDING))

        if not statements:
            return None

        # Handle the case of a single statement in the conversation;
        # indexing with -2 would raise IndexError
        if len(statements) == 1:
            return self.mongo_to_object(statements[0])

        # NOTE(review): with a DESCENDING sort, [-2] selects the second
        # oldest entry - confirm this is the intended "latest response".
        return self.mongo_to_object(statements[-2])

    def add_to_conversation(self, conversation_id, statement, response):
        """
        Add the statement and response to the conversation.
        """
        from datetime import datetime, timedelta
        self.statements.update_one(
            {
                'text': statement.text
            },
            {
                '$push': {
                    'conversations': {
                        'id': conversation_id,
                        'created_at': datetime.utcnow()
                    }
                }
            }
        )
        self.statements.update_one(
            {
                'text': response.text
            },
            {
                '$push': {
                    'conversations': {
                        'id': conversation_id,
                        # Force the response to be at least one millisecond after the input statement
                        'created_at': datetime.utcnow() + timedelta(milliseconds=1)
                    }
                }
            }
        )

    def get_random(self):
        """
        Returns a random statement from the database

        :raises EmptyDatabaseException: When there are no statements.
        """
        from random import randint

        count = self.count()

        if count < 1:
            raise self.EmptyDatabaseException()

        random_integer = randint(0, count - 1)

        statements = self.statements.find().limit(1).skip(random_integer)

        return self.mongo_to_object(list(statements)[0])

    def remove(self, statement_text):
        """
        Removes the statement that matches the input text.
        Removes any responses from statements if the response text matches the
        input text.
        """
        for statement in self.filter(in_response_to__contains=statement_text):
            statement.remove_response(statement_text)
            self.update(statement)

        self.statements.delete_one({'text': statement_text})

    def get_response_statements(self):
        """
        Return only statements that are in response to another statement.
        A statement must exist which lists the closest matching statement in the
        in_response_to field. Otherwise, the logic adapter may find a closest
        matching statement that does not have a known response.
        """
        response_query = self.statements.aggregate([{'$group': {'_id': '$in_response_to.text'}}])

        responses = []
        for r in response_query:
            try:
                responses.extend(r['_id'])
            except TypeError:
                # '_id' is not iterable for documents without responses
                pass

        _statement_query = {
            'text': {
                '$in': responses
            }
        }

        _statement_query.update(self.base_query.value())

        statement_query = self.statements.find(_statement_query)

        return [self.mongo_to_object(statement) for statement in statement_query]

    def drop(self):
        """
        Remove the database.
        """
        self.client.drop_database(self.database_name)

@ -0,0 +1,403 @@
from chatterbot.storage import StorageAdapter
def get_response_table(response):
    """
    Build a SQLAlchemy Response row carrying the text and occurrence
    count of the given response object.
    """
    from chatterbot.ext.sqlalchemy_app import models

    return models.Response(
        text=response.text,
        occurrence=response.occurrence
    )
class SQLStorageAdapter(StorageAdapter):
"""
SQLStorageAdapter allows ChatterBot to store conversation
data semi-structured T-SQL database, virtually, any database
that SQL Alchemy supports.
Notes:
Tables may change (and will), so, save your training data.
There is no data migration (yet).
Performance test not done yet.
Tests using other databases not finished.
All parameters are optional, by default a sqlite database is used.
It will check if tables are present, if they are not, it will attempt
to create the required tables.
:keyword database: Used for sqlite database. Ignored if database_uri is specified.
:type database: str
:keyword database_uri: eg: sqlite:///database_test.db", use database_uri or database,
database_uri can be specified to choose database driver (database parameter will be ignored).
:type database_uri: str
:keyword read_only: False by default, makes all operations read only, has priority over all DB operations
so, create, update, delete will NOT be executed
:type read_only: bool
"""
def __init__(self, **kwargs):
    """
    Create the engine and session factory and make sure the tables exist.

    Reads the ``database``, ``database_uri`` and ``read_only`` keyword
    arguments from ``self.kwargs``.
    """
    super(SQLStorageAdapter, self).__init__(**kwargs)

    from sqlalchemy import create_engine
    from sqlalchemy import event
    from sqlalchemy.orm import sessionmaker

    default_uri = "sqlite:///db.sqlite3"

    database_name = self.kwargs.get("database", False)

    # None results in a sqlite in-memory database as the default
    if database_name is None:
        default_uri = "sqlite://"

    self.database_uri = self.kwargs.get(
        "database_uri", default_uri
    )

    # Create a sqlite file if a database name is provided
    if database_name:
        self.database_uri = "sqlite:///" + database_name

    self.engine = create_engine(self.database_uri, convert_unicode=True)

    if self.database_uri.startswith('sqlite://'):
        # Listen on this adapter's engine only. Registering on the Engine
        # class would apply the sqlite PRAGMAs to every engine in the
        # process and add another listener for each adapter created.
        @event.listens_for(self.engine, "connect")
        def set_sqlite_pragma(dbapi_connection, connection_record):
            dbapi_connection.execute('PRAGMA journal_mode=WAL')
            dbapi_connection.execute('PRAGMA synchronous=NORMAL')

    self.read_only = self.kwargs.get(
        "read_only", False
    )

    if not self.engine.dialect.has_table(self.engine, 'Statement'):
        self.create()

    self.Session = sessionmaker(bind=self.engine, expire_on_commit=True)

    # ChatterBot's internal query builder is not yet supported for this adapter
    self.adapter_supports_queries = False
def get_statement_model(self):
    """
    Return the SQLAlchemy model class used for statements.
    """
    from chatterbot.ext.sqlalchemy_app import models
    return models.Statement
def get_response_model(self):
    """
    Return the SQLAlchemy model class used for responses.
    """
    from chatterbot.ext.sqlalchemy_app import models
    return models.Response
def get_conversation_model(self):
    """
    Return the SQLAlchemy model class used for conversations.
    """
    from chatterbot.ext.sqlalchemy_app import models
    return models.Conversation
def get_tag_model(self):
    """
    Return the SQLAlchemy model class used for tags.
    """
    from chatterbot.ext.sqlalchemy_app import models
    return models.Tag
def count(self):
    """
    Return the number of entries in the database.
    """
    Statement = self.get_model('statement')

    session = self.Session()
    try:
        return session.query(Statement).count()
    finally:
        # Release the session even when the query raises
        session.close()
def find(self, statement_text):
    """
    Returns a statement if it exists otherwise None
    """
    Statement = self.get_model('statement')

    session = self.Session()
    try:
        record = session.query(Statement).filter_by(text=statement_text).first()

        # Convert the row while the session is still open
        return record.get_statement() if record else None
    finally:
        # Release the session even when the query or conversion raises
        session.close()
def remove(self, statement_text):
    """
    Removes the statement that matches the input text.

    Does nothing when no statement has that text.

    NOTE(review): related response rows are only removed if the models
    declare a delete cascade - confirm in sqlalchemy_app.models.
    """
    Statement = self.get_model('statement')
    session = self.Session()

    record = session.query(Statement).filter_by(text=statement_text).first()

    # session.delete(None) raises, so only delete a row that exists
    if record is not None:
        session.delete(record)

    self._session_finish(session)
def filter(self, **kwargs):
    """
    Returns a list of objects from the database.
    The kwargs parameter can contain any number
    of attributes. Only objects which contain
    all listed attributes and in which all values
    match for all listed attributes will be returned.
    """
    Statement = self.get_model('statement')
    Response = self.get_model('response')

    session = self.Session()

    # try/finally closes the session on every exit path, including the
    # early "no query" return inside the loop
    try:
        filter_parameters = kwargs.copy()

        statements = []
        _query = None

        if len(filter_parameters) == 0:
            _response_query = session.query(Statement)
            statements.extend(_response_query.all())
        else:
            for i, fp in enumerate(filter_parameters):
                _filter = filter_parameters[fp]
                if fp in ['in_response_to', 'in_response_to__contains']:
                    _response_query = session.query(Statement)
                    if isinstance(_filter, list):
                        if len(_filter) == 0:
                            _query = _response_query.filter(
                                Statement.in_response_to == None  # NOQA Here must use == instead of is
                            )
                        else:
                            # NOTE(review): each pass rebuilds _query from
                            # _response_query, so only the last list item
                            # ends up in the filter - confirm intent.
                            for f in _filter:
                                _query = _response_query.filter(
                                    Statement.in_response_to.contains(get_response_table(f)))
                    else:
                        if fp == 'in_response_to__contains':
                            _query = _response_query.join(Response).filter(Response.text == _filter)
                        else:
                            _query = _response_query.filter(Statement.in_response_to == None)  # NOQA
                else:
                    if _query:
                        _query = _query.filter(Response.statement_text.like('%' + _filter + '%'))
                    else:
                        _response_query = session.query(Response)
                        _query = _response_query.filter(Response.statement_text.like('%' + _filter + '%'))

                if _query is None:
                    return []

                # Run the query once the last parameter has been applied
                if len(filter_parameters) == i + 1:
                    statements.extend(_query.all())

        results = []

        for statement in statements:
            if isinstance(statement, Response):
                if statement and statement.statement_table:
                    results.append(statement.statement_table.get_statement())
            else:
                if statement:
                    results.append(statement.get_statement())

        return results
    finally:
        session.close()
def update(self, statement):
    """
    Save a statement to the database, inserting a new row when no
    statement with the same text exists yet. Tags and the responses
    the statement is in reply to are attached to the row.
    """
    Statement = self.get_model('statement')
    Response = self.get_model('response')
    Tag = self.get_model('tag')

    # Nothing to store for a falsy statement
    if not statement:
        return

    session = self.Session()

    record = session.query(Statement).filter_by(text=statement.text).first()

    # Start a new row when the text has not been stored before
    if not record:
        record = Statement(text=statement.text)

    record.extra_data = dict(statement.extra_data)

    for tag_name in statement.tags:
        tag_row = session.query(Tag).filter_by(name=tag_name).first()

        # Unknown tags are created on the fly
        if not tag_row:
            tag_row = Tag(name=tag_name)

        record.tags.append(tag_row)

    for response in statement.in_response_to:
        response_row = session.query(Response).filter_by(
            text=response.text,
            statement_text=statement.text
        ).first()

        if response_row:
            # Seen before: bump the counter
            response_row.occurrence += 1
        else:
            response_row = Response(
                text=response.text,
                statement_text=statement.text,
                occurrence=response.occurrence
            )

        record.in_response_to.append(response_row)

    session.add(record)

    self._session_finish(session)
def create_conversation(self):
    """
    Create a new conversation.

    :returns: The id of the newly created conversation record.
    """
    Conversation = self.get_model('conversation')
    session = self.Session()

    try:
        conversation = Conversation()
        session.add(conversation)

        # Flush and refresh so the generated id is available before committing
        session.flush()
        session.refresh(conversation)
        conversation_id = conversation.id

        session.commit()
    finally:
        # Always release the session, even when the database call fails
        session.close()

    return conversation_id
def add_to_conversation(self, conversation_id, statement, response):
    """
    Add the statement and response to the conversation.

    :param conversation_id: Identifier of the conversation record to extend.
    :param statement: The input statement; only its ``text`` is used for lookup.
    :param response: The response statement; only its ``text`` is used for lookup.
    """
    Statement = self.get_model('statement')
    Conversation = self.get_model('conversation')

    session = self.Session()

    def lookup(text):
        # Fetch the stored statement row with the given text, if any
        return session.query(Statement).filter_by(text=text).first()

    conversation = session.query(Conversation).get(conversation_id)

    statement_record = lookup(statement.text)
    response_record = lookup(response.text)

    # Make sure the statements exist; store any that are missing and
    # look them up again through this session
    if not statement_record:
        self.update(statement)
        statement_record = lookup(statement.text)

    if not response_record:
        self.update(response)
        response_record = lookup(response.text)

    conversation.statements.append(statement_record)
    conversation.statements.append(response_record)

    session.add(conversation)
    self._session_finish(session)
def get_latest_response(self, conversation_id):
    """
    Returns the latest response in a conversation if it exists.
    Returns None if a matching conversation cannot be found.

    :param conversation_id: Identifier of the conversation to inspect.
    :returns: The second-to-last statement of the conversation (ordered by
        statement id), the only statement when there is exactly one,
        or None when the conversation has no statements.
    """
    Statement = self.get_model('statement')
    session = self.Session()

    try:
        # Load the ordered statements once instead of issuing separate
        # COUNT queries and indexing the query with a negative offset
        ordered = session.query(Statement).filter(
            Statement.conversations.any(id=conversation_id)
        ).order_by(Statement.id).all()

        if len(ordered) >= 2:
            return ordered[-2].get_statement()

        # Handle the case of the first statement in the list
        if ordered:
            return ordered[0].get_statement()

        return None
    finally:
        # Release the session even if loading or conversion fails
        session.close()
def get_random(self):
    """
    Returns a random statement from the database

    :raises: EmptyDatabaseException if the database contains no entries.
    """
    import random

    Statement = self.get_model('statement')

    # Check for an empty database before opening a session so that the
    # error path does not leave an unclosed session behind
    count = self.count()
    if count < 1:
        raise self.EmptyDatabaseException()

    session = self.Session()
    try:
        offset = random.randrange(0, count)
        record = session.query(Statement)[offset]
        return record.get_statement()
    finally:
        session.close()
def drop(self):
    """
    Drop the database attached to a given adapter.

    Removes every table registered on the declarative ``Base`` metadata
    from the adapter's engine.
    """
    from chatterbot.ext.sqlalchemy_app.models import Base

    schema = Base.metadata
    schema.drop_all(bind=self.engine)
def create(self):
    """
    Populate the database with the tables.

    Creates every table registered on the declarative ``Base`` metadata
    on the adapter's engine.
    """
    from chatterbot.ext.sqlalchemy_app.models import Base

    schema = Base.metadata
    schema.create_all(bind=self.engine)
def _session_finish(self, session, statement_text=None):
    """
    Finalize and close a session.

    Commits the session unless the adapter is read-only, in which case the
    pending changes are rolled back. The session is closed in every case.

    :param session: The session to finish.
    :param statement_text: Text logged together with the exception when
        the commit or rollback raises ``InvalidRequestError``.
    """
    from sqlalchemy.exc import InvalidRequestError

    try:
        if self.read_only:
            # Read-only adapters must never persist changes
            session.rollback()
        else:
            session.commit()
    except InvalidRequestError:
        # Log the statement text and the exception
        self.logger.exception(statement_text)
    finally:
        session.close()

@ -0,0 +1,171 @@
import logging
class StorageAdapter(object):
    """
    This is an abstract class that represents the interface
    that all storage adapters should implement.
    """

    def __init__(self, base_query=None, *args, **kwargs):
        """
        Initialize common attributes shared by all storage adapters.

        :param base_query: Accepted by the signature.
            NOTE(review): the value is not stored; ``self.base_query``
            always starts as None. Confirm whether that is intended.
        :param kwargs: Keyword options kept on ``self.kwargs``. A ``logger``
            entry overrides the default module logger.
        """
        self.kwargs = kwargs
        self.logger = kwargs.get('logger', logging.getLogger(__name__))
        self.adapter_supports_queries = True
        self.base_query = None

    def get_model(self, model_name):
        """
        Return the model class for a given model name.

        A ``<name>_model`` keyword argument given at construction takes
        precedence; otherwise the adapter's ``get_<name>_model`` method
        is called.

        :param model_name: Name of the model, case-insensitive.
        """
        # The string must be lowercase
        model_name = model_name.lower()

        kwarg_model_key = '%s_model' % (model_name, )

        if kwarg_model_key in self.kwargs:
            return self.kwargs.get(kwarg_model_key)

        get_model_method = getattr(self, 'get_%s_model' % (model_name, ))

        return get_model_method()

    def generate_base_query(self, chatterbot, session_id):
        """
        Create a base query for the storage adapter.

        Each filter's ``filter_selection`` result is assigned to
        ``self.base_query`` in turn, so the value from the last filter is
        the one that remains.
        """
        if self.adapter_supports_queries:
            for filter_instance in chatterbot.filters:
                self.base_query = filter_instance.filter_selection(chatterbot, session_id)

    def count(self):
        """
        Return the number of entries in the database.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `count` method is not implemented by this adapter.'
        )

    def find(self, statement_text):
        """
        Returns a object from the database if it exists
        """
        raise self.AdapterMethodNotImplementedError(
            'The `find` method is not implemented by this adapter.'
        )

    def remove(self, statement_text):
        """
        Removes the statement that matches the input text.
        Removes any responses from statements where the response text matches
        the input text.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `remove` method is not implemented by this adapter.'
        )

    def filter(self, **kwargs):
        """
        Returns a list of objects from the database.
        The kwargs parameter can contain any number
        of attributes. Only objects which contain
        all listed attributes and in which all values
        match for all listed attributes will be returned.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `filter` method is not implemented by this adapter.'
        )

    def update(self, statement):
        """
        Modifies an entry in the database.
        Creates an entry if one does not exist.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `update` method is not implemented by this adapter.'
        )

    def get_latest_response(self, conversation_id):
        """
        Returns the latest response in a conversation if it exists.
        Returns None if a matching conversation cannot be found.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `get_latest_response` method is not implemented by this adapter.'
        )

    def create_conversation(self):
        """
        Creates a new conversation.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `create_conversation` method is not implemented by this adapter.'
        )

    def add_to_conversation(self, conversation_id, statement, response):
        """
        Add the statement and response to the conversation.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `add_to_conversation` method is not implemented by this adapter.'
        )

    def get_random(self):
        """
        Returns a random statement from the database.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `get_random` method is not implemented by this adapter.'
        )

    def drop(self):
        """
        Drop the database attached to a given adapter.
        """
        raise self.AdapterMethodNotImplementedError(
            'The `drop` method is not implemented by this adapter.'
        )

    def get_response_statements(self):
        """
        Return only statements that are in response to another statement.
        A statement must exist which lists the closest matching statement in the
        in_response_to field. Otherwise, the logic adapter may find a closest
        matching statement that does not have a known response.

        This method may be overridden by a child class to provide a more
        efficient method to get these results.

        :returns: A new list holding, in their original order, the statements
            whose text appears in some statement's ``in_response_to``.
        """
        statement_list = self.filter()

        # Collect every text that some statement lists as what it responds to
        responses = set()
        for statement in statement_list:
            for response in statement.in_response_to:
                responses.add(response.text)

        # Single linear pass; repeated list.remove() calls would be quadratic
        # and would mutate the list handed back by filter()
        return [
            statement for statement in statement_list
            if statement.text in responses
        ]

    class EmptyDatabaseException(Exception):
        """
        Raised when an operation needs at least one stored entry
        and the database has none.
        """

        def __init__(self, value='The database currently contains no entries. At least one entry is expected. You may need to train your chat bot to populate your database.'):
            self.value = value

        def __str__(self):
            return repr(self.value)

    class AdapterMethodNotImplementedError(NotImplementedError):
        """
        An exception to be raised when a storage adapter method has not been implemented.
        Typically this indicates that the method should be implement in a subclass.
        """
        pass

@ -0,0 +1,426 @@
import logging
import os
import sys
from .conversation import Statement, Response
from . import utils
class Trainer(object):
    """
    Base class for all other trainer classes.

    :param storage: The storage adapter that trained statements are read
        from and written to.
    :param chatbot: Optional chat bot instance whose preprocessors are
        applied to statements before they are looked up.
    :param show_training_progress: Whether subclasses print a progress bar
        while training. Defaults to True.
    """

    def __init__(self, storage, **kwargs):
        self.chatbot = kwargs.get('chatbot')
        self.storage = storage
        self.logger = logging.getLogger(__name__)
        self.show_training_progress = kwargs.get('show_training_progress', True)

    def get_preprocessed_statement(self, input_statement):
        """
        Preprocess the input statement.

        :param input_statement: The statement to run through the chat bot's
            preprocessors.
        :returns: The statement returned by the last preprocessor, or the
            input unchanged when no chat bot is attached.
        """
        # The chatbot is optional to prevent backwards-incompatible changes
        if not self.chatbot:
            return input_statement

        for preprocessor in self.chatbot.preprocessors:
            # Preprocessors are called with the chat bot, not with the trainer
            input_statement = preprocessor(self.chatbot, input_statement)

        return input_statement

    def train(self, *args, **kwargs):
        """
        This method must be overridden by a child class.
        """
        raise self.TrainerInitializationException()

    def get_or_create(self, statement_text):
        """
        Return a statement if it exists.
        Create and return the statement if it does not exist.
        """
        temp_statement = self.get_preprocessed_statement(
            Statement(text=statement_text)
        )

        statement = self.storage.find(temp_statement.text)

        if not statement:
            statement = Statement(temp_statement.text)

        return statement

    class TrainerInitializationException(Exception):
        """
        Exception raised when a base class has not overridden
        the required methods on the Trainer base class.
        """

        def __init__(self, value=None):
            default = (
                'A training class must be specified before calling train(). ' +
                'See http://chatterbot.readthedocs.io/en/stable/training.html'
            )
            self.value = value or default

        def __str__(self):
            return repr(self.value)

    def _generate_export_data(self):
        """
        Build a list of ``[response_text, statement_text]`` pairs from
        every statement in storage.
        """
        result = []
        for statement in self.storage.filter():
            for response in statement.in_response_to:
                result.append([response.text, statement.text])

        return result

    def export_for_training(self, file_path='./export.json'):
        """
        Create a file from the database that can be used to
        train other chat bots.

        :param file_path: Destination of the JSON export.
        """
        import json

        export = {'conversations': self._generate_export_data()}

        file_kwargs = {}
        if sys.version_info[0] > 2:
            # The JSON is written with ensure_ascii=False, so pin the file
            # encoding instead of relying on the platform default
            file_kwargs['encoding'] = 'utf-8'

        with open(file_path, 'w+', **file_kwargs) as jsonfile:
            json.dump(export, jsonfile, ensure_ascii=False)
class ListTrainer(Trainer):
    """
    Allows a chat bot to be trained using a list of strings
    where the list represents a conversation.
    """

    def train(self, conversation):
        """
        Train the chat bot based on the provided list of
        statements that represents a single conversation.

        Each statement is stored as a response to the statement
        that precedes it in the list.
        """
        last_text = None

        for position, text in enumerate(conversation, start=1):
            if self.show_training_progress:
                utils.print_progress_bar('List Trainer', position, len(conversation))

            statement = self.get_or_create(text)

            # Link this statement to the one that came right before it
            if last_text:
                statement.add_response(Response(last_text))

            last_text = statement.text
            self.storage.update(statement)
class ChatterBotCorpusTrainer(Trainer):
    """
    Allows the chat bot to be trained using data from the
    ChatterBot dialog corpus.
    """

    def __init__(self, storage, **kwargs):
        super(ChatterBotCorpusTrainer, self).__init__(storage, **kwargs)
        from .corpus import Corpus

        self.corpus = Corpus()

    def train(self, *corpus_paths):
        """
        Train the chat bot from one or more corpus paths.

        The paths may be given as separate arguments or as a single list.
        """
        # Allow a list of corpora to be passed instead of arguments
        if len(corpus_paths) == 1 and isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

        # Train the chat bot with each statement and response pair
        for corpus_path in corpus_paths:
            corpora = self.corpus.load_corpus(corpus_path)
            corpus_files = self.corpus.list_corpus_files(corpus_path)

            for corpus_index, corpus in enumerate(corpora):
                for conversation_number, conversation in enumerate(corpus, start=1):
                    if self.show_training_progress:
                        label = str(os.path.basename(corpus_files[corpus_index])) + ' Training'
                        utils.print_progress_bar(label, conversation_number, len(corpus))

                    last_text = None
                    for text in conversation:
                        statement = self.get_or_create(text)
                        statement.add_tags(corpus.categories)

                        # Link this statement to the previous one in the conversation
                        if last_text:
                            statement.add_response(Response(last_text))

                        last_text = statement.text
                        self.storage.update(statement)
class TwitterTrainer(Trainer):
    """
    Allows the chat bot to be trained using data
    gathered from Twitter.

    :param random_seed_word: The seed word to be used to get random tweets from the Twitter API.
                             This parameter is optional. By default it is the word 'random'.
    :param twitter_lang: Language for results as ISO 639-1 code.
                         This parameter is optional. Default is None (all languages).
    """

    def __init__(self, storage, **kwargs):
        super(TwitterTrainer, self).__init__(storage, **kwargs)
        from twitter import Api as TwitterApi

        # The word to be used as the first search term when searching for tweets
        self.random_seed_word = kwargs.get('random_seed_word', 'random')
        self.lang = kwargs.get('twitter_lang')

        self.api = TwitterApi(
            consumer_key=kwargs.get('twitter_consumer_key'),
            consumer_secret=kwargs.get('twitter_consumer_secret'),
            access_token_key=kwargs.get('twitter_access_token_key'),
            access_token_secret=kwargs.get('twitter_access_token_secret')
        )

    def random_word(self, base_word, lang=None):
        """
        Generate a random word using the Twitter API.

        Search twitter for recent tweets containing the term 'random'.
        Then randomly select one word from those tweets and do another
        search with that word. Return a randomly selected word from the
        new set of results.

        When a search yields no usable words, the term that was searched
        for is used instead of failing on an empty selection.
        """
        import random

        random_tweets = self.api.GetSearch(term=base_word, count=5, lang=lang)
        random_words = self.get_words_from_tweets(random_tweets)

        # random.choice raises IndexError on an empty sequence, so fall
        # back to the search term when no candidate words were found
        random_word = random.choice(list(random_words)) if random_words else base_word

        tweets = self.api.GetSearch(term=random_word, count=5, lang=lang)
        words = self.get_words_from_tweets(tweets)

        return random.choice(list(words)) if words else random_word

    def get_words_from_tweets(self, tweets):
        """
        Given a list of tweets, return the set of
        words from the tweets.
        """
        words = set()

        for tweet in tweets:
            tweet_words = tweet.text.split()

            for word in tweet_words:
                # If the word contains only letters with a length from 4 to 9
                if word.isalpha() and len(word) > 3 and len(word) <= 9:
                    words.add(word)

        return words

    def get_statements(self):
        """
        Returns list of random statements from the API.

        Only tweets that are replies, and whose parent status could be
        fetched, are included.
        """
        from twitter import TwitterError
        statements = []

        # Generate a random word
        random_word = self.random_word(self.random_seed_word, self.lang)

        self.logger.info(u'Requesting 50 random tweets containing the word {}'.format(random_word))
        tweets = self.api.GetSearch(term=random_word, count=50, lang=self.lang)
        for tweet in tweets:
            statement = Statement(tweet.text)

            if tweet.in_reply_to_status_id:
                try:
                    status = self.api.GetStatus(tweet.in_reply_to_status_id)
                    statement.add_response(Response(status.text))
                    statements.append(statement)
                except TwitterError as error:
                    self.logger.warning(str(error))

        self.logger.info('Adding {} tweets with responses'.format(len(statements)))

        return statements

    def train(self):
        """
        Fetch ten batches of statements and store each one.
        """
        for _ in range(0, 10):
            statements = self.get_statements()
            for statement in statements:
                self.storage.update(statement)
class UbuntuCorpusTrainer(Trainer):
    """
    Allow chatbots to be trained with the data from
    the Ubuntu Dialog Corpus.

    :param ubuntu_corpus_data_download_url: Optional URL of the corpus archive.
    :param ubuntu_corpus_data_directory: Optional directory in which the
        archive is stored and extracted. Defaults to ``./data/``.
    """

    def __init__(self, storage, **kwargs):
        super(UbuntuCorpusTrainer, self).__init__(storage, **kwargs)

        self.data_download_url = kwargs.get(
            'ubuntu_corpus_data_download_url',
            'http://cs.mcgill.ca/~jpineau/datasets/ubuntu-corpus-1.0/ubuntu_dialogs.tgz'
        )

        self.data_directory = kwargs.get(
            'ubuntu_corpus_data_directory',
            './data/'
        )

        self.extracted_data_directory = os.path.join(
            self.data_directory, 'ubuntu_dialogs'
        )

        # Create the data directory if it does not already exist
        if not os.path.exists(self.data_directory):
            os.makedirs(self.data_directory)

    def is_downloaded(self, file_path):
        """
        Check if the data file is already downloaded.
        """
        if os.path.exists(file_path):
            self.logger.info('File is already downloaded')
            return True

        return False

    def is_extracted(self, file_path):
        """
        Check if the data file is already extracted.
        """
        if os.path.isdir(file_path):
            self.logger.info('File is already extracted')
            return True
        return False

    def download(self, url, show_status=True):
        """
        Download a file from the given url.
        Show a progress indicator for the download status.
        Based on: http://stackoverflow.com/a/15645088/1547223

        The data is written to a temporary ``.part`` file and renamed only
        once the transfer has finished, so an interrupted download is never
        mistaken for a complete one by ``is_downloaded``.

        :param url: Location of the file to fetch.
        :param show_status: Whether to draw the progress bar.
        :returns: Path of the downloaded file.
        :raises: requests.HTTPError when the server answers with an error status.
        """
        import requests

        file_name = url.split('/')[-1]
        file_path = os.path.join(self.data_directory, file_name)

        # Do not download the data if it already exists
        if self.is_downloaded(file_path):
            return file_path

        partial_path = file_path + '.part'

        print('Downloading %s' % url)
        response = requests.get(url, stream=True)

        # Do not save an error page as if it were the archive
        response.raise_for_status()

        total_length = response.headers.get('content-length')

        with open(partial_path, 'wb') as open_file:
            if total_length is None:
                # No content length header
                open_file.write(response.content)
            else:
                download = 0
                total_length = int(total_length)
                for data in response.iter_content(chunk_size=4096):
                    download += len(data)
                    open_file.write(data)
                    if show_status:
                        done = int(50 * download / total_length)
                        sys.stdout.write('\r[%s%s]' % ('=' * done, ' ' * (50 - done)))
                        sys.stdout.flush()

            # Add a new line after the download bar
            sys.stdout.write('\n')

        # Publish the file under its final name only after a complete write
        os.rename(partial_path, file_path)

        print('Download location: %s' % file_path)
        return file_path

    def extract(self, file_path):
        """
        Extract a tar file at the specified file path.

        :returns: True once extraction has finished.
        """
        import tarfile

        print('Extracting {}'.format(file_path))

        if not os.path.exists(self.extracted_data_directory):
            os.makedirs(self.extracted_data_directory)

        def track_progress(members):
            sys.stdout.write('.')
            for member in members:
                # This will be the current file being extracted
                yield member

        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use this with archives from a trusted source.
        with tarfile.open(file_path) as tar:
            tar.extractall(path=self.extracted_data_directory, members=track_progress(tar))

        self.logger.info('File extracted to {}'.format(self.extracted_data_directory))

        return True

    def train(self):
        """
        Download and extract the corpus if needed, then store every
        dialog line as a statement responding to the line before it.
        """
        import glob
        import csv

        # Download and extract the Ubuntu dialog corpus if needed
        corpus_download_path = self.download(self.data_download_url)

        # Extract if the directory does not already exist
        if not self.is_extracted(self.extracted_data_directory):
            self.extract(corpus_download_path)

        extracted_corpus_path = os.path.join(
            self.extracted_data_directory,
            '**', '**', '*.tsv'
        )

        file_kwargs = {}

        if sys.version_info[0] > 2:
            # Specify the encoding in Python versions 3 and up
            file_kwargs['encoding'] = 'utf-8'
            # WARNING: This might fail to read a unicode corpus file in Python 2.x

        for file in glob.iglob(extracted_corpus_path):
            self.logger.info('Training from: {}'.format(file))

            with open(file, 'r', **file_kwargs) as tsv:
                reader = csv.reader(tsv, delimiter='\t')

                previous_statement_text = None

                for row in reader:
                    # Columns 0-3 are read below; skip blank or short rows
                    # instead of failing on a missing column
                    if len(row) > 3:
                        text = row[3]
                        statement = self.get_or_create(text)
                        self.logger.debug('%s %s', text, len(row))

                        statement.add_extra_data('datetime', row[0])
                        statement.add_extra_data('speaker', row[1])

                        if row[2].strip():
                            statement.add_extra_data('addressing_speaker', row[2])

                        if previous_statement_text:
                            statement.add_response(
                                Response(previous_statement_text)
                            )

                        previous_statement_text = statement.text
                        self.storage.update(statement)

@ -0,0 +1,200 @@
"""
ChatterBot utility functions
"""
def import_module(dotted_path):
    """
    Imports the specified module based on the
    dot notated import path for the module.

    The text after the last dot names the attribute to return; everything
    before it is the module that gets imported.
    """
    import importlib

    module_path, _, attribute_name = dotted_path.rpartition('.')
    return getattr(importlib.import_module(module_path), attribute_name)
def initialize_class(data, **kwargs):
    """
    Import a class and return a new instance of it.

    :param data: A string or dictionary containing a import_path attribute.
        When a dictionary is given, all of its entries (including
        ``import_path``) are passed to the class as keyword arguments,
        with ``kwargs`` taking precedence on conflicts.
    :param kwargs: Keyword arguments passed to the class constructor.
    :returns: An instance of the imported class.
    """
    if isinstance(data, dict):
        Class = import_module(data.get('import_path'))

        # Merge into a copy so the caller's configuration dictionary
        # is not modified as a side effect
        options = dict(data)
        options.update(kwargs)

        return Class(**options)

    Class = import_module(data)
    return Class(**kwargs)
def validate_adapter_class(validate_class, adapter_class):
    """
    Raises an exception if validate_class is not a
    subclass of adapter_class.

    :param validate_class: The class to be validated, given as a dotted
        import path or as a dictionary with an ``import_path`` entry.
    :type validate_class: class

    :param adapter_class: The class type to check against.
    :type adapter_class: class

    :raises: Adapter.InvalidAdapterTypeException
    """
    from .adapters import Adapter

    candidate = validate_class

    # If a dictionary was passed in, check if it has an import_path attribute
    if isinstance(candidate, dict):
        if 'import_path' not in candidate:
            message = 'The dictionary {} must contain a value for "import_path"'.format(
                str(candidate)
            )
            raise Adapter.InvalidAdapterTypeException(message)

        # Set the class to the import path for the next check
        candidate = candidate.get('import_path')

    if issubclass(import_module(candidate), adapter_class):
        return

    raise Adapter.InvalidAdapterTypeException(
        '{} must be a subclass of {}'.format(candidate, adapter_class.__name__)
    )
def input_function():
    """
    Normalizes reading input between python 2 and 3.
    The function 'raw_input' becomes 'input' in Python 3.

    :returns: The line read from standard input.
    """
    import sys

    if sys.version_info[0] >= 3:
        return input()  # NOQA

    text = str(raw_input())  # NOQA

    # Avoid problems using format strings with unicode characters
    if text:
        text = text.decode('utf-8')

    return text
def nltk_download_corpus(resource_path):
    """
    Download the specified NLTK corpus file
    unless it has already been downloaded.

    Returns True if the corpus needed to be downloaded.

    :param resource_path: NLTK resource name such as ``'corpora/stopwords'``.
        A trailing slash is optional.
    :raises: zipfile.BadZipfile if the local corpus archive is corrupted.
    """
    from nltk.data import find
    from nltk import download
    from zipfile import BadZipfile

    # NLTK resource names are '/'-separated on every platform, so
    # normalize any backslashes rather than relying on os.path.sep
    resource_path = resource_path.replace('\\', '/')

    # Download the NLTK data only if it is not already downloaded.
    # Strip a trailing slash first so the corpus name is never empty.
    corpus_name = resource_path.rstrip('/').split('/')[-1]

    # From http://www.nltk.org/api/nltk.html
    # When using find() to locate a directory contained in a zipfile,
    # the resource name must end with the forward slash character.
    # Otherwise, find() will not locate the directory.
    #
    # Helps when resource_path=='sentiment/vader_lexicon''
    if not resource_path.endswith('/'):
        resource_path = resource_path + '/'

    downloaded = False

    try:
        find(resource_path)
    except LookupError:
        download(corpus_name)
        downloaded = True
    except BadZipfile:
        raise BadZipfile(
            'The NLTK corpus file being opened is not a zipfile, '
            'or it has been corrupted and needs to be manually deleted.'
        )

    return downloaded
def remove_stopwords(tokens, language):
    """
    Takes a language (i.e. 'english'), and a set of word tokens.
    Returns the tokenized text with any stopwords removed.
    Stop words are words like "is, the, a, ..."

    Be sure to download the required NLTK corpus before calling this function:
    - from chatterbot.utils import nltk_download_corpus
    - nltk_download_corpus('corpora/stopwords')

    :returns: A set holding the tokens that are not stop words.
    """
    from nltk.corpus import stopwords

    # Get the stopwords for the specified language
    ignored_words = stopwords.words(language)

    # Keep only the tokens that are not stop words
    return set(tokens).difference(ignored_words)
def get_response_time(chatbot):
    """
    Returns the amount of time taken for a given
    chat bot to return a response.

    The measurement is the wall-clock time of a single
    ``get_response('Hello')`` call.

    :param chatbot: A chat bot instance.
    :type chatbot: ChatBot

    :returns: The response time in seconds.
    :rtype: float
    """
    from time import time as now

    started_at = now()
    chatbot.get_response('Hello')
    finished_at = now()

    return finished_at - started_at
def print_progress_bar(description, iteration_counter, total_items, progress_bar_length=20):
    """
    Print progress bar

    The bar is redrawn in place on the current line; a line break is
    printed once the counter reaches the total.

    :param description: Training description
    :type description: str

    :param iteration_counter: Incremental counter
    :type iteration_counter: int

    :param total_items: total number items
    :type total_items: int

    :param progress_bar_length: Progress bar length
    :type progress_bar_length: int

    :returns: void
    :rtype: void
    """
    import sys

    fraction = float(iteration_counter) / total_items
    filled = int(round(fraction * progress_bar_length))

    # Pad the filled part with spaces up to the full bar width
    bar = ('#' * filled).ljust(progress_bar_length)
    percentage = int(round(fraction * 100))

    sys.stdout.write("\r{0}: [{1}] {2}%".format(description, bar, percentage))
    sys.stdout.flush()

    if iteration_counter == total_items:
        print("\r")

@ -3,7 +3,7 @@
"bot_version" : [3,0,0],
"description" : "Create an offline chatbot that talks like your average member using Machine Learning",
"hidden" : false,
"install_msg" : "Thank you for installing Chatter.",
"install_msg" : "Thank you for installing Chatter!",
"requirements" : ["chatterbot"],
"short" : "Local Chatbot run on machine learning",
"tags" : ["chat", "chatbot", "cleverbot", "clever","bobloy"]

Loading…
Cancel
Save