From 762b0fd32005e3a403dcb44bae32a53f2f6d1777 Mon Sep 17 00:00:00 2001 From: bobloy Date: Fri, 25 Sep 2020 12:02:13 -0400 Subject: [PATCH] WIP Twitter training --- chatter/chat.py | 21 ++++++++++---------- chatter/trainers.py | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 10 deletions(-) create mode 100644 chatter/trainers.py diff --git a/chatter/chat.py b/chatter/chat.py index ad8e37b..607457c 100644 --- a/chatter/chat.py +++ b/chatter/chat.py @@ -15,6 +15,8 @@ from redbot.core.commands import Cog from redbot.core.data_manager import cog_data_path from redbot.core.utils.predicates import MessagePredicate +from chatter.trainers import TwitterCorpusTrainer + log = logging.getLogger("red.fox_v3.chatter") @@ -105,15 +107,7 @@ class Chatter(Cog): return msg.clean_content def new_conversation(msg, sent, out_in, delta): - # if sent is None: - # return False - - # Don't do "too short" processing here. Sometimes people don't respond. - # if len(out_in) < 2: - # return False - - # print(msg.created_at - sent) - + # Should always be positive numbers return msg.created_at - sent >= delta for channel in ctx.guild.text_channels: @@ -158,6 +152,11 @@ class Chatter(Cog): return out + def _train_twitter(self, *args, **kwargs): + trainer = TwitterCorpusTrainer(self.chatbot) + trainer.train(*args, **kwargs) + return True + def _train_ubuntu(self): trainer = UbuntuCorpusTrainer(self.chatbot) trainer.train() @@ -479,7 +478,9 @@ class Chatter(Cog): text = message.clean_content async with channel.typing(): - future = await self.loop.run_in_executor(None, self.chatbot.get_response, text) + # Switched to `generate_response` from `get_response` + # Switch back once better conversation detection is used. 
+ future = await self.loop.run_in_executor(None, self.chatbot.generate_response, text) if future and str(future): await channel.send(str(future))
diff --git a/chatter/trainers.py b/chatter/trainers.py
new file mode 100644
index 0000000..e6eedba
--- /dev/null
+++ b/chatter/trainers.py
@@ -0,0 +1,48 @@
+from chatterbot import utils
+from chatterbot.conversation import Statement
+from chatterbot.trainers import Trainer
+
+
+class TwitterCorpusTrainer(Trainer):
+    """Trainer that feeds tweets found by a twint search to the chat bot."""
+
+    def train(self, *args, **kwargs):
+        """
+        Search Twitter with twint and store the tweets as training statements.
+
+        Keyword arguments are set as attributes on ``twint.Config``. Each tweet
+        is stored as a response to the previous one; ``args`` is unused.
+        """
+        import twint  # local import: only needed for Twitter training
+        tweets = []
+        config = twint.Config()
+        for option, value in kwargs.items():
+            setattr(config, option, value)
+        config.Store_object = True  # collect results in memory for training
+        config.Store_object_tweets_list = tweets
+        twint.run.Search(config)
+
+        conversation = [tweet.tweet for tweet in tweets]
+        previous_statement_text = None
+        previous_statement_search_text = ''
+        statements_to_create = []
+        for conversation_count, text in enumerate(conversation):
+            if self.show_training_progress:
+                utils.print_progress_bar(
+                    'Twitter Trainer', conversation_count + 1, len(conversation)
+                )
+            statement_search_text = self.chatbot.storage.tagger.get_text_index_string(text)
+            statement = self.get_preprocessed_statement(
+                Statement(
+                    text=text,
+                    search_text=statement_search_text,
+                    in_response_to=previous_statement_text,
+                    search_in_response_to=previous_statement_search_text,
+                    conversation='training',
+                )
+            )
+            previous_statement_text = statement.text
+            previous_statement_search_text = statement_search_text
+            statements_to_create.append(statement)
+
+        self.chatbot.storage.create_many(statements_to_create)