From 762b0fd32005e3a403dcb44bae32a53f2f6d1777 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Fri, 25 Sep 2020 12:02:13 -0400
Subject: [PATCH 01/21] WIP Twitter training

---
 chatter/chat.py     | 21 ++++++++++----------
 chatter/trainers.py | 48 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 10 deletions(-)
 create mode 100644 chatter/trainers.py

diff --git a/chatter/chat.py b/chatter/chat.py
index ad8e37b..607457c 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -15,6 +15,8 @@ from redbot.core.commands import Cog
 from redbot.core.data_manager import cog_data_path
 from redbot.core.utils.predicates import MessagePredicate
 
+from chatter.trainers import TwitterCorpusTrainer
+
 log = logging.getLogger("red.fox_v3.chatter")
 
 
@@ -105,15 +107,7 @@ class Chatter(Cog):
             return msg.clean_content
 
         def new_conversation(msg, sent, out_in, delta):
-            # if sent is None:
-            #     return False
-
-            # Don't do "too short" processing here. Sometimes people don't respond.
-            # if len(out_in) < 2:
-            #     return False
-
-            # print(msg.created_at - sent)
-
+            # Should always be positive numbers
             return msg.created_at - sent >= delta
 
         for channel in ctx.guild.text_channels:
@@ -158,6 +152,11 @@ class Chatter(Cog):
 
         return out
 
+    def _train_twitter(self, *args, **kwargs):
+        trainer = TwitterCorpusTrainer(self.chatbot)
+        trainer.train(*args, **kwargs)
+        return True
+
     def _train_ubuntu(self):
         trainer = UbuntuCorpusTrainer(self.chatbot)
         trainer.train()
@@ -479,7 +478,9 @@ class Chatter(Cog):
         text = message.clean_content
 
         async with channel.typing():
-            future = await self.loop.run_in_executor(None, self.chatbot.get_response, text)
+            # Switched to `generate_response` from `get_result`
+            # Switch back once better conversation detection is used.
+            future = await self.loop.run_in_executor(None, self.chatbot.generate_response, text)
 
             if future and str(future):
                 await channel.send(str(future))
diff --git a/chatter/trainers.py b/chatter/trainers.py
new file mode 100644
index 0000000..e6eedba
--- /dev/null
+++ b/chatter/trainers.py
@@ -0,0 +1,48 @@
+from chatterbot import utils
+from chatterbot.conversation import Statement
+from chatterbot.trainers import Trainer
+
+
+class TwitterCorpusTrainer(Trainer):
+    def train(self, *args, **kwargs):
+        """
+        Train the chat bot based on the provided list of
+        statements that represents a single conversation.
+        """
+        import twint
+
+        c = twint.Config()
+        c.__dict__.update(kwargs)
+        twint.run.Search(c)
+
+
+        previous_statement_text = None
+        previous_statement_search_text = ''
+
+        statements_to_create = []
+
+        for conversation_count, text in enumerate(conversation):
+            if self.show_training_progress:
+                utils.print_progress_bar(
+                    'List Trainer',
+                    conversation_count + 1, len(conversation)
+                )
+
+            statement_search_text = self.chatbot.storage.tagger.get_text_index_string(text)
+
+            statement = self.get_preprocessed_statement(
+                Statement(
+                    text=text,
+                    search_text=statement_search_text,
+                    in_response_to=previous_statement_text,
+                    search_in_response_to=previous_statement_search_text,
+                    conversation='training'
+                )
+            )
+
+            previous_statement_text = statement.text
+            previous_statement_search_text = statement_search_text
+
+            statements_to_create.append(statement)
+
+        self.chatbot.storage.create_many(statements_to_create)
\ No newline at end of file

From 26234e3b18a465ded651960a73ed7d15692a53fb Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Mon, 19 Oct 2020 15:16:49 -0400
Subject: [PATCH 02/21] Alternate dependencies attempt

---
 chatter/info.json   |  3 +-
 chatter/trainers.py | 85 +++++++++++++++++++++++----------------------
 2 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/chatter/info.json b/chatter/info.json
index b79e587..df77ee8 100644
--- a/chatter/info.json
+++ b/chatter/info.json
@@ -17,7 +17,8 @@
     "pytz",
     "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz#egg=en_core_web_sm",
     "https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.3.1/en_core_web_md-2.3.1.tar.gz#egg=en_core_web_md",
-    "spacy>=2.3,<2.4"
+    "spacy>=2.3,<2.4",
+    "--no-deps \"chatterbot>=1.1\""
   ],
   "short": "Local Chatbot run on machine learning",
   "end_user_data_statement": "This cog only stores anonymous conversations data; no End User Data is stored.",
diff --git a/chatter/trainers.py b/chatter/trainers.py
index e6eedba..42d6288 100644
--- a/chatter/trainers.py
+++ b/chatter/trainers.py
@@ -4,45 +4,46 @@ from chatterbot.trainers import Trainer
 
 
 class TwitterCorpusTrainer(Trainer):
-    def train(self, *args, **kwargs):
-        """
-        Train the chat bot based on the provided list of
-        statements that represents a single conversation.
-        """
-        import twint
-
-        c = twint.Config()
-        c.__dict__.update(kwargs)
-        twint.run.Search(c)
-
-
-        previous_statement_text = None
-        previous_statement_search_text = ''
-
-        statements_to_create = []
-
-        for conversation_count, text in enumerate(conversation):
-            if self.show_training_progress:
-                utils.print_progress_bar(
-                    'List Trainer',
-                    conversation_count + 1, len(conversation)
-                )
-
-            statement_search_text = self.chatbot.storage.tagger.get_text_index_string(text)
-
-            statement = self.get_preprocessed_statement(
-                Statement(
-                    text=text,
-                    search_text=statement_search_text,
-                    in_response_to=previous_statement_text,
-                    search_in_response_to=previous_statement_search_text,
-                    conversation='training'
-                )
-            )
-
-            previous_statement_text = statement.text
-            previous_statement_search_text = statement_search_text
-
-            statements_to_create.append(statement)
-
-        self.chatbot.storage.create_many(statements_to_create)
\ No newline at end of file
+    pass
+    # def train(self, *args, **kwargs):
+    #     """
+    #     Train the chat bot based on the provided list of
+    #     statements that represents a single conversation.
+    #     """
+    #     import twint
+    #
+    #     c = twint.Config()
+    #     c.__dict__.update(kwargs)
+    #     twint.run.Search(c)
+    #
+    #
+    #     previous_statement_text = None
+    #     previous_statement_search_text = ''
+    #
+    #     statements_to_create = []
+    #
+    #     for conversation_count, text in enumerate(conversation):
+    #         if self.show_training_progress:
+    #             utils.print_progress_bar(
+    #                 'List Trainer',
+    #                 conversation_count + 1, len(conversation)
+    #             )
+    #
+    #         statement_search_text = self.chatbot.storage.tagger.get_text_index_string(text)
+    #
+    #         statement = self.get_preprocessed_statement(
+    #             Statement(
+    #                 text=text,
+    #                 search_text=statement_search_text,
+    #                 in_response_to=previous_statement_text,
+    #                 search_in_response_to=previous_statement_search_text,
+    #                 conversation='training'
+    #             )
+    #         )
+    #
+    #         previous_statement_text = statement.text
+    #         previous_statement_search_text = statement_search_text
+    #
+    #         statements_to_create.append(statement)
+    #
+    #     self.chatbot.storage.create_many(statements_to_create)
\ No newline at end of file

From a6ebe02233eadd97cedc3191b680d3a3040dd8fe Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Mon, 19 Oct 2020 16:09:21 -0400
Subject: [PATCH 03/21] Back to basics

---
 chatter/info.json | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/chatter/info.json b/chatter/info.json
index df77ee8..b79e587 100644
--- a/chatter/info.json
+++ b/chatter/info.json
@@ -17,8 +17,7 @@
     "pytz",
     "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz#egg=en_core_web_sm",
     "https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.3.1/en_core_web_md-2.3.1.tar.gz#egg=en_core_web_md",
-    "spacy>=2.3,<2.4",
-    "--no-deps \"chatterbot>=1.1\""
+    "spacy>=2.3,<2.4"
   ],
   "short": "Local Chatbot run on machine learning",
   "end_user_data_statement": "This cog only stores anonymous conversations data; no End User Data is stored.",

From 46342109604e2824a3bd011dfbd880fe3909e91c Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Mon, 19 Oct 2020 16:24:39 -0400
Subject: [PATCH 04/21] Add automatic install option

---
 chatter/README.md | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/chatter/README.md b/chatter/README.md
index 8ef6734..c831bb8 100644
--- a/chatter/README.md
+++ b/chatter/README.md
@@ -59,6 +59,35 @@ Install these on your windows machine before attempting the installation:
 [Pandoc - Universal Document Converter](https://pandoc.org/installing.html)
 
 ## Methods
+### Automatic
+
+This method requires some luck to pull off.
+
+#### Step 1: Add repo and install cog
+
+```
+[p]repo add Fox https://github.com/bobloy/Fox-V3
+[p]cog install Fox chatter
+```
+
+If you get an error at this step, stop and skip to one of the manual methods below.
+
+#### Step 2: Install additional dependencies
+
+If the previous commands completed without errors, you can now use `pipinstall` to add the remaining dependencies.
+
+NOTE: This method is not the intended use case for `pipinstall` and may stop working in the future.
+
+```
+[p]pipinstall --no-deps chatterbot>=1.1
+```
+
+#### Step 3: Load the cog and get started
+
+```
+[p]load chatter
+```
+
 ### Windows - Manually
 #### Step 1: Built-in Downloader
 

From 14f8b825d8a5a81d12aa885da7236b13e97964d0 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Tue, 2 Feb 2021 16:35:41 -0500
Subject: [PATCH 05/21] Fix bad learning and checks

---
 chatter/chat.py | 55 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/chatter/chat.py b/chatter/chat.py
index 0988d46..a0a5f28 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -2,8 +2,10 @@ import asyncio
 import logging
 import os
 import pathlib
+from collections import defaultdict
 from datetime import datetime, timedelta
-from typing import Optional
+from functools import partial
+from typing import Dict, Optional
 
 import discord
 from chatterbot import ChatBot
@@ -75,6 +77,10 @@ class Chatter(Cog):
 
         self.loop = asyncio.get_event_loop()
 
+        self._guild_cache = defaultdict(dict)
+
+        self._last_message_per_channel: Dict[Optional[discord.Message]] = defaultdict(lambda: None)
+
     async def red_delete_data_for_user(self, **kwargs):
         """Nothing to delete"""
         return
@@ -190,6 +196,7 @@ class Chatter(Cog):
         if ctx.invoked_subcommand is None:
             pass
 
+    @commands.admin()
     @chatter.command(name="channel")
     async def chatter_channel(
         self, ctx: commands.Context, channel: Optional[discord.TextChannel] = None
@@ -209,6 +216,7 @@ class Chatter(Cog):
             await self.config.guild(ctx.guild).chatchannel.set(channel.id)
             await ctx.maybe_send_embed(f"Chat channel is now {channel.mention}")
 
+    @commands.is_owner()
     @chatter.command(name="cleardata")
     async def chatter_cleardata(self, ctx: commands.Context, confirm: bool = False):
         """
@@ -241,6 +249,7 @@ class Chatter(Cog):
 
         await ctx.tick()
 
+    @commands.is_owner()
     @chatter.command(name="algorithm", aliases=["algo"])
     async def chatter_algorithm(
         self, ctx: commands.Context, algo_number: int, threshold: float = None
@@ -274,6 +283,7 @@ class Chatter(Cog):
 
             await ctx.tick()
 
+    @commands.is_owner()
     @chatter.command(name="model")
     async def chatter_model(self, ctx: commands.Context, model_number: int):
         """
@@ -311,6 +321,7 @@ class Chatter(Cog):
                 f"Model has been switched to {self.tagger_language.ISO_639_1}"
             )
 
+    @commands.is_owner()
     @chatter.command(name="minutes")
     async def minutes(self, ctx: commands.Context, minutes: int):
         """
@@ -322,10 +333,12 @@ class Chatter(Cog):
             await ctx.send_help()
             return
 
-        await self.config.guild(ctx.guild).convo_length.set(minutes)
+        await self.config.guild(ctx.guild).convo_delta.set(minutes)
+        self._guild_cache[ctx.guild.id]["convo_delta"] = minutes
 
         await ctx.tick()
 
+    @commands.is_owner()
     @chatter.command(name="age")
     async def age(self, ctx: commands.Context, days: int):
         """
@@ -340,6 +353,7 @@ class Chatter(Cog):
         await self.config.guild(ctx.guild).days.set(days)
         await ctx.tick()
 
+    @commands.is_owner()
     @chatter.command(name="backup")
     async def backup(self, ctx, backupname):
         """
@@ -361,6 +375,7 @@ class Chatter(Cog):
         else:
             await ctx.maybe_send_embed("Error occurred :(")
 
+    @commands.is_owner()
     @chatter.command(name="trainubuntu")
     async def chatter_train_ubuntu(self, ctx: commands.Context, confirmation: bool = False):
         """
@@ -382,6 +397,7 @@ class Chatter(Cog):
         else:
             await ctx.send("Error occurred :(")
 
+    @commands.is_owner()
     @chatter.command(name="trainenglish")
     async def chatter_train_english(self, ctx: commands.Context):
         """
@@ -395,6 +411,7 @@ class Chatter(Cog):
         else:
             await ctx.maybe_send_embed("Error occurred :(")
 
+    @commands.is_owner()
     @chatter.command()
     async def train(self, ctx: commands.Context, channel: discord.TextChannel):
         """
@@ -477,12 +494,34 @@ class Chatter(Cog):
 
         text = message.clean_content
 
-        async with channel.typing():
-            # Switched to `generate_response` from `get_result`
-            # Switch back once better conversation detection is used.
-            future = await self.loop.run_in_executor(None, self.chatbot.generate_response, text)
+        async with ctx.typing():
+
+            if not self._guild_cache[ctx.guild.id]:
+                self._guild_cache[ctx.guild.id] = await self.config.guild(ctx.guild).all()
+
+            if self._last_message_per_channel[ctx.channel.id] is not None:
+                last_m: discord.Message = self._last_message_per_channel[ctx.channel.id]
+                minutes = self._guild_cache[ctx.guild.id]["convo_delta"]
+                if (datetime.utcnow() - last_m.created_at).seconds > minutes*60:
+                    in_response_to = None
+                else:
+                    in_response_to = last_m.content
+            else:
+                in_response_to = None
+
+            if in_response_to is None:
+                log.debug("Generating response")
+                Statement = self.chatbot.storage.get_object('statement')
+                future = await self.loop.run_in_executor(
+                    None, self.chatbot.generate_response, Statement(text)
+                )
+            else:
+                log.debug("Getting response")
+                future = await self.loop.run_in_executor(
+                    None, partial(self.chatbot.get_response, text, in_response_to=in_response_to)
+                )
 
             if future and str(future):
-                await channel.send(str(future))
+                self._last_message_per_channel[ctx.channel.id] = await ctx.send(str(future))
             else:
-                await channel.send(":thinking:")
+                await ctx.send(":thinking:")

From 337def2fa32ebdcc788e5d785bd388537a6b4899 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Mon, 15 Feb 2021 10:18:18 -0500
Subject: [PATCH 06/21] Some progress on updated ubuntu trainer

---
 chatter/chat.py     |  79 +++++++++++++++++++++---
 chatter/info.json   |   3 +-
 chatter/trainers.py | 142 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 213 insertions(+), 11 deletions(-)

diff --git a/chatter/chat.py b/chatter/chat.py
index a0a5f28..098ba73 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -17,7 +17,7 @@ from redbot.core.commands import Cog
 from redbot.core.data_manager import cog_data_path
 from redbot.core.utils.predicates import MessagePredicate
 
-from chatter.trainers import TwitterCorpusTrainer
+from chatter.trainers import TwitterCorpusTrainer, UbuntuCorpusTrainer2
 
 log = logging.getLogger("red.fox_v3.chatter")
 
@@ -168,6 +168,10 @@ class Chatter(Cog):
         trainer.train()
         return True
 
+    async def _train_ubuntu2(self):
+        trainer = UbuntuCorpusTrainer2(self.chatbot, cog_data_path(self))
+        await trainer.asynctrain()
+
     def _train_english(self):
         trainer = ChatterBotCorpusTrainer(self.chatbot)
         # try:
@@ -353,6 +357,15 @@ class Chatter(Cog):
         await self.config.guild(ctx.guild).days.set(days)
         await ctx.tick()
 
+    @commands.is_owner()
+    @chatter.command(name="kaggle")
+    async def chatter_kaggle(self, ctx: commands.Context):
+        """Register with the kaggle API to download additional datasets for training"""
+        if not await self.check_for_kaggle():
+            await ctx.maybe_send_embed(
+                "[Click here for instructions to setup the kaggle api](https://github.com/Kaggle/kaggle-api#api-credentials)"
+            )
+
     @commands.is_owner()
     @chatter.command(name="backup")
     async def backup(self, ctx, backupname):
@@ -376,7 +389,13 @@ class Chatter(Cog):
             await ctx.maybe_send_embed("Error occurred :(")
 
     @commands.is_owner()
-    @chatter.command(name="trainubuntu")
+    @chatter.group(name="train")
+    async def chatter_train(self, ctx: commands.Context):
+        """Commands for training the bot"""
+        pass
+
+    @commands.is_owner()
+    @chatter_train.command(name="ubuntu")
     async def chatter_train_ubuntu(self, ctx: commands.Context, confirmation: bool = False):
         """
         WARNING: Large Download! Trains the bot using Ubuntu Dialog Corpus data.
@@ -385,7 +404,7 @@ class Chatter(Cog):
         if not confirmation:
             await ctx.maybe_send_embed(
                 "Warning: This command downloads ~500MB then eats your CPU for training\n"
-                "If you're sure you want to continue, run `[p]chatter trainubuntu True`"
+                "If you're sure you want to continue, run `[p]chatter train ubuntu True`"
             )
             return
 
@@ -398,7 +417,29 @@ class Chatter(Cog):
             await ctx.send("Error occurred :(")
 
     @commands.is_owner()
-    @chatter.command(name="trainenglish")
+    @chatter_train.command(name="ubuntu2")
+    async def chatter_train_ubuntu2(self, ctx: commands.Context, confirmation: bool = False):
+        """
+        WARNING: Large Download! Trains the bot using *NEW* Ubuntu Dialog Corpus data.
+        """
+
+        if not confirmation:
+            await ctx.maybe_send_embed(
+                "Warning: This command downloads ~800 then eats your CPU for training\n"
+                "If you're sure you want to continue, run `[p]chatter train ubuntu2 True`"
+            )
+            return
+
+        async with ctx.typing():
+            future = await self._train_ubuntu2()
+
+        if future:
+            await ctx.send("Training successful!")
+        else:
+            await ctx.send("Error occurred :(")
+
+    @commands.is_owner()
+    @chatter_train.command(name="english")
     async def chatter_train_english(self, ctx: commands.Context):
         """
         Trains the bot in english
@@ -412,10 +453,27 @@ class Chatter(Cog):
             await ctx.maybe_send_embed("Error occurred :(")
 
     @commands.is_owner()
-    @chatter.command()
-    async def train(self, ctx: commands.Context, channel: discord.TextChannel):
+    @chatter_train.command(name="list")
+    async def chatter_train_list(self, ctx: commands.Context):
+        """Trains the bot based on an uploaded list.
+
+        Must be a file in the format of a python list: ['prompt', 'response1', 'response2']
         """
-        Trains the bot based on language in this guild
+        if not ctx.message.attachments:
+            await ctx.maybe_send_embed("You must upload a file when using this command")
+            return
+
+        attachment: discord.Attachment = ctx.message.attachments[0]
+
+        a_bytes = await attachment.read()
+
+        await ctx.send("Not yet implemented")
+
+    @commands.is_owner()
+    @chatter_train.command(name="channel")
+    async def chatter_train_channel(self, ctx: commands.Context, channel: discord.TextChannel):
+        """
+        Trains the bot based on language in this guild.
         """
 
         await ctx.maybe_send_embed(
@@ -502,7 +560,7 @@ class Chatter(Cog):
             if self._last_message_per_channel[ctx.channel.id] is not None:
                 last_m: discord.Message = self._last_message_per_channel[ctx.channel.id]
                 minutes = self._guild_cache[ctx.guild.id]["convo_delta"]
-                if (datetime.utcnow() - last_m.created_at).seconds > minutes*60:
+                if (datetime.utcnow() - last_m.created_at).seconds > minutes * 60:
                     in_response_to = None
                 else:
                     in_response_to = last_m.content
@@ -511,7 +569,7 @@ class Chatter(Cog):
 
             if in_response_to is None:
                 log.debug("Generating response")
-                Statement = self.chatbot.storage.get_object('statement')
+                Statement = self.chatbot.storage.get_object("statement")
                 future = await self.loop.run_in_executor(
                     None, self.chatbot.generate_response, Statement(text)
                 )
@@ -525,3 +583,6 @@ class Chatter(Cog):
                 self._last_message_per_channel[ctx.channel.id] = await ctx.send(str(future))
             else:
                 await ctx.send(":thinking:")
+
+    async def check_for_kaggle(self):
+        return False
diff --git a/chatter/info.json b/chatter/info.json
index b79e587..a048c23 100644
--- a/chatter/info.json
+++ b/chatter/info.json
@@ -17,7 +17,8 @@
     "pytz",
     "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz#egg=en_core_web_sm",
     "https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.3.1/en_core_web_md-2.3.1.tar.gz#egg=en_core_web_md",
-    "spacy>=2.3,<2.4"
+    "spacy>=2.3,<2.4",
+    "kaggle"
   ],
   "short": "Local Chatbot run on machine learning",
   "end_user_data_statement": "This cog only stores anonymous conversations data; no End User Data is stored.",
diff --git a/chatter/trainers.py b/chatter/trainers.py
index 42d6288..0b765b7 100644
--- a/chatter/trainers.py
+++ b/chatter/trainers.py
@@ -1,6 +1,146 @@
+import asyncio
+import csv
+import logging
+import os
+import pathlib
+import time
+from functools import partial
+
 from chatterbot import utils
 from chatterbot.conversation import Statement
+from chatterbot.tagging import PosLemmaTagger
 from chatterbot.trainers import Trainer
+from redbot.core.bot import Red
+from dateutil import parser as date_parser
+from redbot.core.utils import AsyncIter
+
+log = logging.getLogger("red.fox_v3.chatter.trainers")
+
+
+class KaggleTrainer(Trainer):
+    def __init__(self, chatbot, datapath: pathlib.Path, **kwargs):
+        super().__init__(chatbot, **kwargs)
+
+        self.data_directory = datapath / kwargs.get("downloadpath", "kaggle_download")
+
+        self.kaggle_dataset = kwargs.get(
+            "kaggle_dataset",
+            "Cornell-University/movie-dialog-corpus",
+        )
+
+        # Create the data directory if it does not already exist
+        if not os.path.exists(self.data_directory):
+            os.makedirs(self.data_directory)
+
+    def is_downloaded(self, file_path):
+        """
+        Check if the data file is already downloaded.
+        """
+        if os.path.exists(file_path):
+            self.chatbot.logger.info("File is already downloaded")
+            return True
+
+        return False
+
+    async def download(self, dataset):
+        import kaggle  # This triggers the API token check
+
+        future = await asyncio.get_event_loop().run_in_executor(
+            None,
+            partial(
+                kaggle.api.dataset_download_files,
+                dataset=dataset,
+                path=self.data_directory,
+                quiet=False,
+                unzip=True,
+            ),
+        )
+
+
+class UbuntuCorpusTrainer2(KaggleTrainer):
+    def __init__(self, chatbot, datapath: pathlib.Path, **kwargs):
+        super().__init__(
+            chatbot,
+            datapath,
+            downloadpath="ubuntu_data_v2",
+            kaggle_dataset="rtatman/ubuntu-dialogue-corpus",
+            **kwargs
+        )
+
+    async def asynctrain(self, *args, **kwargs):
+        extracted_dir = self.data_directory / "Ubuntu-dialogue-corpus"
+
+        # Download and extract the Ubuntu dialog corpus if needed
+        if not extracted_dir.exists():
+            await self.download(self.kaggle_dataset)
+        else:
+            log.info("Ubuntu dialogue already downloaded")
+        if not extracted_dir.exists():
+            raise FileNotFoundError("Did not extract in the expected way")
+
+        train_dialogue = kwargs.get("train_dialogue", True)
+        train_196_dialogue = kwargs.get("train_196", False)
+        train_301_dialogue = kwargs.get("train_301", False)
+
+        if train_dialogue:
+            await self.run_dialogue_training(extracted_dir, "dialogueText.csv")
+
+        if train_196_dialogue:
+            await self.run_dialogue_training(extracted_dir, "dialogueText_196.csv")
+
+        if train_301_dialogue:
+            await self.run_dialogue_training(extracted_dir, "dialogueText_301.csv")
+
+    async def run_dialogue_training(self, extracted_dir, dialogue_file):
+        log.info(f"Beginning dialogue training on {dialogue_file}")
+        start_time = time.time()
+
+        tagger = PosLemmaTagger(language=self.chatbot.storage.tagger.language)
+
+        with open(extracted_dir / dialogue_file, "r", encoding="utf-8") as dg:
+            reader = csv.DictReader(dg)
+
+            next(reader)  # Skip the header
+
+            last_dialogue_id = None
+            previous_statement_text = None
+            previous_statement_search_text = ""
+            statements_from_file = []
+
+            async for row in AsyncIter(reader):
+                dialogue_id = row["dialogueID"]
+                if dialogue_id != last_dialogue_id:
+                    previous_statement_text = None
+                    previous_statement_search_text = ""
+                    last_dialogue_id = dialogue_id
+
+                if len(row) > 0:
+                    statement = Statement(
+                        text=row["text"],
+                        in_response_to=previous_statement_text,
+                        conversation="training",
+                        created_at=date_parser.parse(row["date"]),
+                        persona=row["from"],
+                    )
+
+                    for preprocessor in self.chatbot.preprocessors:
+                        statement = preprocessor(statement)
+
+                    statement.search_text = tagger.get_text_index_string(statement.text)
+                    statement.search_in_response_to = previous_statement_search_text
+
+                    previous_statement_text = statement.text
+                    previous_statement_search_text = statement.search_text
+
+                    statements_from_file.append(statement)
+
+            if statements_from_file:
+                self.chatbot.storage.create_many(statements_from_file)
+
+        print("Training took", time.time() - start_time, "seconds.")
+
+    def train(self, *args, **kwargs):
+        log.error("See asynctrain instead")
 
 
 class TwitterCorpusTrainer(Trainer):
@@ -46,4 +186,4 @@ class TwitterCorpusTrainer(Trainer):
     #
     #         statements_to_create.append(statement)
     #
-    #     self.chatbot.storage.create_many(statements_to_create)
\ No newline at end of file
+    #     self.chatbot.storage.create_many(statements_to_create)

From 8acbc5d9645e1e23e65d60eb00e929d202c4a3e5 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Mon, 15 Mar 2021 15:48:34 -0400
Subject: [PATCH 07/21] Whatever this commit is

---
 chatter/chat.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/chatter/chat.py b/chatter/chat.py
index e29c317..500284c 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -620,7 +620,9 @@ class Chatter(Cog):
                     replying = message
 
             if future and str(future):
-                self._last_message_per_channel[ctx.channel.id]  = await channel.send(str(future), reference=replying)
+                self._last_message_per_channel[ctx.channel.id] = await channel.send(
+                    str(future), reference=replying
+                )
             else:
                 await ctx.send(":thinking:")
 

From 7811c71edbcb7b059e0181fc87cfb3934ba95c53 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Tue, 16 Mar 2021 16:00:42 -0400
Subject: [PATCH 08/21] Use is_reply to train

---
 chatter/chat.py | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/chatter/chat.py b/chatter/chat.py
index 500284c..81d09a8 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -564,13 +564,13 @@ class Chatter(Cog):
         # Thank you Cog-Creators
         channel: discord.TextChannel = message.channel
 
-        # is_reply = False # this is only useful with in_response_to
+        is_reply = False  # this is only useful with in_response_to
         if (
             message.reference is not None
             and isinstance(message.reference.resolved, discord.Message)
             and message.reference.resolved.author.id == self.bot.user.id
         ):
-            # is_reply = True # this is only useful with in_response_to
+            is_reply = True  # this is only useful with in_response_to
             pass  # this is a reply to the bot, good to go
         elif guild is not None and channel.id == await self.config.guild(guild).chatchannel():
             pass  # good to go
@@ -592,7 +592,9 @@ class Chatter(Cog):
             if not self._guild_cache[ctx.guild.id]:
                 self._guild_cache[ctx.guild.id] = await self.config.guild(ctx.guild).all()
 
-            if self._last_message_per_channel[ctx.channel.id] is not None:
+            if is_reply:
+                in_response_to = message.reference.resolved.content
+            elif self._last_message_per_channel[ctx.channel.id] is not None:
                 last_m: discord.Message = self._last_message_per_channel[ctx.channel.id]
                 minutes = self._guild_cache[ctx.guild.id]["convo_delta"]
                 if (datetime.utcnow() - last_m.created_at).seconds > minutes * 60:
@@ -602,16 +604,25 @@ class Chatter(Cog):
             else:
                 in_response_to = None
 
-            if in_response_to is None:
-                log.debug("Generating response")
-                Statement = self.chatbot.storage.get_object("statement")
-                future = await self.loop.run_in_executor(
-                    None, self.chatbot.generate_response, Statement(text)
-                )
-            else:
-                log.debug("Getting response")
-                future = await self.loop.run_in_executor(
-                    None, partial(self.chatbot.get_response, text, in_response_to=in_response_to)
+            # Always use generate response
+            # Chatterbot tries to learn based on the result it comes up with, which is dumb
+            log.debug("Generating response")
+            Statement = self.chatbot.storage.get_object("statement")
+            future = await self.loop.run_in_executor(
+                None, self.chatbot.generate_response, Statement(text)
+            )
+
+            if in_response_to is not None:
+                log.debug("learning response")
+                learning_task = asyncio.create_task(
+                    self.loop.run_in_executor(
+                        None,
+                        partial(
+                            self.chatbot.learn_response,
+                            Statement(text),
+                            previous_statement=in_response_to,
+                        ),
+                    )
                 )
 
             replying = None

From dad14fe972fa9382b58adcc226d65e0cca4ad620 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 18 Mar 2021 16:08:10 -0400
Subject: [PATCH 09/21] black reformatting

---
 chatter/trainers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chatter/trainers.py b/chatter/trainers.py
index 0b765b7..dc0e0b1 100644
--- a/chatter/trainers.py
+++ b/chatter/trainers.py
@@ -64,7 +64,7 @@ class UbuntuCorpusTrainer2(KaggleTrainer):
             datapath,
             downloadpath="ubuntu_data_v2",
             kaggle_dataset="rtatman/ubuntu-dialogue-corpus",
-            **kwargs
+            **kwargs,
         )
 
     async def asynctrain(self, *args, **kwargs):

From 8200cd9af1dfd33edd04e85e1f34af5988214501 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Fri, 19 Mar 2021 15:54:19 -0400
Subject: [PATCH 10/21] Run futures correctly

---
 chatter/chat.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/chatter/chat.py b/chatter/chat.py
index 727efc2..7d3c40f 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -613,15 +613,13 @@ class Chatter(Cog):
 
             if in_response_to is not None:
                 log.debug("learning response")
-                learning_task = asyncio.create_task(
-                    self.loop.run_in_executor(
-                        None,
-                        partial(
-                            self.chatbot.learn_response,
-                            Statement(text),
-                            previous_statement=in_response_to,
-                        ),
-                    )
+                await self.loop.run_in_executor(
+                    None,
+                    partial(
+                        self.chatbot.learn_response,
+                        Statement(text),
+                        previous_statement=in_response_to,
+                    ),
                 )
 
             replying = None
@@ -637,4 +635,6 @@ class Chatter(Cog):
                 await ctx.send(":thinking:")
 
     async def check_for_kaggle(self):
+        """Check whether Kaggle is installed and configured properly"""
+        # TODO: This
         return False

From eac7aee82c4ab29a40f79d2f1dbb16556d58672f Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Fri, 19 Mar 2021 15:54:35 -0400
Subject: [PATCH 11/21] Save every 50 instead of all at once, so it can be
 cancelled

---
 chatter/trainers.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/chatter/trainers.py b/chatter/trainers.py
index dc0e0b1..1fe5f62 100644
--- a/chatter/trainers.py
+++ b/chatter/trainers.py
@@ -107,19 +107,27 @@ class UbuntuCorpusTrainer2(KaggleTrainer):
             previous_statement_search_text = ""
             statements_from_file = []
 
+            save_every = 50
+            count = 0
+
             async for row in AsyncIter(reader):
                 dialogue_id = row["dialogueID"]
                 if dialogue_id != last_dialogue_id:
                     previous_statement_text = None
                     previous_statement_search_text = ""
                     last_dialogue_id = dialogue_id
+                    count += 1
+                    if count >= save_every:
+                        if statements_from_file:
+                            self.chatbot.storage.create_many(statements_from_file)
+                        count = 0
 
                 if len(row) > 0:
                     statement = Statement(
                         text=row["text"],
                         in_response_to=previous_statement_text,
                         conversation="training",
-                        created_at=date_parser.parse(row["date"]),
+                        # created_at=date_parser.parse(row["date"]),
                         persona=row["from"],
                     )
 

From 04ccb435f8512b79a1c02759cd8a459d04f120a0 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 09:51:41 -0400
Subject: [PATCH 12/21] Implement `check_same_thread` = False storage adapter.

Add start of AsyncSQLStorageAdapter
---
 chatter/storage_adapters.py | 73 +++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 chatter/storage_adapters.py

diff --git a/chatter/storage_adapters.py b/chatter/storage_adapters.py
new file mode 100644
index 0000000..4de2f00
--- /dev/null
+++ b/chatter/storage_adapters.py
@@ -0,0 +1,73 @@
+from chatterbot.storage import StorageAdapter, SQLStorageAdapter
+
+
+class MyDumbSQLStorageAdapter(SQLStorageAdapter):
+    def __init__(self, **kwargs):
+        super(SQLStorageAdapter, self).__init__(**kwargs)
+
+        from sqlalchemy import create_engine
+        from sqlalchemy.orm import sessionmaker
+
+        self.database_uri = kwargs.get("database_uri", False)
+
+        # None results in a sqlite in-memory database as the default
+        if self.database_uri is None:
+            self.database_uri = "sqlite://"
+
+        # Create a file database if the database is not a connection string
+        if not self.database_uri:
+            self.database_uri = "sqlite:///db.sqlite3"
+
+        self.engine = create_engine(
+            self.database_uri, convert_unicode=True, connect_args={"check_same_thread": False}
+        )
+
+        if self.database_uri.startswith("sqlite://"):
+            from sqlalchemy.engine import Engine
+            from sqlalchemy import event
+
+            @event.listens_for(Engine, "connect")
+            def set_sqlite_pragma(dbapi_connection, connection_record):
+                dbapi_connection.execute("PRAGMA journal_mode=WAL")
+                dbapi_connection.execute("PRAGMA synchronous=NORMAL")
+
+        if not self.engine.dialect.has_table(self.engine, "Statement"):
+            self.create_database()
+
+        self.Session = sessionmaker(bind=self.engine, expire_on_commit=True)
+
+
+class AsyncSQLStorageAdapter(SQLStorageAdapter):
+    def __init__(self, **kwargs):
+        super(SQLStorageAdapter, self).__init__(**kwargs)
+
+        self.database_uri = kwargs.get("database_uri", False)
+
+        # None results in a sqlite in-memory database as the default
+        if self.database_uri is None:
+            self.database_uri = "sqlite://"
+
+        # Create a file database if the database is not a connection string
+        if not self.database_uri:
+            self.database_uri = "sqlite:///db.sqlite3"
+
+    async def initialize(self):
+        # from sqlalchemy import create_engine
+        from aiomysql.sa import create_engine
+        from sqlalchemy.orm import sessionmaker
+
+        self.engine = await create_engine(self.database_uri, convert_unicode=True)
+
+        if self.database_uri.startswith("sqlite://"):
+            from sqlalchemy.engine import Engine
+            from sqlalchemy import event
+
+            @event.listens_for(Engine, "connect")
+            def set_sqlite_pragma(dbapi_connection, connection_record):
+                dbapi_connection.execute("PRAGMA journal_mode=WAL")
+                dbapi_connection.execute("PRAGMA synchronous=NORMAL")
+
+        if not self.engine.dialect.has_table(self.engine, "Statement"):
+            self.create_database()
+
+        self.Session = sessionmaker(bind=self.engine, expire_on_commit=True)

From 8feb21e34b70f26acf12c7d5af46e673032c9dc6 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 09:52:20 -0400
Subject: [PATCH 13/21] Add new kaggle trainers

---
 chatter/trainers.py | 155 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 151 insertions(+), 4 deletions(-)

diff --git a/chatter/trainers.py b/chatter/trainers.py
index 1fe5f62..d8de22c 100644
--- a/chatter/trainers.py
+++ b/chatter/trainers.py
@@ -1,5 +1,6 @@
 import asyncio
 import csv
+import html
 import logging
 import os
 import pathlib
@@ -56,13 +57,159 @@ class KaggleTrainer(Trainer):
             ),
         )
 
+    def train(self, *args, **kwargs):
+        log.error("See asynctrain instead")
+
+    def asynctrain(self, *args, **kwargs):
+        raise self.TrainerInitializationException()
+
+
+class SouthParkTrainer(KaggleTrainer):
+    def __init__(self, chatbot, datapath: pathlib.Path, **kwargs):
+        super().__init__(
+            chatbot,
+            datapath,
+            downloadpath="ubuntu_data_v2",
+            kaggle_dataset="tovarischsukhov/southparklines",
+            **kwargs,
+        )
+
+
+class MovieTrainer(KaggleTrainer):
+    def __init__(self, chatbot, datapath: pathlib.Path, **kwargs):
+        super().__init__(
+            chatbot,
+            datapath,
+            downloadpath="kaggle_movies",
+            kaggle_dataset="Cornell-University/movie-dialog-corpus",
+            **kwargs,
+        )
+
+    async def run_movie_training(self):
+        dialogue_file = "movie_lines.tsv"
+        conversation_file = "movie_conversations.tsv"
+        log.info(f"Beginning dialogue training on {dialogue_file}")
+        start_time = time.time()
+
+        tagger = PosLemmaTagger(language=self.chatbot.storage.tagger.language)
+
+        # [lineID, characterID, movieID, character name, text of utterance]
+        # File parsing from https://www.kaggle.com/mushaya/conversation-chatbot
+
+        with open(self.data_directory / conversation_file, "r", encoding="utf-8-sig") as conv_tsv:
+            conv_lines = conv_tsv.readlines()
+        with open(self.data_directory / dialogue_file, "r", encoding="utf-8-sig") as lines_tsv:
+            dialog_lines = lines_tsv.readlines()
+
+        # trans_dict = str.maketrans({"<u>": "__", "</u>": "__", '""': '"'})
+
+        lines_dict = {}
+        for line in dialog_lines:
+            _line = line[:-1].strip('"').split("\t")
+            if len(_line) >= 5:  # Only good lines
+                lines_dict[_line[0]] = (
+                    html.unescape(("".join(_line[4:])).strip())
+                    .replace("<u>", "__")
+                    .replace("</u>", "__")
+                    .replace('""', '"')
+                )
+            else:
+                log.debug(f"Bad line {_line}")
+
+        # collecting line ids for each conversation
+        conv = []
+        for line in conv_lines[:-1]:
+            _line = line[:-1].split("\t")[-1][1:-1].replace("'", "").replace(" ", ",")
+            conv.append(_line.split(","))
+
+        # conversations = csv.reader(conv_tsv, delimiter="\t")
+        #
+        # reader = csv.reader(lines_tsv, delimiter="\t")
+        #
+        #
+        #
+        # lines_dict = {}
+        # for row in reader:
+        #     try:
+        #         lines_dict[row[0].strip('"')] = row[4]
+        #     except:
+        #         log.exception(f"Bad line: {row}")
+        #         pass
+        #     else:
+        #         # print(f"Good line: {row}")
+        #         pass
+        #
+        # # lines_dict = {row[0].strip('"'): row[4] for row in reader_list}
+
+        statements_from_file = []
+
+        # [characterID of first, characterID of second, movieID, list of utterances]
+        async for lines in AsyncIter(conv):
+            previous_statement_text = None
+            previous_statement_search_text = ""
+
+            for line in lines:
+                text = lines_dict[line]
+                statement = Statement(
+                    text=text,
+                    in_response_to=previous_statement_text,
+                    conversation="training",
+                )
+
+                for preprocessor in self.chatbot.preprocessors:
+                    statement = preprocessor(statement)
+
+                statement.search_text = tagger.get_text_index_string(statement.text)
+                statement.search_in_response_to = previous_statement_search_text
+
+                previous_statement_text = statement.text
+                previous_statement_search_text = statement.search_text
+
+                statements_from_file.append(statement)
+
+            if statements_from_file:
+                print(statements_from_file)
+                self.chatbot.storage.create_many(statements_from_file)
+                statements_from_file = []
+
+        print("Training took", time.time() - start_time, "seconds.")
+
+    async def asynctrain(self, *args, **kwargs):
+        extracted_lines = self.data_directory / "movie_lines.tsv"
+        extracted_lines: pathlib.Path
+
+        # Download and extract the movie dialog corpus if needed
+        if not extracted_lines.exists():
+            await self.download(self.kaggle_dataset)
+        else:
+            log.info("Movie dialog already downloaded")
+        if not extracted_lines.exists():
+            raise FileNotFoundError(f"{extracted_lines}")
+
+        await self.run_movie_training()
+
+        return True
+
+        # train_dialogue = kwargs.get("train_dialogue", True)
+        # train_196_dialogue = kwargs.get("train_196", False)
+        # train_301_dialogue = kwargs.get("train_301", False)
+        #
+        # if train_dialogue:
+        #     await self.run_dialogue_training(extracted_dir, "dialogueText.csv")
+        #
+        # if train_196_dialogue:
+        #     await self.run_dialogue_training(extracted_dir, "dialogueText_196.csv")
+        #
+        # if train_301_dialogue:
+        #     await self.run_dialogue_training(extracted_dir, "dialogueText_301.csv")
+
 
 class UbuntuCorpusTrainer2(KaggleTrainer):
     def __init__(self, chatbot, datapath: pathlib.Path, **kwargs):
         super().__init__(
             chatbot,
             datapath,
-            downloadpath="ubuntu_data_v2",
+            downloadpath="kaggle_ubuntu",
             kaggle_dataset="rtatman/ubuntu-dialogue-corpus",
             **kwargs,
         )
@@ -91,6 +238,8 @@ class UbuntuCorpusTrainer2(KaggleTrainer):
         if train_301_dialogue:
             await self.run_dialogue_training(extracted_dir, "dialogueText_301.csv")
 
+        return True
+
     async def run_dialogue_training(self, extracted_dir, dialogue_file):
         log.info(f"Beginning dialogue training on {dialogue_file}")
         start_time = time.time()
@@ -120,6 +269,7 @@ class UbuntuCorpusTrainer2(KaggleTrainer):
                     if count >= save_every:
                         if statements_from_file:
                             self.chatbot.storage.create_many(statements_from_file)
+                            statements_from_file = []
                         count = 0
 
                 if len(row) > 0:
@@ -147,9 +297,6 @@ class UbuntuCorpusTrainer2(KaggleTrainer):
 
         print("Training took", time.time() - start_time, "seconds.")
 
-    def train(self, *args, **kwargs):
-        log.error("See asynctrain instead")
-
 
 class TwitterCorpusTrainer(Trainer):
     pass

From ac9cf1e589308e3489a4e4b2d3759faa129009f9 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 09:52:43 -0400
Subject: [PATCH 14/21] Implement movie trainer, guild cache, and learning
 toggle

---
 chatter/chat.py | 161 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 117 insertions(+), 44 deletions(-)

diff --git a/chatter/chat.py b/chatter/chat.py
index 7d3c40f..65966fa 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -17,7 +17,7 @@ from redbot.core.commands import Cog
 from redbot.core.data_manager import cog_data_path
 from redbot.core.utils.predicates import MessagePredicate
 
-from chatter.trainers import TwitterCorpusTrainer, UbuntuCorpusTrainer2
+from chatter.trainers import MovieTrainer, TwitterCorpusTrainer, UbuntuCorpusTrainer2
 
 log = logging.getLogger("red.fox_v3.chatter")
 
@@ -63,6 +63,7 @@ class Chatter(Cog):
             "convo_delta": 15,
             "chatchannel": None,
             "reply": True,
+            "learning": True,
         }
         path: pathlib.Path = cog_data_path(self)
         self.data_path = path / "database.sqlite3"
@@ -95,7 +96,8 @@ class Chatter(Cog):
 
         return ChatBot(
             "ChatterBot",
-            storage_adapter="chatterbot.storage.SQLStorageAdapter",
+            # storage_adapter="chatterbot.storage.SQLStorageAdapter",
+            storage_adapter="chatter.storage_adapters.MyDumbSQLStorageAdapter",
             database_uri="sqlite:///" + str(self.data_path),
             statement_comparison_function=self.similarity_algo,
             response_selection_method=get_random_response,
@@ -176,10 +178,30 @@ class Chatter(Cog):
         trainer.train()
         return True
 
-    async def _train_ubuntu2(self):
-        trainer = UbuntuCorpusTrainer2(self.chatbot, cog_data_path(self))
+    async def _train_movies(self):
+        trainer = MovieTrainer(self.chatbot, cog_data_path(self))
         await trainer.asynctrain()
 
+    async def _train_ubuntu2(self, intensity):
+        train_kwarg = {}
+        if intensity == 196:
+            train_kwarg["train_dialogue"] = False
+            train_kwarg["train_196"] = True
+        elif intensity == 301:
+            train_kwarg["train_dialogue"] = False
+            train_kwarg["train_301"] = True
+        elif intensity == 497:
+            train_kwarg["train_dialogue"] = False
+            train_kwarg["train_196"] = True
+            train_kwarg["train_301"] = True
+        elif intensity >= 9000:  # NOT 9000!
+            train_kwarg["train_dialogue"] = True
+            train_kwarg["train_196"] = True
+            train_kwarg["train_301"] = True
+
+        trainer = UbuntuCorpusTrainer2(self.chatbot, cog_data_path(self))
+        return await trainer.asynctrain(**train_kwarg)
+
     def _train_english(self):
         trainer = ChatterBotCorpusTrainer(self.chatbot)
         # try:
@@ -205,7 +227,7 @@ class Chatter(Cog):
         """
         Base command for this cog. Check help for the commands list.
         """
-        pass
+        self._guild_cache[ctx.guild.id] = {}  # Clear cache when modifying values
 
     @commands.admin()
     @chatter.command(name="channel")
@@ -240,19 +262,39 @@ class Chatter(Cog):
         await self.config.guild(ctx.guild).reply.set(toggle)
 
         if toggle:
-            await ctx.send("I will now respond to you if conversation continuity is not present")
+            await ctx.maybe_send_embed("I will now respond to you if conversation continuity is not present")
         else:
-            await ctx.send(
+            await ctx.maybe_send_embed(
                 "I will not reply to your message if conversation continuity is not present, anymore"
             )
 
+    @commands.admin()
+    @chatter.command(name="learning")
+    async def chatter_learning(self, ctx: commands.Context, toggle: Optional[bool] = None):
+        """
+        Toggle the bot learning from its conversations.
+
+        This is on by default.
+        """
+        learning = await self.config.guild(ctx.guild).learning()
+        if toggle is None:
+            toggle = not learning
+        await self.config.guild(ctx.guild).learning.set(toggle)
+
+        if toggle:
+            await ctx.maybe_send_embed("I will now learn from conversations.")
+        else:
+            await ctx.maybe_send_embed("I will no longer learn from conversations.")
+
     @commands.is_owner()
     @chatter.command(name="cleardata")
     async def chatter_cleardata(self, ctx: commands.Context, confirm: bool = False):
         """
-        This command will erase all training data and reset your configuration settings
+        This command will erase all training data and reset your configuration settings.
 
-        Use `[p]chatter cleardata True`
+        This applies to all guilds.
+
+        Use `[p]chatter cleardata True` to confirm.
         """
 
         if not confirm:
@@ -364,7 +406,6 @@ class Chatter(Cog):
             return
 
         await self.config.guild(ctx.guild).convo_delta.set(minutes)
-        self._guild_cache[ctx.guild.id]["convo_delta"] = minutes
 
         await ctx.tick()
 
@@ -420,7 +461,64 @@ class Chatter(Cog):
         """Commands for training the bot"""
         pass
 
-    @commands.is_owner()
+    @chatter_train.group(name="kaggle")
+    async def chatter_train_kaggle(self, ctx: commands.Context):
+        """
+        Base command for kaggle training sets.
+
+        See `[p]chatter kaggle` for details on how to enable this option
+        """
+        pass
+
+    @chatter_train_kaggle.command(name="ubuntu")
+    async def chatter_train_kaggle_ubuntu(
+        self, ctx: commands.Context, confirmation: bool = False, intensity=0
+    ):
+        """
+        WARNING: Large Download! Trains the bot using *NEW* Ubuntu Dialog Corpus data.
+        """
+
+        if not confirmation:
+            await ctx.maybe_send_embed(
+                "Warning: This command downloads ~800 then eats your CPU for training\n"
+                "If you're sure you want to continue, run `[p]chatter train kaggle ubuntu True`"
+            )
+            return
+
+        async with ctx.typing():
+            future = await self._train_ubuntu2(intensity)
+
+        if future:
+            await ctx.maybe_send_embed("Training successful!")
+        else:
+            await ctx.maybe_send_embed("Error occurred :(")
+
+    @chatter_train_kaggle.command(name="movies")
+    async def chatter_train_kaggle_movies(self, ctx: commands.Context, confirmation: bool = False):
+        """
+        WARNING: Language! Trains the bot using Cornell University's "Movie Dialog Corpus".
+
+        This training set contains dialog from a spread of movies with different MPAA ratings.
+        This dialog includes racism, sexism, and any number of sensitive topics.
+
+        Use at your own risk.
+        """
+
+        if not confirmation:
+            await ctx.maybe_send_embed(
+                "Warning: This command downloads ~800 then eats your CPU for training\n"
+                "If you're sure you want to continue, run `[p]chatter train kaggle movies True`"
+            )
+            return
+
+        async with ctx.typing():
+            future = await self._train_movies()
+
+        if future:
+            await ctx.maybe_send_embed("Training successful!")
+        else:
+            await ctx.maybe_send_embed("Error occurred :(")
+
     @chatter_train.command(name="ubuntu")
     async def chatter_train_ubuntu(self, ctx: commands.Context, confirmation: bool = False):
         """
@@ -438,33 +536,10 @@ class Chatter(Cog):
             future = await self.loop.run_in_executor(None, self._train_ubuntu)
 
         if future:
-            await ctx.send("Training successful!")
+            await ctx.maybe_send_embed("Training successful!")
         else:
-            await ctx.send("Error occurred :(")
+            await ctx.maybe_send_embed("Error occurred :(")
 
-    @commands.is_owner()
-    @chatter_train.command(name="ubuntu2")
-    async def chatter_train_ubuntu2(self, ctx: commands.Context, confirmation: bool = False):
-        """
-        WARNING: Large Download! Trains the bot using *NEW* Ubuntu Dialog Corpus data.
-        """
-
-        if not confirmation:
-            await ctx.maybe_send_embed(
-                "Warning: This command downloads ~800 then eats your CPU for training\n"
-                "If you're sure you want to continue, run `[p]chatter train ubuntu2 True`"
-            )
-            return
-
-        async with ctx.typing():
-            future = await self._train_ubuntu2()
-
-        if future:
-            await ctx.send("Training successful!")
-        else:
-            await ctx.send("Error occurred :(")
-
-    @commands.is_owner()
     @chatter_train.command(name="english")
     async def chatter_train_english(self, ctx: commands.Context):
         """
@@ -478,7 +553,6 @@ class Chatter(Cog):
         else:
             await ctx.maybe_send_embed("Error occurred :(")
 
-    @commands.is_owner()
     @chatter_train.command(name="list")
     async def chatter_train_list(self, ctx: commands.Context):
         """Trains the bot based on an uploaded list.
@@ -495,7 +569,6 @@ class Chatter(Cog):
 
         await ctx.send("Not yet implemented")
 
-    @commands.is_owner()
     @chatter_train.command(name="channel")
     async def chatter_train_channel(self, ctx: commands.Context, channel: discord.TextChannel):
         """
@@ -563,6 +636,9 @@ class Chatter(Cog):
         # Thank you Cog-Creators
         channel: discord.TextChannel = message.channel
 
+        if not self._guild_cache[guild.id]:
+            self._guild_cache[guild.id] = await self.config.guild(guild).all()
+
         is_reply = False  # this is only useful with in_response_to
         if (
             message.reference is not None
@@ -571,7 +647,7 @@ class Chatter(Cog):
         ):
             is_reply = True  # this is only useful with in_response_to
             pass  # this is a reply to the bot, good to go
-        elif guild is not None and channel.id == await self.config.guild(guild).chatchannel():
+        elif guild is not None and channel.id == self._guild_cache[guild.id]["chatchannel"]:
             pass  # good to go
         else:
             when_mentionables = commands.when_mentioned(self.bot, message)
@@ -588,9 +664,6 @@ class Chatter(Cog):
 
         async with ctx.typing():
 
-            if not self._guild_cache[ctx.guild.id]:
-                self._guild_cache[ctx.guild.id] = await self.config.guild(ctx.guild).all()
-
             if is_reply:
                 in_response_to = message.reference.resolved.content
             elif self._last_message_per_channel[ctx.channel.id] is not None:
@@ -611,7 +684,7 @@ class Chatter(Cog):
                 None, self.chatbot.generate_response, Statement(text)
             )
 
-            if in_response_to is not None:
+            if in_response_to is not None and self._guild_cache[guild.id]["learning"]:
                 log.debug("learning response")
                 await self.loop.run_in_executor(
                     None,
@@ -623,7 +696,7 @@ class Chatter(Cog):
                 )
 
             replying = None
-            if await self.config.guild(guild).reply():
+            if self._guild_cache[guild.id]["reply"]:
                 if message != ctx.channel.last_message:
                     replying = message
 

From b4f20dd7d283ed64ab7429824839a533c8abf2e7 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 09:54:14 -0400
Subject: [PATCH 15/21] Don't print everything, use log

---
 chatter/trainers.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/chatter/trainers.py b/chatter/trainers.py
index d8de22c..962fa08 100644
--- a/chatter/trainers.py
+++ b/chatter/trainers.py
@@ -136,7 +136,7 @@ class MovieTrainer(KaggleTrainer):
         #         log.exception(f"Bad line: {row}")
         #         pass
         #     else:
-        #         # print(f"Good line: {row}")
+        #         # log.info(f"Good line: {row}")
         #         pass
         #
         # # lines_dict = {row[0].strip('"'): row[4] for row in reader_list}
@@ -168,11 +168,10 @@ class MovieTrainer(KaggleTrainer):
                 statements_from_file.append(statement)
 
             if statements_from_file:
-                print(statements_from_file)
                 self.chatbot.storage.create_many(statements_from_file)
                 statements_from_file = []
 
-        print("Training took", time.time() - start_time, "seconds.")
+        log.info("Training took", time.time() - start_time, "seconds.")
 
     async def asynctrain(self, *args, **kwargs):
         extracted_lines = self.data_directory / "movie_lines.tsv"
@@ -295,7 +294,7 @@ class UbuntuCorpusTrainer2(KaggleTrainer):
             if statements_from_file:
                 self.chatbot.storage.create_many(statements_from_file)
 
-        print("Training took", time.time() - start_time, "seconds.")
+        log.info("Training took", time.time() - start_time, "seconds.")
 
 
 class TwitterCorpusTrainer(Trainer):

From 59fd96fc5af9d1a0ab9e5c70199f40369381c6ba Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 10:01:56 -0400
Subject: [PATCH 16/21] add save_every for less disk intensive work.

---
 chatter/trainers.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/chatter/trainers.py b/chatter/trainers.py
index 962fa08..adf042f 100644
--- a/chatter/trainers.py
+++ b/chatter/trainers.py
@@ -142,6 +142,8 @@ class MovieTrainer(KaggleTrainer):
         # # lines_dict = {row[0].strip('"'): row[4] for row in reader_list}
 
         statements_from_file = []
+        save_every = 50
+        count = 0
 
         # [characterID of first, characterID of second, movieID, list of utterances]
         async for lines in AsyncIter(conv):
@@ -167,9 +169,15 @@ class MovieTrainer(KaggleTrainer):
 
                 statements_from_file.append(statement)
 
-            if statements_from_file:
-                self.chatbot.storage.create_many(statements_from_file)
-                statements_from_file = []
+            count += 1
+            if count >= save_every:
+                if statements_from_file:
+                    self.chatbot.storage.create_many(statements_from_file)
+                    statements_from_file = []
+                count = 0
+
+        if statements_from_file:
+            self.chatbot.storage.create_many(statements_from_file)
 
         log.info("Training took", time.time() - start_time, "seconds.")
 

From 802929d757458f9ff4fac99203ced1f033c9bdbc Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 10:02:02 -0400
Subject: [PATCH 17/21] better wording

---
 chatter/chat.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chatter/chat.py b/chatter/chat.py
index 65966fa..9e3379f 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -480,7 +480,7 @@ class Chatter(Cog):
 
         if not confirmation:
             await ctx.maybe_send_embed(
-                "Warning: This command downloads ~800 then eats your CPU for training\n"
+                "Warning: This command downloads ~800MB and is CPU intensive during training\n"
                 "If you're sure you want to continue, run `[p]chatter train kaggle ubuntu True`"
             )
             return
@@ -506,7 +506,7 @@ class Chatter(Cog):
 
         if not confirmation:
             await ctx.maybe_send_embed(
-                "Warning: This command downloads ~800 then eats your CPU for training\n"
+                "Warning: This command downloads ~29MB and is CPU intensive during training\n"
                 "If you're sure you want to continue, run `[p]chatter train kaggle movies True`"
             )
             return
@@ -527,7 +527,7 @@ class Chatter(Cog):
 
         if not confirmation:
             await ctx.maybe_send_embed(
-                "Warning: This command downloads ~500MB then eats your CPU for training\n"
+                "Warning: This command downloads ~500MB and is CPU intensive during training\n"
                 "If you're sure you want to continue, run `[p]chatter train ubuntu True`"
             )
             return

From 1319d98972e0b79677a34585fc0b1be2786802e2 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 10:56:48 -0400
Subject: [PATCH 18/21] Less often, still writing too much.

---
 chatter/trainers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chatter/trainers.py b/chatter/trainers.py
index adf042f..4f80b79 100644
--- a/chatter/trainers.py
+++ b/chatter/trainers.py
@@ -142,7 +142,7 @@ class MovieTrainer(KaggleTrainer):
         # # lines_dict = {row[0].strip('"'): row[4] for row in reader_list}
 
         statements_from_file = []
-        save_every = 50
+        save_every = 300
         count = 0
 
         # [characterID of first, characterID of second, movieID, list of utterances]

From db24bb4db4f81d2248b82219d7953798be4dc585 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 10:57:35 -0400
Subject: [PATCH 19/21] No differences

---
 chatter/chat.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/chatter/chat.py b/chatter/chat.py
index 9e3379f..fe50588 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -262,7 +262,9 @@ class Chatter(Cog):
         await self.config.guild(ctx.guild).reply.set(toggle)
 
         if toggle:
-            await ctx.maybe_send_embed("I will now respond to you if conversation continuity is not present")
+            await ctx.maybe_send_embed(
+                "I will now respond to you if conversation continuity is not present"
+            )
         else:
             await ctx.maybe_send_embed(
                 "I will not reply to your message if conversation continuity is not present, anymore"

From 87187abbb3423fc6539864c4239b858d54b280e7 Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 11:11:57 -0400
Subject: [PATCH 20/21] Fix logging

---
 chatter/trainers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chatter/trainers.py b/chatter/trainers.py
index 4f80b79..3cc92da 100644
--- a/chatter/trainers.py
+++ b/chatter/trainers.py
@@ -179,7 +179,7 @@ class MovieTrainer(KaggleTrainer):
         if statements_from_file:
             self.chatbot.storage.create_many(statements_from_file)
 
-        log.info("Training took", time.time() - start_time, "seconds.")
+        log.info(f"Training took {time.time() - start_time} seconds.")
 
     async def asynctrain(self, *args, **kwargs):
         extracted_lines = self.data_directory / "movie_lines.tsv"
@@ -302,7 +302,7 @@ class UbuntuCorpusTrainer2(KaggleTrainer):
             if statements_from_file:
                 self.chatbot.storage.create_many(statements_from_file)
 
-        log.info("Training took", time.time() - start_time, "seconds.")
+        log.info(f"Training took {time.time() - start_time} seconds.")
 
 
 class TwitterCorpusTrainer(Trainer):

From e1297a4dcaec7b12bc1958a728f47d96cfdac5dc Mon Sep 17 00:00:00 2001
From: bobloy <alboblexloy@gmail.com>
Date: Thu, 25 Mar 2021 11:12:05 -0400
Subject: [PATCH 21/21] Return success value

---
 chatter/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chatter/chat.py b/chatter/chat.py
index fe50588..d999d94 100644
--- a/chatter/chat.py
+++ b/chatter/chat.py
@@ -180,7 +180,7 @@ class Chatter(Cog):
 
     async def _train_movies(self):
         trainer = MovieTrainer(self.chatbot, cog_data_path(self))
-        await trainer.asynctrain()
+        return await trainer.asynctrain()
 
     async def _train_ubuntu2(self, intensity):
         train_kwarg = {}