From 25044d16d43bb121141891fbf7d553ab69cca904 Mon Sep 17 00:00:00 2001
From: WhatDidYouExpect <89535984+WhatDidYouExpect@users.noreply.github.com>
Date: Wed, 2 Jul 2025 01:32:45 +0200
Subject: [PATCH 1/3] changed sentenceprocessing.py to use spaCy instead of
 NLTK for both aspects

---
 example.env                   |  3 --
 modules/globalvars.py         |  2 +-
 modules/sentenceprocessing.py | 78 +++++++++++------------------------
 requirements.txt              |  3 +-
 4 files changed, 26 insertions(+), 60 deletions(-)

diff --git a/example.env b/example.env
index 7339bf4..343e0c3 100644
--- a/example.env
+++ b/example.env
@@ -2,9 +2,6 @@ DISCORD_BOT_TOKEN=token
 BOT_PREFIX="g."
 PING_LINE="The Beretta fires fast and won't make you feel any better!"
 BLACKLISTED_USERS=
-cooldown=10800
-hourlyspeak=1318263176134918246
-ownerid=542701119948849163
 USERTRAIN_ENABLED="true"
 showmemenabled="true"
 NAME="an instance of goober"
diff --git a/modules/globalvars.py b/modules/globalvars.py
index 20f5eac..7adc523 100644
--- a/modules/globalvars.py
+++ b/modules/globalvars.py
@@ -39,5 +39,5 @@ arch = platform.machine()
 slash_commands_enabled = False
 launched = False
 latest_version = "0.0.0"
-local_version = "1.0.6"
+local_version = "2.0.0a1 (spaCy)"
 os.environ['gooberlocal_version'] = local_version
diff --git a/modules/sentenceprocessing.py b/modules/sentenceprocessing.py
index 47e482c..ac3a9e6 100644
--- a/modules/sentenceprocessing.py
+++ b/modules/sentenceprocessing.py
@@ -2,59 +2,43 @@ import re
 from modules.globalvars import *
 from modules.translations import *
-import nltk
-import nltk.data
+import spacy
+from spacy.tokens import Doc
+from spacytextblob.spacytextblob import SpacyTextBlob
+nlp = spacy.load("en_core_web_sm")
+nlp.add_pipe("spacytextblob")
+Doc.set_extension("polarity", getter=lambda doc: doc._.blob.polarity)
 
-# Ensure required NLTK resources are available
 def check_resources():
-    # Check for required NLTK resources and download if missing
-    resources = {
-        'vader_lexicon': 'sentiment/vader_lexicon',
-        'punkt_tab': 'tokenizers/punkt',
-    }
-    for resource, path in resources.items():
-        try:
-            nltk.data.find(path)
-            logger.info(f"{resource} is already installed.")
-        except Exception:
-            nltk.download(str(resource))
+    try:
+        nlp = spacy.load("en_core_web_sm")
+    except OSError:
+        print("spaCy model not found. Downloading en_core_web_sm...")
+        spacy.cli.download("en_core_web_sm")
+        nlp = spacy.load("en_core_web_sm")
+    if "spacytextblob" not in nlp.pipe_names:
+        nlp.add_pipe("spacytextblob")
+    print("spaCy model and spacytextblob are ready.")
 
 check_resources()
 
-from nltk.sentiment.vader import SentimentIntensityAnalyzer
-from nltk.tokenize import word_tokenize
-
-# Initialize the sentiment analyzer
-analyzer = SentimentIntensityAnalyzer()
-
 def is_positive(sentence):
-    """
-    Determines if the sentiment of the sentence is positive.
-    logger.infos debug information and returns True if sentiment score > 0.1.
-    """
-    scores = analyzer.polarity_scores(sentence)
-    sentiment_score = scores['compound']
+    doc = nlp(sentence)
+    sentiment_score = doc._.polarity # from spacytextblob
 
-    # logger.info debug message with sentiment score
     debug_message = f"{DEBUG}{get_translation(LOCALE, 'sentence_positivity')} {sentiment_score}{RESET}"
-    logger.info(debug_message)
+    print(debug_message)
 
     return sentiment_score > 0.1
 
 async def send_message(ctx, message=None, embed=None, file=None, edit=False, message_reference=None):
-    """
-    Sends or edits a message in a Discord context.
-    Handles both slash command and regular command contexts.
- """ if edit and message_reference: try: - # Editing the existing message await message_reference.edit(content=message, embed=embed) except Exception as e: await ctx.send(f"{RED}{get_translation(LOCALE, 'edit_fail')} {e}{RESET}") else: if hasattr(ctx, "respond"): - # For slash command contexts sent_message = None if embed: sent_message = await ctx.respond(embed=embed, ephemeral=False) @@ -63,7 +47,6 @@ async def send_message(ctx, message=None, embed=None, file=None, edit=False, mes if file: sent_message = await ctx.respond(file=file, ephemeral=False) else: - # For regular command contexts sent_message = None if embed: sent_message = await ctx.send(embed=embed) @@ -74,34 +57,19 @@ async def send_message(ctx, message=None, embed=None, file=None, edit=False, mes return sent_message def append_mentions_to_18digit_integer(message): - """ - Removes 18-digit integers from the message (commonly used for Discord user IDs). - """ pattern = r'\b\d{18}\b' - return re.sub(pattern, lambda match: f"", message) + return re.sub(pattern, lambda match: "", message) def preprocess_message(message): - """ - Preprocesses the message by removing 18-digit integers and non-alphanumeric tokens. - Returns the cleaned message as a string. - """ message = append_mentions_to_18digit_integer(message) - tokens = word_tokenize(message) - tokens = [token for token in tokens if token.isalnum()] + doc = nlp(message) + tokens = [token.text for token in doc if token.is_alpha or token.is_digit] return " ".join(tokens) def improve_sentence_coherence(sentence): - """ - Improves sentence coherence by capitalizing isolated 'i' pronouns. - """ - sentence = sentence.replace(" i ", " I ") - return sentence + return re.sub(r'\bi\b', 'I', sentence) def rephrase_for_coherence(sentence): - """ - Rephrases the sentence for coherence by joining words with spaces. - (Currently a placeholder function.) 
- """ words = sentence.split() coherent_sentence = " ".join(words) - return coherent_sentence \ No newline at end of file + return coherent_sentence diff --git a/requirements.txt b/requirements.txt index e7d8898..1b9e42c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ discord.py markovify -nltk +spacy +spacytextblob requests psutil better_profanity From b5c9de3097c4d77c4bb9daf2a66b85b81cfd07a8 Mon Sep 17 00:00:00 2001 From: WhatDidYouExpect <89535984+WhatDidYouExpect@users.noreply.github.com> Date: Wed, 2 Jul 2025 01:34:43 +0200 Subject: [PATCH 2/3] i realized i had to use semantic versioning --- modules/globalvars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/globalvars.py b/modules/globalvars.py index 7adc523..d8155e3 100644 --- a/modules/globalvars.py +++ b/modules/globalvars.py @@ -39,5 +39,5 @@ arch = platform.machine() slash_commands_enabled = False launched = False latest_version = "0.0.0" -local_version = "2.0.0a1 (spaCy)" +local_version = "1.0.7" os.environ['gooberlocal_version'] = local_version From f89de0699a54e668c8269dc789f607fed28a67ca Mon Sep 17 00:00:00 2001 From: WhatDidYouExpect <89535984+WhatDidYouExpect@users.noreply.github.com> Date: Wed, 2 Jul 2025 15:58:51 +0200 Subject: [PATCH 3/3] finish up the translations --- assets/locales/en.json | 3 +++ assets/locales/it.json | 3 +++ bot.py | 6 +++--- modules/image.py | 2 +- modules/sentenceprocessing.py | 4 ++-- 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/assets/locales/en.json b/assets/locales/en.json index 6d5093b..b8a7887 100644 --- a/assets/locales/en.json +++ b/assets/locales/en.json @@ -1,4 +1,7 @@ { + "active_users:": "Active users:", + "spacy_initialized": "spaCy and spacytextblob are ready.", + "spacy_model_not_found": "The spaCy model was not found! Downloading it....`", "env_file_not_found": "The .env file was not found! Please create one with the required variables.", "error_fetching_active_users": "Error fetching active users: {error}", "error_sending_alive_ping": "Error sending alive ping: {error}", diff --git a/assets/locales/it.json b/assets/locales/it.json index 60c1ed5..4e24440 100644 --- a/assets/locales/it.json +++ b/assets/locales/it.json @@ -1,4 +1,7 @@ { + "active_users:": "Utenti attivi:", + "spacy_initialized": "spaCy e spacytextblob sono pronti.", + "spacy_model_not_found": "Il modello spaCy non è stato trovato! Lo sto scaricando...", "env_file_not_found": "Il file .env non è stato trovato! 
Crea un file con le variabili richieste.", "error fetching_active_users": "Errore nel recupero degli utenti attivi:", "error_sending_alive_ping": "Errore nell'invio di aliveping:", diff --git a/bot.py b/bot.py index ee60c49..c04abaf 100644 --- a/bot.py +++ b/bot.py @@ -109,11 +109,11 @@ async def on_ready(): print(f"{GREEN}{get_translation(LOCALE, 'synced_commands')} {len(synced)} {get_translation(LOCALE, 'synced_commands2')} {RESET}") slash_commands_enabled = True ping_server() # ping_server from modules/central.py - # --- Mostra utenti attivi --- + # I FORGOT TO REMOVE THE ITALIAN VERSION FUCKKKKKKKKK active_users = await fetch_active_users() - print(f"{GREEN}Utenti attivi: {active_users}{RESET}") + print(f"{GREEN}{get_translation(LOCALE, 'active_users:')} {active_users}{RESET}") print(f"{GREEN}{get_translation(LOCALE, 'started').format(name=NAME)}{RESET}") - # --- Avvia il task periodico --- + bot.loop.create_task(send_alive_ping_periodically()) except discord.errors.Forbidden as perm_error: print(f"{RED}Permission error while syncing commands: {perm_error}{RESET}") diff --git a/modules/image.py b/modules/image.py index 87a5dbd..af7dcb8 100644 --- a/modules/image.py +++ b/modules/image.py @@ -3,7 +3,7 @@ import re from PIL import Image, ImageDraw, ImageFont from modules.markovmemory import load_markov_model from modules.sentenceprocessing import improve_sentence_coherence, rephrase_for_coherence - +# add comments l8r generated_sentences = set() async def gen_image(input_image_path, sentence_size=5, max_attempts=10): diff --git a/modules/sentenceprocessing.py b/modules/sentenceprocessing.py index ac3a9e6..df64161 100644 --- a/modules/sentenceprocessing.py +++ b/modules/sentenceprocessing.py @@ -13,12 +13,12 @@ def check_resources(): try: nlp = spacy.load("en_core_web_sm") except OSError: - print("spaCy model not found. Downloading en_core_web_sm...") + print(get_translation(LOCALE, 'spacy_model_not_found')) spacy.cli.download("en_core_web_sm") nlp = spacy.load("en_core_web_sm") if "spacytextblob" not in nlp.pipe_names: nlp.add_pipe("spacytextblob") - print("spaCy model and spacytextblob are ready.") + print(get_translation(LOCALE, 'spacy_initialized')) check_resources()
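
For reviewers who want to sanity-check the new sentiment path outside the bot, below is a minimal, self-contained sketch of the spaCy + spacytextblob check that PATCH 1/3 wires into is_positive(). It is not part of the patches: the threshold parameter and the demo sentences are illustrative, and it assumes spacy, spacytextblob, and the en_core_web_sm model are installed (pip install spacy spacytextblob, then python -m spacy download en_core_web_sm).

# Standalone sketch (not from the patches): mirrors the polarity threshold used in is_positive().
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob  # noqa: F401 -- registers the "spacytextblob" factory

nlp = spacy.load("en_core_web_sm")
if "spacytextblob" not in nlp.pipe_names:
    nlp.add_pipe("spacytextblob")

def is_positive(sentence: str, threshold: float = 0.1) -> bool:
    # spacytextblob exposes TextBlob polarity in [-1.0, 1.0] via doc._.blob.polarity,
    # which is what the patch's Doc.set_extension("polarity", ...) getter reads.
    doc = nlp(sentence)
    return doc._.blob.polarity > threshold

print(is_positive("goober is wonderful"))  # expected: True
print(is_positive("this is awful"))        # expected: False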