goober/modules/sentenceprocessing.py

96 lines
2.5 KiB
Python
Raw Permalink Normal View History

2025-06-21 11:22:08 +02:00
import re
import discord.ext
import discord.ext.commands
2025-06-21 11:22:08 +02:00
from modules.globalvars import *
import spacy
from spacy.tokens import Doc
from spacytextblob.spacytextblob import SpacyTextBlob
import discord
import modules.keys as k
2025-07-07 17:17:44 +02:00
2025-07-09 15:05:06 +02:00
import logging
2025-07-23 10:19:08 +03:00
2025-07-09 15:05:06 +02:00
# Module-level logger registered under the "goober" name; the helpers below
# log through this instance.
logger = logging.getLogger("goober")
def check_resources():
    """Verify that the spaCy model and the spacytextblob pipe can be set up.

    Tries to load ``en_core_web_sm``; if loading raises ``OSError`` the
    failure is logged, the model is downloaded through ``spacy.cli`` and the
    load is retried. The pipeline built here is local to this function and is
    not returned -- the call only proves that the resources are usable.
    """
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        # Report through the module's "goober" logger rather than the root
        # logger so the message follows the same handlers as every other
        # message emitted by this module.
        logger.critical(k.spacy_model_not_found())
        spacy.cli.download("en_core_web_sm")  # type: ignore
        nlp = spacy.load("en_core_web_sm")
    if "spacytextblob" not in nlp.pipe_names:
        nlp.add_pipe("spacytextblob")
    logger.info(k.spacy_initialized())
2025-07-23 10:19:08 +03:00
# Make sure the model is available before the shared pipeline is built.
check_resources()

# Shared pipeline used by the sentence helpers in this module.
nlp = spacy.load("en_core_web_sm")
# Same guard as in check_resources(): add_pipe raises if the component name
# is already present in the pipeline.
if "spacytextblob" not in nlp.pipe_names:
    nlp.add_pipe("spacytextblob")
# Doc.set_extension raises when the attribute is already registered (for
# example if this module is executed a second time), so register it once.
if not Doc.has_extension("polarity"):
    Doc.set_extension("polarity", getter=lambda doc: doc._.blob.polarity)
2025-07-23 10:19:08 +03:00
2025-06-21 11:22:08 +02:00
def is_positive(sentence):
    """Return True when the sentence's polarity score is strictly above 0.6.

    The score is read from the ``polarity`` extension of the parsed document
    (backed by spacytextblob) and is also written to the debug log.
    """
    polarity = nlp(sentence)._.polarity  # from spacytextblob
    logger.debug(f"{k.sentence_positivity()} {polarity}{RESET}")

    # The bar is deliberately high: with a lower threshold hateful messages
    # were scored as positive and the bot kept reacting to them.
    return polarity > 0.6
2025-06-21 11:22:08 +02:00
2025-07-23 10:19:08 +03:00
async def send_message(
    ctx: discord.ext.commands.Context,
    message: str | None = None,
    embed: discord.Embed | None = None,
    file: discord.File | None = None,
    edit: bool = False,
    message_reference: discord.Message | None = None,
) -> discord.Message | None:
    """Send a new message through ``ctx`` or edit an existing one.

    Args:
        ctx: Context whose ``send`` method is used for new messages and for
            reporting a failed edit.
        message: Text content of the message.
        embed: Optional embed to attach.
        file: Optional file to attach.
        edit: When true (and ``message_reference`` is given) the referenced
            message is edited instead of sending a new one.
        message_reference: The message to edit when ``edit`` is true.

    Returns:
        The edited message, the newly sent message, or ``None`` when the
        edit failed (the failure is reported to the channel).
    """
    if edit and message_reference:
        try:
            await message_reference.edit(content=message, embed=embed)
        except Exception as e:
            # Best-effort: tell the channel why the edit failed instead of
            # letting the error propagate to the caller.
            await ctx.send(f"{k.edit_fail()} {e}")
            return None
        return message_reference

    # Pass along every attachment that was supplied. Previously a file was
    # silently dropped whenever an embed was given as well.
    send_kwargs = {}
    if embed:
        send_kwargs["embed"] = embed
    if file:
        send_kwargs["file"] = file
    return await ctx.send(content=message, **send_kwargs)
2025-07-23 10:19:08 +03:00
2025-06-21 11:22:08 +02:00
def append_mentions_to_18digit_integer(message):
    """Strip standalone Discord-ID-like integers from ``message``.

    Despite its historical name, this function removes the numbers rather
    than appending anything. Runs of 17 to 20 digits delimited by word
    boundaries are deleted: matching exactly 18 digits missed the 17-digit
    and 19-digit IDs that Discord also issues. Surrounding whitespace is
    left untouched.

    Args:
        message: The text to clean.

    Returns:
        ``message`` with every matching digit run removed.
    """
    return re.sub(r"\b\d{17,20}\b", "", message)
2025-06-21 11:22:08 +02:00
2025-07-23 10:19:08 +03:00
2025-06-21 11:22:08 +02:00
def preprocess_message(message):
    """Return ``message`` reduced to its alphabetic and numeric tokens.

    The text is first passed through ``append_mentions_to_18digit_integer``,
    then tokenized with the shared spaCy pipeline; only tokens flagged as
    ``is_alpha`` or ``is_digit`` are kept, joined by single spaces.
    """
    cleaned = append_mentions_to_18digit_integer(message)
    return " ".join(
        tok.text
        for tok in nlp(cleaned)
        if tok.is_alpha or tok.is_digit
    )
2025-06-21 12:00:49 +02:00
2025-07-23 10:19:08 +03:00
2025-06-21 12:00:49 +02:00
def improve_sentence_coherence(sentence):
    """Capitalize every standalone lowercase ``i`` in ``sentence``."""
    lone_i = re.compile(r"\bi\b")
    return lone_i.sub("I", sentence)
2025-06-21 12:00:49 +02:00
def rephrase_for_coherence(sentence):
    """Collapse all whitespace runs in ``sentence`` to single spaces.

    Leading and trailing whitespace is dropped as a side effect of splitting.
    """
    return " ".join(sentence.split())