archive bot v0.0.1 #372

Open · wants to merge 8 commits into base: main
Empty file added tasks/archiving/__init__.py
18 changes: 18 additions & 0 deletions tasks/archiving/config.py
@@ -0,0 +1,18 @@
from typing import Dict, Union, List

# Define constants for configuration keys
TEMPLATE_NAME_KEY = 'template_name_with_namespace'
ARCHIVING_TEMPLATE_KEY = 'automated_archiving_template'
SECTION_TYPE_KEY = 'section_type'
SKIP_TEMPLATES_KEY = 'skip_templates'

# Define the type for configuration values
ConfigValue = Union[str, int, bool, List[str]] # Extend as needed

# Define the configuration dictionary
USER_CONFIG: Dict[str, ConfigValue] = {
    TEMPLATE_NAME_KEY: 'قالب:أرشيف_آلي',  # "Template:Automatic archive"
    ARCHIVING_TEMPLATE_KEY: 'أرشفة آلية',  # "automatic archiving"
    SECTION_TYPE_KEY: ['حجم', 'قسم'],  # section types: "size" and "section"
    SKIP_TEMPLATES_KEY: ['رشف', 'آخر']  # templates that block archiving
}
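
For reference, a minimal sketch (illustrative only, not part of the diff) of how this config might be read elsewhere in the task; the isinstance check narrows the ConfigValue union:

from tasks.archiving.config import USER_CONFIG, SKIP_TEMPLATES_KEY

skip_templates = USER_CONFIG.get(SKIP_TEMPLATES_KEY, [])
if isinstance(skip_templates, list):
    for template_name in skip_templates:
        print("Will skip sections containing: " + template_name)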
Empty file added tasks/archiving/core/__init__.py
196 changes: 196 additions & 0 deletions tasks/archiving/core/archiver.py
@@ -0,0 +1,196 @@
import re
from datetime import datetime
import wikitextparser as wtp
import pywikibot
import hashlib
from core.utils.helpers import prepare_str


class Options:
    def __init__(self, page: pywikibot.Page, template_name: str = "أرشفة آلية"):
        """
        Initializes the object with the given `page` and `template_name`.

        Parameters:
            page (pywikibot.Page): The page object.
            template_name (str, optional): The name of the template. Defaults to "أرشفة آلية".
        """
        self.template_name = template_name
        self.page = page
        # (mode, threshold, extra argument); defaults to section mode ('قسم').
        # The threshold is days in section mode, kilobytes in size mode.
        self.option = ('قسم', '3', None)
        self._get_params()

    def _get_template(self):
        """
        Retrieves the template with the specified name from the page's wikitext.

        Returns:
            wtp.Template or None: The template object if found, None otherwise.
        """
        text = self.page.get()
        templates = wtp.parse(text).templates
        for t in templates:
            if t.name.strip() == self.template_name:
                return t
        return None

    def _get_params(self):
        """
        Reads the template arguments and, when exactly three are present,
        stores their stripped values in `self.option`. Otherwise the default
        option is kept.
        """
        template = self._get_template()
        if template is None:
            return

        arguments = template.arguments
        if len(arguments) == 3:
            self.option = tuple(arg.value.strip() for arg in arguments)
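
# Usage sketch (illustrative): the three positional parameters come from a
# talk-page transclusion such as {{أرشفة آلية|قسم|3|أرشيف}}; the third value
# here is hypothetical. Parsed with wikitextparser:
#
#   t = wtp.parse("{{أرشفة آلية|قسم|3|أرشيف}}").templates[0]
#   [a.value for a in t.arguments]   # ['قسم', '3', 'أرشيف']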



class Section:
    def __init__(self, title, content):
        self.title = title.strip()
        self.content = content
        self.id = self._generate_id()
        self.skip = False
        self.skip_templates = [prepare_str("لا للأرشفة")]
        self._skip()

    def _generate_id(self):
        # sha1 hexdigest() already returns a str; no extra encode() is needed.
        content_hash = hashlib.sha1(self.content.encode('utf-8', 'ignore')).hexdigest()
        return f"{prepare_str(self.title)}_{content_hash}"

    def _skip(self):
        parse = wtp.parse(self.content)
        for template in parse.templates:
            if prepare_str(template.normal_name()) in self.skip_templates:
                self.skip = True
                break
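
# Usage sketch (illustrative; the wikitext below is made up):
#
#   s = Section("عنوان تجريبي", "نقاش قديم {{لا للأرشفة}}")
#   s.skip   # True, because the section transcludes the no-archive template
#   s.id     # "<normalized title>_<sha1 hex digest of the section body>"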

class Archiver:
    def __init__(self, page: pywikibot.Page):
        """
        Initializes an Archiver object.

        Args:
            page (pywikibot.Page): The talk page to be archived.
        """
        self.talk_page = page
        self.options = Options(self.talk_page).option

    def archive_talk_page(self):
        """
        Archives every section whose last comment is older than the
        configured threshold.
        """
        text = self.talk_page.get()
        header = self._extract_header(text)
        current_time = datetime.utcnow()
        archive_text = ''
        # Keep the header (the {{رشف}} banner) on the talk page.
        remaining_text = header

        sections = self._split_sections(text)
        last_comment_timestamps = self.get_last_comment_timestamps()

        for section_title, section_content in sections:
            section = Section(section_title, section_content)

            # section_content already includes the heading line.
            if section.skip:
                remaining_text += section_content
                continue

            if section.id in last_comment_timestamps:
                last_comment_time = last_comment_timestamps[section.id]
                if (current_time - last_comment_time).days > int(self.options[1]):
                    archive_text += section_content
                else:
                    remaining_text += section_content
            else:
                remaining_text += section_content

        # In size mode the threshold is in kilobytes: archive nothing until
        # the page grows past it.
        if self.options[0] != 'قسم':
            if len(self.talk_page.text) < int(self.options[1]) * 1000:
                archive_text = ''

        if archive_text:
            print("Sections ready to archive.")
            # archive_page = pywikibot.Page(self.site, f'{ARCHIVE_PAGE_PREFIX}{current_time.strftime("%Y-%m")}')
            # archive_page.text += archive_text
            # archive_page.save(summary='Archiving old discussions')
            #
            # self.talk_page.text = remaining_text
            # self.talk_page.save(summary='Archiving old discussions')
        else:
            print("No sections to archive.")

    def get_last_comment_timestamps(self):
        # Walk the last 20 revisions, newest first.
        history = self.talk_page.revisions(reverse=False, total=20, content=True)
        section_last_edit = {}
        seen_sections = set()

        for revision in history:
            try:
                timestamp = revision.timestamp
                content = revision.text

                sections = self._split_sections(content)
                current_sections = set()

                for section_title, section_content in sections:
                    section = Section(section_title, section_content)
                    current_sections.add(section.id)

                    # A section id hashes the section's content, so the earliest
                    # revision containing the current id is the revision in which
                    # the last comment was added.
                    if section.id not in section_last_edit:
                        section_last_edit[section.id] = timestamp
                    else:
                        section_last_edit[section.id] = min(section_last_edit[section.id], timestamp)

                removed_sections = seen_sections - current_sections
                for section_id in removed_sections:
                    if section_id not in section_last_edit:
                        section_last_edit[section_id] = timestamp

                seen_sections = current_sections
            except Exception as e:
                print(f"Error processing revision {revision.revid}: {e}")

        return section_last_edit
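
    # Usage sketch (illustrative): revision.timestamp is a pywikibot.Timestamp,
    # a datetime subclass, so the age check in archive_talk_page is plain
    # datetime arithmetic:
    #
    #   age = datetime.utcnow() - last_comment_timestamps[section.id]
    #   age.days > 3   # archive once older than the configured threshold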

    def _split_sections(self, text):
        parsed = wtp.parse(text)
        # Keep only level-2 sections (the top-level discussion threads).
        # Note: section.string includes the heading line itself.
        return [(section.title, section.string) for section in parsed.sections if section.level == 2]

    def _extract_header(self, text):
        parsed = wtp.parse(text)

        # Collect the character spans of all {{رشف}} (header banner) templates.
        spans = []
        for template in parsed.templates:
            if template.name.strip() == 'رشف':
                spans.append(template.span[0])
                spans.append(template.span[1])

        if len(spans) < 2:
            return ""
        return text[spans[0]:spans[-1]]


if __name__ == '__main__':
    site = pywikibot.Site('ar', 'wikipedia')
    page_name = "نقاش_المستخدم:لوقا"
    page = pywikibot.Page(site, page_name)
    archive_obj = Archiver(page)
    archive_obj.archive_talk_page()

# TODO:
# - create a class to archive sections
# - customize the archive summary
98 changes: 98 additions & 0 deletions tasks/archiving/core/bot.py
@@ -0,0 +1,98 @@
import logging
from abc import ABC, abstractmethod
from typing import List, Dict, Callable


# Define the Job interface
class Job(ABC):
    @abstractmethod
    def perform(self, item):
        """
        Perform an action on the given item.

        :param item: The item to process, e.g., a page or a file
        """
        pass


# Implement concrete strategies for different jobs
class ActionJob(Job):
    def perform(self, page):
        # Implement specific action here
        print(f"Performing action on page: {page.title()}")
        logging.info(f"Performing action on page: {page.title()}")



# Define a HookManager for dynamic hooks
class HookManager:
    def __init__(self):
        self.hooks: Dict[str, List[Callable]] = {
            'before': [],
            # 'main': [],
            'after': []
        }

    def add_hook(self, point: str, hook: Callable):
        if point in self.hooks:
            self.hooks[point].append(hook)

    def remove_hook(self, point: str, hook: Callable):
        if point in self.hooks:
            self.hooks[point].remove(hook)

    def run_hooks(self, point: str, item):
        if point in self.hooks:
            for hook in self.hooks[point]:
                hook(item)
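
# Usage sketch (illustrative): registering and firing hooks.
#
#   hm = HookManager()
#   hm.add_hook('before', lambda page: print("about to process", page))
#   hm.run_hooks('before', "stub-item")   # calls each 'before' hook in order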


# Define the CompositeJob class to handle multiple jobs
class CompositeJob(Job):
    def __init__(self, hook_manager: HookManager):
        self.jobs: List[Job] = []
        self.hook_manager = hook_manager

    def add_job(self, job: Job):
        self.jobs.append(job)

    def perform(self, item):
        # Run before hooks
        self.hook_manager.run_hooks('before', item)

        # Execute main jobs
        for job in self.jobs:
            job.perform(item)

        # Run after hooks
        self.hook_manager.run_hooks('after', item)
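
# Usage sketch (illustrative): a CompositeJob wraps its jobs with the hooks.
#
#   composite = CompositeJob(hook_manager=HookManager())
#   composite.add_job(ActionJob())
#   composite.perform(page)   # before-hooks -> each job in order -> after-hooks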


# Define the abstract Processor class
class Processor(ABC):
    def __init__(self, job: Job):
        self.job = job  # Dependency injection of the Job strategy

    @abstractmethod
    def get_items(self):
        """
        Retrieve the items to be processed.

        :return: A list of items to process
        """
        pass

    def process_items(self):
        items = self.get_items()
        for item in items:
            self.job.perform(item)  # Delegate the action to the injected Job strategy


# Example hook functions
def before_hook(item):
    print(f"Before processing item: {item.title()}")


def after_hook(item):
    print(f"After processing item: {item.title()}")

35 changes: 35 additions & 0 deletions tasks/archiving/run.py
@@ -0,0 +1,35 @@
import pywikibot

from tasks.archiving.config import USER_CONFIG, TEMPLATE_NAME_KEY
from tasks.archiving.core.bot import HookManager, CompositeJob, ActionJob, Processor, Job


# Concrete implementation for WikiPage processing
class WikiPageProcessor(Processor):
    def __init__(self, job: Job):
        super().__init__(job)
        self.site = pywikibot.Site('ar', 'wikipedia')
        self.template_name = USER_CONFIG.get(TEMPLATE_NAME_KEY)
        self.template_page = pywikibot.Page(self.site, self.template_name)

    def get_items(self):
        pages = self.template_page.embeddedin()
        filtered_pages = [
            page for page in pages
            if page.depth == 0 and not ('edit' in page.protection() and 'sysop' in page.protection()['edit'])
        ]
        return filtered_pages
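
# Note (sketch): page.protection() returns a dict such as
# {'edit': ('sysop', 'infinity')}, so the filter above skips pages whose edit
# protection is sysop-only, and page.depth == 0 keeps only top-level pages.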


if __name__ == '__main__':
    # Create the HookManager
    hook_manager = HookManager()
    # (before_hook/after_hook would need to be imported from tasks.archiving.core.bot)
    # hook_manager.add_hook('before', before_hook)
    # hook_manager.add_hook('after', after_hook)

    # Create and configure the composite job
    composite_job = CompositeJob(hook_manager=hook_manager)
    composite_job.add_job(ActionJob())

    # Create the processor with the composite job
    processor = WikiPageProcessor(job=composite_job)
    processor.process_items()
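
Assuming the package layout above, the task would presumably be launched as a module, e.g. python -m tasks.archiving.run (invocation hypothetical; it depends on how the repository configures pywikibot).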