Source code for mwclient.client

import json
import logging
import warnings
from collections import OrderedDict
from typing import Optional, Callable, Union, Mapping, Any, MutableMapping, List, Dict, \
    Tuple, cast, Iterable, BinaryIO, Iterator

import requests
from requests.auth import HTTPBasicAuth, AuthBase
from requests_oauthlib import OAuth1

import mwclient.errors as errors
import mwclient.listing as listing
from mwclient._types import Cookies, Namespace, VersionTuple
from mwclient.sleep import Sleeper, Sleepers
from mwclient.util import parse_timestamp, read_in_chunks, handle_limit

__version__ = '0.11.0'

log = logging.getLogger(__name__)

USER_AGENT = f'mwclient/{__version__} (https://github.com/mwclient/mwclient)'


[docs] class Site: """A MediaWiki site identified by its hostname. Examples: >>> import mwclient >>> wikipedia_site = mwclient.Site('en.wikipedia.org') >>> wikia_site = mwclient.Site('vim.wikia.com', path='/') Args: host: The hostname of a MediaWiki instance. Must not include a scheme (e.g. `https://`) - use the `scheme` argument instead. path: The instances script path (where the `index.php` and `api.php` scripts are located). Must contain a trailing slash (`/`). Defaults to `/w/`. ext: The file extension used by the MediaWiki API scripts. Defaults to `.php`. pool: A preexisting :class:`~requests.Session` to be used when executing API requests. When this is set, the `client_certificate`, `clients_useragent`, `custom_headers`, `http_auth` and all OAuth related parameters are all ignored. retry_timeout: The number of seconds to sleep for each past retry of a failing API request. Defaults to `30`. max_retries: The maximum number of retries to perform for failing API requests. Defaults to `25`. wait_callback: A callback function to be executed for each failing API request. clients_useragent: A prefix to be added to the default mwclient user-agent. Should follow the pattern `'{tool_name}/{tool_version} ({contact})'`. Check the `User-Agent policy <https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy>`_ for more information. max_lag: A `maxlag` parameter to be used in `index.php` calls. Consult the `documentation <https://www.mediawiki.org/wiki/Manual:Maxlag_parameter>`_ for more information. Defaults to `3`. compress: Whether to request and accept gzip compressed API responses. Defaults to `True`. force_login: Whether to require authentication when editing pages. Set to `False` to allow unauthenticated edits. Defaults to `True`. do_init: Whether to automatically initialize the :py:class:`Site` on initialization. When set to `False`, the :py:class:`Site` must be initialized manually using the :py:meth:`.site_init` method. Defaults to `True`. httpauth: An authentication method to be used when making API requests. This can be either an authentication object as provided by the :py:mod:`requests` library, or a tuple in the form `{username, password}`. Usernames and passwords provided as text strings are encoded as UTF-8. If dealing with a server that cannot handle UTF-8, please provide the username and password already encoded with the appropriate encoding. connection_options: Additional arguments to be passed to the :py:meth:`requests.Session.request` method when performing API calls. If the `timeout` key is empty, a default timeout of 30 seconds is added. consumer_token: OAuth1 consumer key. consumer_secret: OAuth1 consumer secret. access_token: OAuth1 access key. access_secret: OAuth1 access secret. client_certificate: A client certificate to be added to the session. custom_headers: A dictionary of custom headers to be added to all API requests. scheme: The URI scheme to use. This should be either `http` or `https` in most cases. Defaults to `https`. Raises: RuntimeError: The authentication passed to the `httpauth` parameter is invalid. You must pass either a tuple or a :class:`requests.auth.AuthBase` object. errors.OAuthAuthorizationError: The OAuth authorization is invalid. errors.LoginError: Login failed, the reason can be obtained from e.code and e.info (where e is the exception object) and will be one of the API:Login errors. The most common error code is "Failed", indicating a wrong username or password. """ # noqa: E501 api_limit = 500 def __init__( self, host: str, path: str = '/w/', ext: str = '.php', pool: Optional[requests.Session] = None, retry_timeout: int = 30, max_retries: int = 25, wait_callback: Callable[['Sleeper', int, Optional[Any]], Any] = lambda *x: None, clients_useragent: Optional[str] = None, max_lag: int = 3, compress: bool = True, force_login: bool = True, do_init: bool = True, httpauth: Union[ Tuple[Union[str, bytes], Union[str, bytes]], requests.auth.AuthBase, List[Union[str, bytes]], None, ] = None, connection_options: Optional[MutableMapping[str, Any]] = None, consumer_token: Optional[str] = None, consumer_secret: Optional[str] = None, access_token: Optional[str] = None, access_secret: Optional[str] = None, client_certificate: Optional[Union[str, Tuple[str, str]]] = None, custom_headers: Optional[Mapping[str, str]] = None, scheme: str = 'https', reqs: Optional[MutableMapping[str, Any]] = None ) -> None: # Setup member variables self.host = host self.path = path self.ext = ext self.credentials = None # type: Optional[Tuple[str, str, Optional[str]]] self.compress = compress self.max_lag = str(max_lag) self.force_login = force_login self.logged_in = False if reqs and connection_options: raise ValueError( "reqs is a deprecated alias of connection_options. Do not specify both." ) if reqs: warnings.warn( "reqs is deprecated in mwclient 1.0.0. Use connection_options instead", DeprecationWarning ) connection_options = reqs self.requests = connection_options or {} self.scheme = scheme if 'timeout' not in self.requests: self.requests['timeout'] = 30 # seconds if consumer_token is not None: auth = OAuth1(consumer_token, consumer_secret, access_token, access_secret) elif isinstance(httpauth, (list, tuple)): # workaround weird requests default to encode as latin-1 # https://github.com/mwclient/mwclient/issues/315 # https://github.com/psf/requests/issues/4564 httpauth = [ it.encode("utf-8") if isinstance(it, str) else it for it in httpauth ] auth = HTTPBasicAuth(*httpauth) elif httpauth is None or isinstance(httpauth, (AuthBase,)): auth = httpauth else: # FIXME: Raise a specific exception instead of a generic RuntimeError. raise RuntimeError('Authentication is not a tuple or an instance of AuthBase') self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback) # Site properties self.blocked = False # type: Union[Tuple[str, str], bool] # Is user blocked? self.hasmsg = False # Whether current user has new messages self.groups = [] # type: List[str] # Groups current user is in self.rights = [] # type: List[str] # Rights current user has self.tokens = {} # type: Dict[str, str] # Edit tokens of the current user self.version = None # type: Optional[VersionTuple] self.namespaces = self.default_namespaces # type: Dict[int, str] # Setup connection if pool is None: self.connection = requests.Session() # type: requests.Session self.connection.auth = auth if client_certificate: self.connection.cert = client_certificate # Set User-Agent header field if clients_useragent: ua = clients_useragent + ' ' + USER_AGENT else: ua = USER_AGENT self.connection.headers['User-Agent'] = ua if custom_headers: self.connection.headers.update(custom_headers) else: self.connection = pool # Page generators self.pages = listing.PageList(self) self.categories = listing.PageList(self, namespace=14) self.images = listing.PageList(self, namespace=6) # Compat page generators self.Pages = self.pages self.Categories = self.categories self.Images = self.images # Initialization status self.initialized = False # Upload chunk size in bytes self.chunk_size = 1048576 if do_init: try: self.site_init() except errors.APIError as e: if e.args[0] == 'mwoauth-invalid-authorization': raise errors.OAuthAuthorizationError(self, e.code, e.info) # Private wiki, do init after login if e.args[0] not in {'unknown_action', 'readapidenied'}: raise
[docs] def site_init(self) -> None: """Populates the object with information about the current user and site. This is done automatically when creating the object, unless explicitly disabled using the `do_init=False` constructor argument.""" if self.initialized: info = self.get('query', meta='userinfo', uiprop='groups|rights') userinfo = info['query']['userinfo'] self.username = userinfo['name'] self.groups = userinfo.get('groups', []) self.rights = userinfo.get('rights', []) self.tokens = {} return meta = self.get('query', meta='siteinfo|userinfo', siprop='general|namespaces', uiprop='groups|rights', retry_on_error=False) # Extract site info self.site = meta['query']['general'] self.namespaces = { namespace['id']: namespace.get('*', '') for namespace in meta['query']['namespaces'].values() } self.version = self.version_tuple_from_generator(self.site['generator']) # Require MediaWiki version >= 1.16 self.require(1, 16) # User info userinfo = meta['query']['userinfo'] self.username = userinfo['name'] self.groups = userinfo.get('groups', []) self.rights = userinfo.get('rights', []) self.initialized = True
[docs] @staticmethod def version_tuple_from_generator( string: str, prefix: str = 'MediaWiki ' ) -> VersionTuple: """Return a version tuple from a MediaWiki Generator string. Example: >>> Site.version_tuple_from_generator("MediaWiki 1.5.1") (1, 5, 1) Args: string: The MediaWiki Generator string. prefix: The expected prefix of the string. Returns: tuple[int, int, Union[int, str]...]: The version tuple. """ if not string.startswith(prefix): raise errors.MediaWikiVersionError(f'Unknown generator {string}') version = string[len(prefix):] def _split_version(version: str) -> Iterator[str]: """Split a version string into segments. Args: version (str): The version string (without the prefix). Yields: str: The individual segments of the version string. """ current_segment = '' for curr_char in version: if curr_char in "-+_.": yield current_segment current_segment = '' elif current_segment and ( (current_segment[-1].isdigit() and curr_char.isalpha()) or (current_segment[-1].isalpha() and curr_char.isdigit()) ): yield current_segment current_segment = curr_char else: current_segment += curr_char yield current_segment version_tuple = tuple( int(segment) if segment.isdigit() else segment for segment in _split_version(version) ) # type: Tuple[Union[int, str], ...] if len(version_tuple) < 2: raise errors.MediaWikiVersionError(f'Unknown MediaWiki {".".join(version)}') # Ensure the major and minor version components are integers. # Non-integer values for these components are not supported and will # cause comparison issues. if not all(isinstance(segment, int) for segment in version_tuple[:2]): raise errors.MediaWikiVersionError( f'Unknown MediaWiki {".".join(version)}. ' 'Major and minor version must be integers.' ) return version_tuple
default_namespaces = { 0: '', 1: 'Talk', 2: 'User', 3: 'User talk', 4: 'Project', 5: 'Project talk', 6: 'Image', 7: 'Image talk', 8: 'MediaWiki', 9: 'MediaWiki talk', 10: 'Template', 11: 'Template talk', 12: 'Help', 13: 'Help talk', 14: 'Category', 15: 'Category talk', -1: 'Special', -2: 'Media' } def __repr__(self) -> str: return f"<{self.__class__.__name__} object '{self.host}{self.path}'>"
[docs] def get(self, action: str, *args: Tuple[str, Any], **kwargs: Any) -> Dict[str, Any]: """Perform a generic API call using GET. This is just a shorthand for calling api() with http_method='GET'. All arguments will be passed on. Args: action: The MediaWiki API action to be performed. *args: Tupled key-value pairs to be passed to the `api.php` script as data. In most cases, it is preferable to pass these as keyword arguments instead. This can be useful when the parameter name is a reserved Python keyword (e.g. `from`). **kwargs: Arguments to be passed to the API call. Returns: The raw response from the API call, as a dictionary. """ return self.api(action, 'GET', *args, **kwargs)
[docs] def post(self, action: str, *args: Tuple[str, Any], **kwargs: Any) -> Dict[str, Any]: """Perform a generic API call using POST. This is just a shorthand for calling api() with http_method='POST'. All arguments will be passed on. Args: action: The MediaWiki API action to be performed. *args: Tupled key-value pairs to be passed to the `api.php` script as data. In most cases, it is preferable to pass these as keyword arguments instead. This can be useful when the parameter name is a reserved Python keyword (e.g. `from`). **kwargs: Arguments to be passed to the API call. Returns: The raw response from the API call, as a dictionary. """ return self.api(action, 'POST', *args, **kwargs)
[docs] def api( self, action: str, http_method: str = 'POST', *args: Tuple[str, Any], **kwargs: Any ) -> Dict[str, Any]: """Perform a generic API call and handle errors. All arguments will be passed on. Args: action: The MediaWiki API action to be performed. http_method: The HTTP method to use. *args: Tupled key-value pairs to be passed to the `api.php` script as data. In most cases, it is preferable to pass these as keyword arguments instead. This can be useful when the parameter name is a reserved Python keyword (e.g. `from`). **kwargs: Arguments to be passed to the API call. Example: To get coordinates from the GeoData MediaWiki extension at English Wikipedia: >>> site = Site('en.wikipedia.org') >>> result = site.api('query', prop='coordinates', titles='Oslo|Copenhagen') >>> for page in result['query']['pages'].values(): ... if 'coordinates' in page: ... title = page['title'] ... lat = page['coordinates'][0]['lat'] ... lon = page['coordinates'][0]['lon'] ... print(f'{title} {lat} {lon}') Oslo 59.95 10.75 Copenhagen 55.6761 12.5683 Returns: The raw response from the API call, as a dictionary. """ kwargs.update(args) # this enables new-style continuation in mediawiki 1.21 # through 1.25, can be dropped when we bump baseline to 1.26 if action == 'query' and 'continue' not in kwargs: kwargs['continue'] = '' if action == 'query': if 'meta' in kwargs: kwargs['meta'] += '|userinfo' else: kwargs['meta'] = 'userinfo' if 'uiprop' in kwargs: kwargs['uiprop'] += '|blockinfo|hasmsg' else: kwargs['uiprop'] = 'blockinfo|hasmsg' sleeper = self.sleepers.make() while True: info = self.raw_api(action, http_method, **kwargs) if not info: info = {} if self.handle_api_result(info, sleeper=sleeper): return info
[docs] def handle_api_result( self, info: Mapping[str, Any], kwargs: Optional[Mapping[str, Any]] = None, sleeper: Optional['Sleeper'] = None ) -> bool: """Checks the given API response, raising an appropriate exception or sleeping if necessary. Args: info: The API result. kwargs: Additional arguments to be passed when raising an :class:`errors.APIError`. sleeper: A :class:`~sleep.Sleeper` instance to use when sleeping. Returns: `False` if the given API response contains an exception, else `True`. """ if sleeper is None: sleeper = self.sleepers.make() try: userinfo = info['query']['userinfo'] except KeyError: userinfo = {} if 'blockedby' in userinfo: self.blocked = (userinfo['blockedby'], userinfo.get('blockreason', '')) else: self.blocked = False self.hasmsg = 'messages' in userinfo if userinfo: self.logged_in = 'anon' not in userinfo if 'warnings' in info: for module, warning in info['warnings'].items(): if '*' in warning: log.warning(warning['*']) if 'error' in info: if info['error'].get('code') in {'internal_api_error_DBConnectionError', 'internal_api_error_DBQueryError'}: sleeper.sleep() return False # cope with https://phabricator.wikimedia.org/T106066 if ( info['error'].get('code') == 'mwoauth-invalid-authorization' and 'Nonce already used' in info['error'].get('info') ): log.warning('Retrying due to nonce error, see' 'https://phabricator.wikimedia.org/T106066') sleeper.sleep() return False if 'query' in info['error']: # Semantic Mediawiki does not follow the standard error format raise errors.APIError(None, info['error']['query'], kwargs) if '*' in info['error']: raise errors.APIError(info['error']['code'], info['error']['info'], info['error']['*']) raise errors.APIError(info['error']['code'], info['error']['info'], kwargs) return True
@staticmethod def _query_string(*args: Tuple[str, Any], **kwargs: Any) -> Dict[str, Any]: kwargs.update(args) qs1 = [ (k, v) for k, v in kwargs.items() if k not in {'wpEditToken', 'token'} ] qs2 = [ (k, v) for k, v in kwargs.items() if k in {'wpEditToken', 'token'} ] return OrderedDict(qs1 + qs2)
[docs] def raw_call( self, script: str, data: Mapping[str, Any], files: Optional[Mapping[str, Union[BinaryIO, Tuple[str, BinaryIO]]]] = None, retry_on_error: bool = True, http_method: str = 'POST' ) -> str: """ Perform a generic request and return the raw text. In the event of a network problem, or an HTTP response with status code 5XX, we'll wait and retry the configured number of times before giving up if `retry_on_error` is True. `requests.exceptions.HTTPError` is still raised directly for HTTP responses with status codes in the 4XX range, and invalid HTTP responses. Args: script: Script name, usually 'api'. data: Post data files: Files to upload retry_on_error: Retry on connection error http_method: The HTTP method, defaults to 'POST' Returns: The raw text response. Raises: errors.MaximumRetriesExceeded: The API request failed and the maximum number of retries was exceeded. requests.exceptions.HTTPError: Received an invalid HTTP response, or a status code in the 4xx range. requests.exceptions.ConnectionError: Encountered an unexpected error while performing the API request. requests.exceptions.Timeout: The API request timed out. """ headers = {} if self.compress: headers['Accept-Encoding'] = 'gzip' sleeper = self.sleepers.make((script, data)) scheme = self.scheme host = self.host if isinstance(host, (list, tuple)): # type: ignore[unreachable] warnings.warn( # type: ignore[unreachable] 'Specifying host as a tuple is deprecated as of mwclient 0.10.1. ' + 'Please use the new scheme argument instead.', DeprecationWarning ) scheme, host = host url = f'{scheme}://{host}{self.path}{script}{self.ext}' while True: toraise = None # type: Optional[Union[requests.RequestException, str]] wait_time = 0 args = {'files': files, 'headers': headers} # type: Dict[str, Any] for k, v in self.requests.items(): args[k] = v if http_method == 'GET': args['params'] = data else: args['data'] = data try: stream = self.connection.request(http_method, url, **args) if stream.headers.get('x-database-lag'): wait_time = int( stream.headers.get('retry-after') # type: ignore[arg-type] ) log.warning('Database lag exceeds max lag. ' 'Waiting for %d seconds', wait_time) # fall through to the sleep elif stream.status_code == 200: return stream.text elif ( (stream.status_code < 500 or stream.status_code > 599) and stream.status_code != 429 # 429 Too Many Requests is retryable ): stream.raise_for_status() else: if not retry_on_error: stream.raise_for_status() log.warning('Received %d response: %s. Retrying in a moment.', stream.status_code, stream.text) toraise = "stream" # fall through to the sleep except ( requests.exceptions.ConnectionError, requests.exceptions.Timeout ) as err: # In the event of a network problem # (e.g. DNS failure, refused connection, etc), # Requests will raise a ConnectionError exception. if not retry_on_error: raise log.warning('Connection error. Retrying in a moment.') toraise = err # proceed to the sleep # all retry paths come here try: sleeper.sleep(wait_time) except errors.MaximumRetriesExceeded: if toraise == "stream": stream.raise_for_status() elif toraise and isinstance(toraise, BaseException): raise toraise else: raise
[docs] def raw_api( self, action: str, http_method: str = 'POST', retry_on_error: bool = True, *args: Tuple[str, Any], **kwargs: Any ) -> Dict[str, Any]: """Send a call to the API. Args: action: The MediaWiki API action to perform. http_method: The HTTP method to use in the request. retry_on_error: Whether to retry API call on connection errors. *args: Tupled key-value pairs to be passed to the `api.php` script as data. In most cases, it is preferable to pass these as keyword arguments instead. This can be useful when the parameter name is a reserved Python keyword (e.g. `from`). **kwargs: Arguments to be passed to the `api.php` script as data. Returns: The API response. Raises: errors.APIDisabledError: The MediaWiki API is disabled for this instance. errors.InvalidResponse: The API response could not be decoded from JSON. errors.MaximumRetriesExceeded: The API request failed and the maximum number of retries was exceeded. requests.exceptions.HTTPError: Received an invalid HTTP response, or a status code in the 4xx range. requests.exceptions.ConnectionError: Encountered an unexpected error while performing the API request. requests.exceptions.Timeout: The API request timed out. """ kwargs['action'] = action kwargs['format'] = 'json' data = self._query_string(*args, **kwargs) res = self.raw_call('api', data, retry_on_error=retry_on_error, http_method=http_method) try: return cast(Dict[str, Any], json.loads(res, object_pairs_hook=OrderedDict)) except ValueError: if res.startswith('MediaWiki API is not enabled for this site.'): raise errors.APIDisabledError raise errors.InvalidResponse(res)
[docs] def raw_index( self, action: str, http_method: str = 'POST', *args: Tuple[str, Any], **kwargs: Any ) -> str: """Sends a call to index.php rather than the API. Args: action: The MediaWiki API action to perform. http_method: The HTTP method to use in the request. *args: Tupled key-value pairs to be passed to the `index.php` script as data. In most cases, it is preferable to pass these as keyword arguments instead. This can be useful when the parameter name is a reserved Python keyword (e.g. `from`). **kwargs: Arguments to be passed to the `index.php` script as data. Returns: The API response. Raises: errors.MaximumRetriesExceeded: The API request failed and the maximum number of retries was exceeded. requests.exceptions.HTTPError: Received an invalid HTTP response, or a status code in the 4xx range. requests.exceptions.ConnectionError: Encountered an unexpected error while performing the API request. requests.exceptions.Timeout: The API request timed out. """ kwargs['action'] = action kwargs['maxlag'] = self.max_lag data = self._query_string(*args, **kwargs) return self.raw_call('index', data, http_method=http_method)
[docs] def require( self, major: int, minor: int, revision: Optional[int] = None, raise_error: bool = True ) -> Optional[bool]: """Check whether the current wiki matches the required version. Args: major: The required major version. minor: The required minor version. revision: The required revision. raise_error: Whether to throw an error if the version of the current wiki is below the required version. Defaults to `True`. Returns: `False` if the version of the current wiki is below the required version, else `True`. If `raise_error` is `False` and the version is below the required version, `None` is returned. Raises: errors.MediaWikiVersionError: The current wiki is below the required version and `raise_error=True`. RuntimeError: It `raise_error` is `None` and the `version` attribute is unset This is usually done automatically on construction of the :class:`Site`, unless `do_init=False` is passed to the constructor. After instantiation, the :meth:`~Site.site_init` functon can be used to retrieve and set the `version`. NotImplementedError: If the `revision` argument was passed. The logic for this is currently unimplemented. """ if self.version is None: if raise_error is None: warnings.warn( # type: ignore[unreachable] 'Passing raise_error=None to require is deprecated and will be ' 'removed in a future version. Use raise_error=False instead.', DeprecationWarning ) return None elif raise_error is False: return None else: # FIXME: Replace this with a specific error raise RuntimeError(f'Site {repr(self)} has not yet been initialized') if revision is None: if self.version[:2] >= (major, minor): return True elif raise_error: raise errors.MediaWikiVersionError( f'Requires version {major}.{minor}, ' f'current version is {self.version[0]}.{self.version[1]}') else: return False else: raise NotImplementedError
# Actions
[docs] def email( self, user: str, text: str, subject: str, cc: bool = False ) -> Dict[str, Any]: """ Send email to a specified user on the wiki. >>> try: ... site.email('SomeUser', 'Some message', 'Some subject') ... except mwclient.errors.NoSpecifiedEmail: ... print('User does not accept email, or has no email address.') API doc: https://www.mediawiki.org/wiki/API:Email Args: user: Username of the recipient text: Body of the email subject: Subject of the email cc: True to send a copy of the email to yourself (default is False) Returns: Dictionary of the JSON response Raises: NoSpecifiedEmail (mwclient.errors.NoSpecifiedEmail): User doesn't accept email EmailError (mwclient.errors.EmailError): Other email errors """ token = self.get_token('email') try: info = self.post('emailuser', target=user, subject=subject, text=text, ccme=cc, token=token) except errors.APIError as e: if e.args[0] == 'noemail': raise errors.NoSpecifiedEmail(user, e.args[1]) raise errors.EmailError(*e) # type: ignore[misc] return info
[docs] def login( self, username: Optional[str] = None, password: Optional[str] = None, cookies: Optional[Cookies] = None, domain: Optional[str] = None ) -> None: """ Login to the wiki using a username and bot password. The method returns nothing if the login was successful, but raises and error if it was not. If you use mediawiki >= 1.27 and try to login with normal account (not botpassword account), you should use `clientlogin` instead, because login action is deprecated since 1.27 with normal account and will stop working in the near future. See these pages to learn more: - https://www.mediawiki.org/wiki/API:Login and - https://www.mediawiki.org/wiki/Manual:Bot_passwords Note: at least until v1.33.1, botpasswords accounts seem to not have "userrights" permission. If you need to update user's groups, this permission is required so you must use `client login` with a user who has userrights permission (a bureaucrat for eg.). Args: username: MediaWiki username password: MediaWiki password cookies: Custom cookies to include with the log-in request. domain: Sends domain name for authentication; used by some MediaWiki plug-ins like the 'LDAP Authentication' extension. Raises: LoginError (mwclient.errors.LoginError): Login failed, the reason can be obtained from e.code and e.info (where e is the exception object) and will be one of the API:Login errors. The most common error code is "Failed", indicating a wrong username or password. MaximumRetriesExceeded: API call to log in failed and was retried until all retries were exhausted. This will not occur if the credentials are merely incorrect. See MaximumRetriesExceeded for possible reasons. APIError: An API error occurred. Rare, usually indicates an internal server error. """ if username and password: self.credentials = (username, password, domain) if cookies: self.connection.cookies.update(cookies) if self.credentials: sleeper = self.sleepers.make() kwargs = { 'lgname': self.credentials[0], 'lgpassword': self.credentials[1] } if self.credentials[2]: kwargs['lgdomain'] = self.credentials[2] # Try to login using the scheme for MW 1.27+. If the wiki is read protected, # it is not possible to get the wiki version upfront using the API, so we just # have to try. If the attempt fails, we try the old method. try: kwargs['lgtoken'] = self.get_token('login') except (errors.APIError, KeyError): log.debug('Failed to get login token, MediaWiki is older than 1.27.') while True: login = self.post('login', **kwargs) if login['login']['result'] == 'Success': break elif login['login']['result'] == 'NeedToken': kwargs['lgtoken'] = login['login']['token'] elif login['login']['result'] == 'Throttled': sleeper.sleep(int(login['login'].get('wait', 5))) else: raise errors.LoginError(self, login['login']['result'], login['login']['reason']) self.site_init()
[docs] def clientlogin(self, cookies: Optional[Cookies] = None, **kwargs: Any) -> Any: """ Login to the wiki using a username and password. The method returns True if it's a success or the returned response if it's a multi-steps login process you started. In case of failure it raises some Errors. Example for classic username / password clientlogin request: >>> try: ... site.clientlogin(username='myusername', password='secret') ... except mwclient.errors.LoginError as e: ... print(f'Could not login to MediaWiki: {e}' ) Args: cookies: Custom cookies to include with the log-in request. **kwargs: Custom vars used for clientlogin as: - loginmergerequestfields - loginpreservestate - loginreturnurl, - logincontinue - logintoken - *: additional params depending on the available auth requests. to log with classic username / password, you need to add `username` and `password` See https://www.mediawiki.org/wiki/API:Login#Method_2._clientlogin Returns: bool | dict: True if login was successful, or the response if it's a multi-steps login process you started. Raises: LoginError (mwclient.errors.LoginError): Login failed, the reason can be obtained from e.code and e.info (where e is the exception object) and will be one of the API:Login errors. The most common error code is "Failed", indicating a wrong username or password. MaximumRetriesExceeded: API call to log in failed and was retried until all retries were exhausted. This will not occur if the credentials are merely incorrect. See MaximumRetriesExceeded for possible reasons. APIError: An API error occurred. Rare, usually indicates an internal server error. """ self.require(1, 27) if cookies: self.connection.cookies.update(cookies) if not kwargs: # TODO: Check if we should raise an error here. It's not clear what the # expected behavior is when no kwargs are passed. To update the # cookies, the user can update the connection object directly. return if 'logintoken' not in kwargs: kwargs['logintoken'] = self.get_token('login') if 'logincontinue' not in kwargs and 'loginreturnurl' not in kwargs: kwargs['loginreturnurl'] = f'{self.scheme}://{self.host}' response = self.post('clientlogin', **kwargs) status = response['clientlogin'].get('status') if status == 'PASS': self.site_init() return True elif status in ('UI', 'REDIRECT'): return response['clientlogin'] else: raise errors.LoginError(self, status, response['clientlogin'].get('message'))
[docs] def get_token( self, type: str, force: bool = False, title: Optional[str] = None ) -> str: """Request a MediaWiki access token of the given `type`. Args: type: The type of token to request. force: Force the request of a new token, even if a token of that type has already been cached. title: The page title for which to request a token. Only used for MediaWiki versions below 1.24. Returns: A MediaWiki token of the requested `type`. Raises: errors.APIError: A token of the given type could not be retrieved. """ if self.version is None or self.require(1, 24, raise_error=False): # The 'csrf' (cross-site request forgery) token introduced in 1.24 replaces # the majority of older tokens, like edittoken and movetoken. if type not in { 'watch', 'patrol', 'rollback', 'userrights', 'login', 'createaccount', }: type = 'csrf' if type not in self.tokens: self.tokens[type] = '0' if self.tokens.get(type, '0') == '0' or force: if self.version is None or self.require(1, 24, raise_error=False): # We use raw_api() rather than api() because api() is adding "userinfo" # to the query and this raises a readapideniederror if the wiki is read # protected, and we're trying to fetch a login token. info = self.raw_api('query', 'GET', meta='tokens', type=type) self.handle_api_result(info) # Note that for read protected wikis, we don't know the version when # fetching the login token. If it's < 1.27, the request below will # raise a KeyError that we should catch. self.tokens[type] = info['query']['tokens'][f'{type}token'] else: if title is None: # Some dummy title was needed to get a token prior to 1.24 title = 'Test' info = self.post('query', titles=title, prop='info', intoken=type) for i in info['query']['pages'].values(): if i['title'] == title: self.tokens[type] = i[f'{type}token'] return self.tokens[type]
[docs] def upload( self, file: Union[str, BinaryIO, None] = None, filename: Optional[str] = None, description: str = '', ignore: bool = False, file_size: Optional[int] = None, url: Optional[str] = None, filekey: Optional[str] = None, comment: Optional[str] = None, asynchronous: bool = False, stash: bool = False ) -> Dict[str, Any]: """Upload a file to the site. Note that one of `file`, `filekey` and `url` must be specified, but not more than one. For normal uploads, you specify `file`. For asynchronous uploads, upload specifying `stash=True`, then take the parameter `filekey` from the response JSON and call `upload` again, specifying the `filekey` and `asynchronous=True`. API doc: https://www.mediawiki.org/wiki/API:Upload Args: file: File object or stream to upload. filename: Destination filename, don't include namespace prefix like 'File:' description: Wikitext for the file description page. ignore: True to upload despite any warnings. file_size: Deprecated in mwclient 0.7 url: URL to fetch the file from. filekey: Key that identifies a previous upload that was stashed temporarily. comment: Upload comment. Also used as the initial page text for new files if `description` is not specified. asynchronous: Whether the server should upload the file asynchronously. Must be used with the filekey of a previously stashed file stash: If set, the file will be stashed instead of uploaded right away. Example: >>> client.upload(open('somefile', 'rb'), filename='somefile.jpg', description='Some description') Async Example: >>> response = client.upload(open('somefile','rb'), filename='somefile.jpg', description='Some description', stash=True) >>> client.upload(filekey=response["filekey"], asynchronous=True) Returns: JSON result from the API. Raises: errors.InsufficientPermission requests.exceptions.HTTPError errors.FileExists: The file already exists and `ignore` is `False`. """ if file_size is not None: # Note that DeprecationWarning is hidden by default since Python 2.7 warnings.warn( 'file_size is deprecated since mwclient 0.7', DeprecationWarning ) if filename is None: raise TypeError('filename must be specified') if len([x for x in [file, filekey, url] if x is not None]) != 1: raise TypeError( "exactly one of 'file', 'filekey' and 'url' must be specified" ) image = self.Images[filename] if not image.can('upload'): raise errors.InsufficientPermission(filename) if comment is None: comment = description text = None else: comment = comment text = description if file is not None: if not hasattr(file, 'read'): file = open(file, 'rb') # Narrowing the type of file from Union[str, BinaryIO, None] # to BinaryIO, since we know it's not a str at this point. file = cast(BinaryIO, file) content_size = file.seek(0, 2) file.seek(0) if (self.require(1, 20, raise_error=False) and content_size > self.chunk_size): return self.chunk_upload(file, filename, ignore, comment, text) predata = { 'action': 'upload', 'format': 'json', 'filename': filename, 'comment': comment, 'text': text, 'token': image.get_token('edit'), } if stash: predata['stash'] = 'true' if ignore: predata['ignorewarnings'] = 'true' if url: predata['url'] = url if asynchronous: if filekey is None: raise TypeError( """'asynchronous' must be used with the filekey from a previously stashed upload.""" ) predata['async'] = 'true' # sessionkey was renamed to filekey in MediaWiki 1.18 # https://phabricator.wikimedia.org/rMW5f13517e36b45342f228f3de4298bb0fe186995d if not self.require(1, 18, raise_error=False): predata['sessionkey'] = filekey else: predata['filekey'] = filekey postdata = predata files = None if file is not None: # Workaround for https://github.com/mwclient/mwclient/issues/65 # ---------------------------------------------------------------- # Since the filename in Content-Disposition is not interpreted, # we can send some ascii-only dummy name rather than the real # filename, which might contain non-ascii. files = {'file': ('fake-filename', file)} sleeper = self.sleepers.make() while True: data = self.raw_call('api', postdata, files) info = json.loads(data) if not info: info = {} if self.handle_api_result(info, kwargs=predata, sleeper=sleeper): response = info.get('upload', {}) # type: Dict[str, Any] # Workaround for https://github.com/mwclient/mwclient/issues/211 # ---------------------------------------------------------------- # Raise an error if the file already exists. This is necessary because # MediaWiki returns a warning, not an error, leading to silent failure. # The user must explicitly set ignore=True (ignorewarnings=True) to # overwrite an existing file. if ignore is False and 'exists' in response.get('warnings', {}): raise errors.FileExists(filename) break if file is not None: file.close() return response
[docs] def chunk_upload( self, file: BinaryIO, filename: str, ignorewarnings: bool, comment: str, text: Optional[str] ) -> Dict[str, Any]: """Upload a file to the site in chunks. This method is called by `Site.upload` if you are connecting to a newer MediaWiki installation, so it's normally not necessary to call this method directly. Args: file: File object or stream to upload. filename: Destination filename. ignorewarnings: True to upload despite any warnings. comment: Upload comment. text: Initial page text for new files. """ image = self.Images[filename] content_size = file.seek(0, 2) file.seek(0) params = { 'action': 'upload', 'format': 'json', 'stash': 1, 'offset': 0, 'filename': filename, 'filesize': content_size, 'token': image.get_token('edit'), } if ignorewarnings: params['ignorewarnings'] = 'true' sleeper = self.sleepers.make() offset = 0 for chunk in read_in_chunks(file, self.chunk_size): while True: data = self.raw_call('api', params, files={'chunk': chunk}) info = json.loads(data) if self.handle_api_result(info, kwargs=params, sleeper=sleeper): response = info.get('upload', {}) # type: Dict[str, Any] break offset += chunk.tell() chunk.close() log.debug('%s: Uploaded %d of %d bytes', filename, offset, content_size) params['filekey'] = response['filekey'] if response['result'] == 'Continue': params['offset'] = response['offset'] elif response['result'] == 'Success': file.close() break else: # Some kind or error or warning occurred. In any case, we do not # get the parameters we need to continue, so we should return # the response now. file.close() return response del params['action'] del params['stash'] del params['offset'] params['comment'] = comment params['text'] = text return self.post('upload', **params)
[docs] def parse( self, text: Optional[str] = None, title: Optional[str] = None, page: Optional[str] = None, prop: Optional[str] = None, redirects: bool = False, mobileformat: bool = False ) -> Any: """Parses the given content and returns parser output. API doc: https://www.mediawiki.org/wiki/API:Parse Args: text: Text to parse. title: Title of page the text belongs to. page: The name of a page to parse. Cannot be used together with text and title. prop: Which pieces of information to get. Multiple values should be separated using the pipe (`|`) character. redirects: Resolve the redirect, if the given `page` is a redirect. Defaults to `False`. mobileformat: Return parse output in a format suitable for mobile devices. Defaults to `False`. Returns: The parse output as generated by MediaWiki. """ kwargs = {} if text is not None: kwargs['text'] = text if title is not None: kwargs['title'] = title if page is not None: kwargs['page'] = page if prop is not None: kwargs['prop'] = prop if redirects: kwargs['redirects'] = '1' if mobileformat: kwargs['mobileformat'] = '1' result = self.post('parse', **kwargs) return result['parse']
# def block(self): TODO? # def unblock: TODO? # def import: TODO?
[docs] def patrol( self, rcid: Optional[int] = None, revid: Optional[int] = None, tags: Optional[str] = None ) -> Any: """Patrol a page or a revision. Either ``rcid`` or ``revid`` (but not both) must be given. The ``rcid`` and ``revid`` arguments may be obtained using the :meth:`Site.recentchanges` function. API doc: https://www.mediawiki.org/wiki/API:Patrol Args: rcid: The recentchanges ID to patrol. revid: The revision ID to patrol. tags: Change tags to apply to the entry in the patrol log. Multiple tags can be given, by separating them with the pipe (|) character. Returns: Dict[str, Any]: The API response as a dictionary containing: - **rcid** (int): The recentchanges id. - **nsid** (int): The namespace id. - **title** (str): The page title. Raises: errors.APIError: The MediaWiki API returned an error. Notes: - ``autopatrol`` rights are required in order to use this function. - ``revid`` requires at least MediaWiki 1.22. - ``tags`` requires at least MediaWiki 1.27. """ if self.require(1, 17, raise_error=False): token = self.get_token('patrol') else: # For MediaWiki versions earlier than 1.17, the patrol token is the same the # edit token. token = self.get_token('edit') result = self.post('patrol', rcid=rcid, revid=revid, tags=tags, token=token) return result['patrol']
# Lists
[docs] def allpages( self, start: Optional[str] = None, prefix: Optional[str] = None, namespace: Namespace = '0', filterredir: str = 'all', minsize: Optional[int] = None, maxsize: Optional[int] = None, prtype: Optional[str] = None, prlevel: Optional[str] = None, limit: Optional[int] = None, dir: str = 'ascending', filterlanglinks: str = 'all', generator: bool = True, end: Optional[str] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None, with_content: bool = False ) -> listing.List: """ Retrieve all pages on the wiki as a generator. API doc: https://www.mediawiki.org/wiki/API:Allpages """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_page_listing_args('ap', generator, with_content, { 'from': start, 'to': end, 'minsize': minsize, 'maxsize': maxsize, 'prtype': prtype, 'prlevel': prlevel, 'namespace': namespace, 'filterredir': filterredir, 'dir': dir, 'filterlanglinks': filterlanglinks }) return listing.List.get_list(generator)(self, 'allpages', 'ap', max_items=max_items, api_chunk_size=api_chunk_size, return_values='title', **kwargs)
[docs] def allimages( self, start: Optional[str] = None, prefix: Optional[str] = None, minsize: Optional[int] = None, maxsize: Optional[int] = None, limit: Optional[int] = None, dir: str = 'ascending', sha1: Optional[str] = None, sha1base36: Optional[str] = None, generator: bool = True, end: Optional[str] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None, with_content: bool = False ) -> listing.List: """ Retrieve all images on the wiki as a generator. API doc: https://www.mediawiki.org/wiki/API:Allimages """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_page_listing_args('ai', generator, with_content, { 'from': start, 'to': end, 'minsize': minsize, 'maxsize': maxsize, 'dir': dir, 'sha1': sha1, 'sha1base36': sha1base36 }) return listing.List.get_list(generator)(self, 'allimages', 'ai', max_items=max_items, api_chunk_size=api_chunk_size, return_values='timestamp|url', **kwargs)
[docs] def allcategories( self, start: Optional[str] = None, prefix: Optional[str] = None, dir: str = 'ascending', limit: Optional[int] = None, generator: bool = True, end: Optional[str] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None, with_content: bool = False ) -> listing.List: """ Retrieve all categories on the wiki as a generator. API doc: https://www.mediawiki.org/wiki/API:Allcategories """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_page_listing_args('ac', generator, with_content, { 'from': start, 'to': end, 'prefix': prefix, 'dir': dir }) return listing.List.get_list(generator)(self, 'allcategories', 'ac', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def allusers( self, start: Optional[str] = None, prefix: Optional[str] = None, group: Optional[str] = None, prop: Optional[str] = None, limit: Optional[int] = None, witheditsonly: bool = False, activeusers: bool = False, rights: Optional[str] = None, end: Optional[str] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None ) -> listing.List: """ Retrieve all users on the wiki as a generator. API doc: https://www.mediawiki.org/wiki/API:Allusers """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('au', False, { 'from': start, 'to': end, 'prefix': prefix, 'group': group, 'prop': prop, 'rights': rights, 'witheditsonly': witheditsonly, 'activeusers': activeusers }) return listing.List(self, 'allusers', 'au', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def blocks( self, start: Optional[str] = None, end: Optional[str] = None, dir: str = 'older', ids: Optional[str] = None, users: Optional[str] = None, limit: Optional[int] = None, prop: str = 'id|user|by|timestamp|expiry|reason|flags', max_items: Optional[int] = None, api_chunk_size: Optional[int] = None ) -> listing.List: """Retrieve blocks as a generator. API doc: https://www.mediawiki.org/wiki/API:Blocks Returns: mwclient.listings.List: Generator yielding dicts, each dict containing: - user: The username or IP address of the user - id: The ID of the block - timestamp: When the block was added - expiry: When the block runs out (infinity for indefinite blocks) - reason: The reason they are blocked - allowusertalk: Key is present (empty string) if the user is allowed to edit their user talk page - by: the administrator who blocked the user - nocreate: key is present (empty string) if the user's ability to create accounts has been disabled. See Also: When using the ``users`` filter to search for blocked users, only one block per given user will be returned. If you want to retrieve the entire block log for a specific user, you can use the :meth:`Site.logevents` method with ``type=block`` and ``title='User:JohnDoe'``. """ # TODO: Fix. Fix what? (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('bk', False, { 'start': start, 'end': end, 'dir': dir, 'ids': ids, 'users': users, 'prop': prop }) return listing.List(self, 'blocks', 'bk', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def deletedrevisions( self, start: Optional[str] = None, end: Optional[str] = None, dir: str = 'older', namespace: Optional[int] = None, limit: Optional[int] = None, prop: str = 'user|comment', max_items: Optional[int] = None, api_chunk_size: Optional[int] = None ) -> listing.List: """ Retrieve deleted revisions as a generator. API doc: https://www.mediawiki.org/wiki/API:Deletedrevs """ # TODO: Fix (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('dr', False, { 'start': start, 'end': end, 'dir': dir, 'namespace': namespace, 'prop': prop, }) return listing.List(self, 'deletedrevs', 'dr', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def exturlusage( self, query: str, prop: Optional[str] = None, protocol: str = 'http', namespace: Optional[Namespace] = None, limit: Optional[int] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None ) -> listing.List: r"""Retrieve the list of pages that link to a particular domain or URL, as a generator. API doc: https://www.mediawiki.org/wiki/API:Exturlusage This API call mirrors the Special:LinkSearch function on-wiki. Query can be a domain like 'bbc.co.uk'. Wildcards can be used, e.g. '\*.bbc.co.uk'. Alternatively, a query can contain a full domain name and some or all of a URL: e.g. '\*.wikipedia.org/wiki/\*' See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details. Returns: mwclient.listings.List: Generator yielding dicts, each dict containing: - url: The URL linked to. - ns: Namespace of the wiki page - pageid: The ID of the wiki page - title: The page title. """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('eu', False, { 'query': query, 'protocol': protocol, 'namespace': namespace, 'prop': prop, }) return listing.List(self, 'exturlusage', 'eu', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def logevents( self, type: Optional[str] = None, prop: Optional[str] = None, start: Optional[str] = None, end: Optional[str] = None, dir: str = 'older', user: Optional[str] = None, title: Optional[str] = None, limit: Optional[int] = None, action: Optional[str] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None ) -> listing.List: """ Retrieve logevents as a generator. API doc: https://www.mediawiki.org/wiki/API:Logevents """ kwargs = listing.List.get_listing_args('le', False, { 'start': start, 'end': end, 'dir': dir, 'type': type, 'user': user, 'title': title, 'action': action, 'prop': prop, }) return listing.List(self, 'logevents', 'le', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def checkuserlog( self, user: Optional[str] = None, target: Optional[str] = None, limit: Optional[int] = None, dir: str = 'older', start: Optional[str] = None, end: Optional[str] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = 10 ) -> listing.NestedList: """Retrieve checkuserlog items as a generator.""" (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('cul', False, { 'start': start, 'end': end, 'dir': dir, 'target': target, 'user': user }) return listing.NestedList( 'entries', self, 'checkuserlog', 'cul', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs, )
# def protectedtitles requires 1.15
[docs] def random( self, namespace: Namespace, limit: Optional[int] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = 20 ) -> listing.List: """Retrieve a generator of random pages from a particular namespace. API doc: https://www.mediawiki.org/wiki/API:Random max_items specifies the number of random articles retrieved. api_chunk_size and limit (deprecated) specify the API chunk size. namespace is a namespace identifier integer. Generator contains dictionary with namespace, page ID and title. """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('rn', False, { 'namespace': namespace, }) return listing.List(self, 'random', 'rn', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def recentchanges( self, start: Optional[str] = None, end: Optional[str] = None, dir: str = 'older', namespace: Optional[Namespace] = None, prop: Optional[str] = None, show: Optional[str] = None, limit: Optional[int] = None, type: Optional[str] = None, toponly: Optional[bool] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None ) -> listing.List: """ List recent changes to the wiki, à la Special:Recentchanges. API doc: https://www.mediawiki.org/wiki/API:Recentchanges """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('rc', False, { 'start': start, 'end': end, 'dir': dir, 'namespace': namespace, 'prop': prop, 'show': show, 'type': type, 'toponly': '1' if toponly else None }) return listing.List(self, 'recentchanges', 'rc', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def revisions( self, revids: List[Union[int, str]], prop: str = 'ids|timestamp|flags|comment|user', slots: str = 'main', ) -> List[Dict[str, Any]]: """Get data about a list of revisions. See also the `Page.revisions()` method. API doc: https://www.mediawiki.org/wiki/API:Revisions Example: Get revision text for two revisions: >>> for revision in site.revisions([689697696, 689816909], prop='content'): ... print(revision['*']) Args: revids: A list of (max 50) revisions. prop: Which properties to get for each revision. Returns: A list of revisions """ kwargs = { 'prop': 'revisions', 'rvprop': prop, 'revids': '|'.join(map(str, revids)) } if self.version[:2] > (1, 31): # type: ignore[index] # https://github.com/mwclient/mwclient/issues/389 kwargs['rvslots'] = slots revisions = [] pages = self.get('query', **kwargs).get('query', {}).get('pages', {}).values() for page in pages: for revision in page.get('revisions', ()): revision['pageid'] = page.get('pageid') revision['pagetitle'] = page.get('title') if 'timestamp' in revision: revision['timestamp'] = parse_timestamp(revision['timestamp']) revisions.append(revision) return revisions
[docs] def search( self, search: str, namespace: Namespace = '0', what: Optional[str] = None, redirects: bool = False, limit: Optional[int] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None ) -> listing.List: """Perform a full text search. API doc: https://www.mediawiki.org/wiki/API:Search Example: >>> for result in site.search('prefix:Template:Citation/'): ... print(result.get('title')) Args: search: The query string namespace: The namespace to search (default: 0) what: Search scope: 'text' for fulltext, or 'title' for titles only. Depending on the search backend, both options may not be available. For instance `CirrusSearch <https://www.mediawiki.org/wiki/Help:CirrusSearch>`_ doesn't support 'title', but instead provides an "intitle:" query string filter. redirects: Include redirect pages in the search (option removed in MediaWiki 1.23). Returns: mwclient.listings.List: Search results iterator """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('sr', False, { 'search': search, 'namespace': namespace, 'what': what, 'redirects': '1' if redirects else None }) return listing.List(self, 'search', 'sr', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def usercontributions( self, user: str, start: Optional[str] = None, end: Optional[str] = None, dir: str = 'older', namespace: Optional[Namespace] = None, prop: Optional[str] = None, show: Optional[str] = None, limit: Optional[int] = None, uselang: Optional[str] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None ) -> listing.List: """ List the contributions made by a given user to the wiki. API doc: https://www.mediawiki.org/wiki/API:Usercontribs """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('uc', False, { 'start': start, 'end': end, 'dir': dir, 'namespace': namespace, 'prop': prop, 'show': show, 'user': user }) return listing.List(self, 'usercontribs', 'uc', max_items=max_items, api_chunk_size=api_chunk_size, uselang=uselang, **kwargs)
[docs] def users( self, users: Iterable[str], prop: str = 'blockinfo|groups|editcount' ) -> listing.List: """ Get information about a list of users. API doc: https://www.mediawiki.org/wiki/API:Users """ return listing.List(self, 'users', 'us', ususers='|'.join(users), usprop=prop)
[docs] def watchlist( self, allrev: bool = False, start: Optional[str] = None, end: Optional[str] = None, namespace: Optional[Namespace] = None, dir: str = 'older', prop: Optional[str] = None, show: Optional[str] = None, limit: Optional[int] = None, max_items: Optional[int] = None, api_chunk_size: Optional[int] = None ) -> listing.List: """ List the pages on the current user's watchlist. API doc: https://www.mediawiki.org/wiki/API:Watchlist """ (max_items, api_chunk_size) = handle_limit(limit, max_items, api_chunk_size) kwargs = listing.List.get_listing_args('wl', False, { 'start': start, 'end': end, 'dir': dir, 'namespace': namespace, 'prop': prop, 'show': show, 'allrev': '1' if allrev else None }) return listing.List(self, 'watchlist', 'wl', max_items=max_items, api_chunk_size=api_chunk_size, **kwargs)
[docs] def expandtemplates( self, text: str, title: Optional[str] = None, generatexml: bool = False ) -> Union[str, Tuple[str, str]]: """ Takes wikitext (text) and expands templates. API doc: https://www.mediawiki.org/wiki/API:Expandtemplates Args: text: Wikitext to convert. title: Title of the page. generatexml: Generate the XML parse tree. Defaults to `False`. Returns: If generatexml is False, returns the expanded wikitext as a string. If generatexml is True, returns a tuple of (expanded wikitext, XML parse tree). """ if self.require(1, 24, raise_error=False): return self._expandtemplates_1_24(text, title, generatexml) else: return self._expandtemplates_1_13(text, title, generatexml)
def _expandtemplates_1_24( self, text: str, title: Optional[str], generatexml: bool ) -> Union[str, Tuple[str, str]]: """Expand templates using the 1.24 API.""" result = self.post( 'expandtemplates', text=text, title=title, prop='wikitext|parsetree' if generatexml else 'wikitext', ) wikitext = str(result['expandtemplates']['wikitext']) if generatexml: parsetree = str(result['expandtemplates']['parsetree']) return wikitext, parsetree return wikitext def _expandtemplates_1_13( self, text: str, title: Optional[str], generatexml: bool ) -> Union[str, Tuple[str, str]]: """Expand templates using the 1.13-1.23 API.""" result = self.post( 'expandtemplates', text=text, title=title, generatexml='1' if generatexml else None, ) wikitext = str(result['expandtemplates']['*']) if generatexml: parsetree = str(result['parsetree']['*']) return wikitext, parsetree return wikitext
[docs] def ask(self, query: str, title: Optional[str] = None) -> Iterable[Dict[str, Any]]: """ Ask a query against Semantic MediaWiki. API doc: https://semantic-mediawiki.org/wiki/Ask_API Args: query: The SMW query to be executed. Returns: Generator for retrieving all search results, with each answer as a dictionary. If the query is invalid, an APIError is raised. A valid query with zero results will not raise any error. Examples: >>> query = "[[Category:my cat]]|[[Has name::a name]]|?Has property" >>> for answer in site.ask(query): >>> for title, data in answer.items() >>> print(title) >>> print(data) """ kwargs = {} # type: Dict[str, Any] if title is not None: kwargs['title'] = title offset = 0 # type: Optional[int] while offset is not None: results = self.raw_api('ask', query=f'{query}|offset={offset}', http_method='GET', **kwargs) self.handle_api_result(results) # raises APIError on error offset = cast(Optional[int], results.get('query-continue-offset')) answers = results['query'].get('results', []) if isinstance(answers, dict): # In older versions of Semantic MediaWiki (at least until 2.3.0) # a list was returned. In newer versions an object is returned # with the page title as key. answers = [answer for answer in answers.values()] yield from answers