Viewing file: ImportMediawiki.class.php (20.55 KB) -rw-rw-rw- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?php /* vim: set expandtab sw=4 ts=4 sts=4: */ /** * MediaWiki import plugin for phpMyAdmin * * @package PhpMyAdmin-Import * @subpackage MediaWiki */ if (! defined('PHPMYADMIN')) { exit; }
/* Get the import interface */ require_once 'libraries/plugins/ImportPlugin.class.php';
/** * Handles the import for the MediaWiki format * * @package PhpMyAdmin-Import * @subpackage MediaWiki */ class ImportMediawiki extends ImportPlugin { /** * Whether to analyze tables * * @var bool */ private $_analyze;
/**
 * Builds the plugin instance.
 *
 * All initialisation is delegated to setProperties().
 */
public function __construct()
{
    $this->setProperties();
}
/**
 * Configures the analyze flag and the descriptive properties of this plugin.
 * Invoked from the constructor.
 *
 * @return void
 */
protected function setProperties()
{
    // Column analysis is enabled for every import target except 'table'
    $this->_setAnalyze($GLOBALS['plugin_param'] !== 'table');

    $props = 'libraries/properties/';
    include_once "$props/plugins/ImportPluginProperties.class.php";

    // Describe the plugin: label, file extension, MIME type and options
    $plugin_properties = new ImportPluginProperties();
    $plugin_properties->setText(__('MediaWiki Table'));
    $plugin_properties->setExtension('txt');
    $plugin_properties->setMimeType('text/plain');
    $plugin_properties->setOptions(array());
    $plugin_properties->setOptionsText(__('Options'));

    $this->properties = $plugin_properties;
}
/**
 * Observer hook, invoked when a PluginManager this plugin is attached to
 * calls PluginManager::notify().
 *
 * This plugin does not react to notifications, so the body is empty.
 *
 * @param SplSubject $subject The PluginManager notifying the observer
 *                            of an update.
 *
 * @return void
 */
public function update (SplSubject $subject)
{
}
/**
 * Handles the whole import logic
 *
 * Reads the import file chunk by chunk through PMA_importGetNextChunk(),
 * splits the data into lines and parses them as MediaWiki table markup.
 * Every time an end-of-table marker ("|}") is met, the rows collected so
 * far are handed to $this->_importDataOneTable().
 *
 * Recognised line kinds:
 *  - "<!--" ... "-->"   comment block; "Table data for `x`" inside it names
 *                       the next table, "Table structure for `x`" makes the
 *                       parser skip everything up to the next data comment
 *  - "{|"               start of a table
 *  - "|-", "|+", "|}"   row separator / caption / end of table
 *  - "|..." or "!..."   data cells / header cells
 * Any other non-empty line sets the global $error flag.
 *
 * @return void
 */
public function doImport()
{
    global $error, $timeout_passed, $finished;

    // Defaults for parser

    // The buffer that will be used to store chunks read from the imported file
    $buffer = '';

    // Used as storage for the last part of the current chunk data
    // Will be prepended to the first line of the next chunk, if there is one
    $last_chunk_line = '';

    // Remembers whether the current buffer line is part of a comment
    $inside_comment = false;
    // Remembers whether the current buffer line is part of a data comment
    $inside_data_comment = false;
    // Remembers whether the current buffer line is part of a structure comment
    $inside_structure_comment = false;

    // MediaWiki only accepts "\n" as row terminator
    $mediawiki_new_line = "\n";

    // Initialize the name of the current table
    $cur_table_name = "";

    // Per-table parser state. It is initialised up front so that a row or
    // end-of-table marker met before any "{|" never reads undefined variables.
    $cur_temp_table = array();
    $cur_temp_table_headers = array();
    $cur_temp_line = array();
    $in_table_header = false;

    while (! $finished && ! $error && ! $timeout_passed) {
        $data = PMA_importGetNextChunk();

        if ($data === false) {
            // Subtract data we didn't handle yet and stop processing
            // NOTE(review): $buffer holds the previously read chunk here,
            // while the part that is really unparsed is $last_chunk_line;
            // confirm which length the resume logic expects.
            $GLOBALS['offset'] -= strlen($buffer);
            break;
        } elseif ($data === true) {
            // No new data: only the carried-over partial line is left to
            // handle. The previous buffer was already parsed, so it must
            // not be fed to the parser a second time.
            $buffer = '';
        } else {
            // Take over the new chunk
            $buffer = $data;
            unset($data);
            // Don't parse string if we're not at the end
            // and don't have a new line inside. The chunk is kept in
            // $last_chunk_line so it is not lost when the next chunk
            // replaces $buffer.
            if (! $finished
                && strpos($buffer, $mediawiki_new_line) === false
            ) {
                $last_chunk_line .= $buffer;
                continue;
            }
        }

        // Because of reading chunk by chunk, the first line from the buffer
        // contains only a portion of an actual line from the imported file.
        // Therefore, we have to append it to the last line from the previous
        // chunk. If we are at the first chunk, $last_chunk_line should be empty.
        $buffer = $last_chunk_line . $buffer;

        // Process the buffer line by line
        $buffer_lines = explode($mediawiki_new_line, $buffer);

        $full_buffer_lines_count = count($buffer_lines);
        if (! $finished) {
            // If the reading is not finalised, the final line of the current
            // chunk will not be complete: keep it for the next iteration
            $full_buffer_lines_count -= 1;
            $last_chunk_line = $buffer_lines[$full_buffer_lines_count];
        } else {
            // Everything is parsed below, nothing is carried over
            $last_chunk_line = '';
        }

        for ($line_nr = 0; $line_nr < $full_buffer_lines_count; ++ $line_nr) {
            $cur_buffer_line = trim($buffer_lines[$line_nr]);

            // If the line is empty, go to the next one
            if ($cur_buffer_line === '') {
                continue;
            }

            $first_character = $cur_buffer_line[0];
            $line_prefix = substr($cur_buffer_line, 0, 2);
            $matches = array();

            if (substr($cur_buffer_line, 0, 4) === "<!--") {
                // Beginning of a comment
                $inside_comment = true;
                continue;
            } elseif ($inside_comment) {
                // The closing marker is three characters long, so only the
                // first three characters of the line are compared
                if (substr($cur_buffer_line, 0, 3) === "-->") {
                    // Only data comments are closed. The structure comments
                    // will be closed when a data comment begins (in order to
                    // skip structure tables)
                    if ($inside_data_comment) {
                        $inside_data_comment = false;
                    }

                    // End comments that are not related to table structure
                    if (! $inside_structure_comment) {
                        $inside_comment = false;
                    }
                } else {
                    // Check table name
                    $match_table_name = array();
                    if (preg_match(
                        "/^Table data for `(.*)`$/",
                        $cur_buffer_line,
                        $match_table_name
                    )
                    ) {
                        $cur_table_name = $match_table_name[1];
                        $inside_data_comment = true;

                        // End ignoring structure rows
                        if ($inside_structure_comment) {
                            $inside_structure_comment = false;
                        }
                    } elseif (preg_match(
                        "/^Table structure for `(.*)`$/",
                        $cur_buffer_line,
                        $match_table_name
                    )
                    ) {
                        // The structure comments will be ignored
                        $inside_structure_comment = true;
                    }
                }
                continue;
            } elseif (preg_match('/^\{\|(.*)$/', $cur_buffer_line, $matches)) {
                // Start of a table

                // This will store all the column info on all rows from
                // the current table read from the buffer
                $cur_temp_table = array();

                // Headers of the previous table must not leak into a
                // table that has no header row of its own
                $cur_temp_table_headers = array();

                // Will be used as storage for the current row in the buffer
                // Once all its columns are read, it will be added to
                // $cur_temp_table and then it will be emptied
                $cur_temp_line = array();

                // Helps us differentiate the header columns
                // from the normal columns
                $in_table_header = false;
                // End processing because the current line does not
                // contain any column information
            } elseif ($line_prefix === '|-'
                || $line_prefix === '|+'
                || $line_prefix === '|}'
            ) {
                // Begin of a row, caption, or end of the table

                // Add current line to the values storage
                if (! empty($cur_temp_line)) {
                    // If the current line contains header cells
                    // ( marked with '!' ),
                    // it will be marked as table header
                    if ($in_table_header) {
                        // Set the header columns
                        $cur_temp_table_headers = $cur_temp_line;
                    } else {
                        // Normal line, add it to the table
                        $cur_temp_table [] = $cur_temp_line;
                    }
                }

                // Empty the temporary buffer
                $cur_temp_line = array();

                // No more processing required at the end of the table
                if ($line_prefix === '|}') {
                    $current_table = array(
                        $cur_table_name,
                        $cur_temp_table_headers,
                        $cur_temp_table
                    );

                    // Import the current table data into the database
                    $this->_importDataOneTable($current_table);

                    // Reset table name
                    $cur_table_name = "";
                }
                // What's after the row tag is now only attributes

            } elseif (($first_character === '|') || ($first_character === '!')) {
                // Cell elements

                if ($first_character === '!') {
                    // Header cells:
                    // mark as table header, but treat as normal row
                    $cur_buffer_line = str_replace('!!', '||', $cur_buffer_line);
                    // Will be used to set $cur_temp_line as table header
                    $in_table_header = true;
                } else {
                    $in_table_header = false;
                }

                // Loop through each table cell
                $cells = $this->_explodeMarkup($cur_buffer_line);
                foreach ($cells as $cell) {
                    // A cell could contain both parameters and data
                    $cell_data = explode('|', $cell, 2);

                    // A '|' inside an invalid link should not
                    // be mistaken as delimiting cell parameters: the data
                    // part is taken only when no "[[" precedes the first
                    // '|'. strpos() returns an offset or false, never true,
                    // so the test has to be made against false.
                    if (strpos($cell_data[0], '[[') === false) {
                        $cell = isset($cell_data[1])
                            ? $cell_data[1]
                            : $cell_data[0];
                    }

                    // Delete the beginning of the column, if there is one
                    $cell = trim($cell);
                    $col_start_chars = array("|", "!");
                    foreach ($col_start_chars as $col_start_char) {
                        if (strpos($cell, $col_start_char) === 0) {
                            $cell = trim(substr($cell, 1));
                        }
                    }

                    // Add the cell to the row
                    $cur_temp_line [] = $cell;
                } // foreach $cells
            } else {
                // If it's none of the above, then the current line has a bad
                // format
                // NOTE(review): $message is local to this method and is not
                // read again here; confirm whether it was meant to be
                // exposed to the caller (e.g. as a global).
                $message = PMA_Message::error(
                    __('Invalid format of mediawiki input on line: <br />%s.')
                );
                $message->addParam($cur_buffer_line);
                $error = true;
            }
        } // End treating full buffer lines
    } // while - finished parsing buffer
}
/**
 * Imports data from a single table
 *
 * The table is only analysed and imported when the analyze flag of this
 * plugin (see $this->_getAnalyze()) is set; PMA_importRunQuery() is
 * called in every case to commit whatever is left in the query buffers.
 *
 * @param array $table containing all table info:
 *                     <code>
 *                     $table[0] - string containing table name
 *                     $table[1] - array[] of table headers
 *                     $table[2] - array[][] of table content rows
 *                     </code>
 *
 * @return void
 */
private function _importDataOneTable ($table)
{
    $analyze = $this->_getAnalyze();
    if ($analyze) {
        // Set the table name
        $this->_setTableName($table[0]);

        // A table may have no content rows at all; in that case there is
        // no first row to derive the number of columns from
        $first_row = isset($table[2][0]) ? $table[2][0] : array();

        // Set generic names for table headers if they don't exist
        $this->_setTableHeaders($table[1], $first_row);

        // Create the tables array to be used in PMA_buildSQL()
        $tables = array();
        $tables [] = array($table[0], $table[1], $table[2]);

        // Obtain the best-fit MySQL types for each column
        $analyses = array();
        $analyses [] = PMA_analyzeTable($tables[0]);

        $this->_executeImportTables($tables, $analyses);
    }

    // Commit any possible data in buffers
    PMA_importRunQuery();
}
/**
 * Gives the table a generated name when none was supplied
 *
 * A non-empty name is left untouched. Otherwise the name becomes
 * "TABLE n", where n is one more than the number of entries returned
 * by 'SHOW TABLES'.
 *
 * @param string &$table_name reference to the name of the table
 *
 * @return void
 */
private function _setTableName(&$table_name)
{
    if (! empty($table_name)) {
        return;
    }

    $existing_tables = $GLOBALS['dbi']->fetchResult('SHOW TABLES');
    // todo check if the name below already exists
    $table_name = 'TABLE ' . (count($existing_tables) + 1);
}
/**
 * Fills in generic header names (COL 1, COL 2, ...) when a table has none
 *
 * Existing, non-empty headers are kept as they are. The number of
 * generated names equals the number of cells in the given row.
 *
 * @param array &$table_headers reference to the array containing the headers
 *                              of a table
 * @param array $table_row      array containing the first content row
 *
 * @return void
 */
private function _setTableHeaders(&$table_headers, $table_row)
{
    if (! empty($table_headers)) {
        return;
    }

    // The first content row tells how many columns the table has
    $column_total = count($table_row);
    $position = 0;
    while ($position < $column_total) {
        $table_headers [$position] = 'COL ' . ($position + 1);
        ++ $position;
    }
}
/**
 * Chooses the target database and options, then calls PMA_buildSQL()
 * Used in PMA_importDataAllTables() and $this->_importDataOneTable()
 *
 * When the global $db is a non-empty string it is used as the database
 * name with the option 'create_db' set to false; otherwise the name
 * 'mediawiki_DB' is used and no options are passed.
 *
 * @param array &$tables   structure:
 *                         array(
 *                             array(table_name, array() column_names, array()() rows)
 *                         )
 * @param array &$analyses structure:
 *                         $analyses = array(
 *                             array(array() column_types, array() column_sizes)
 *                         )
 *
 * @global string $db name of the database to import in
 *
 * @return void
 */
private function _executeImportTables(&$tables, &$analyses)
{
    global $db;

    // Array of SQL strings
    // Non-applicable parameters
    $create = null;

    // $db_name : The currently selected database name, if applicable
    //            No backquotes
    // $options : An associative array of options
    $db_name = 'mediawiki_DB';
    $options = null;
    if (strlen($db)) {
        $db_name = $db;
        $options = array('create_db' => false);
    }

    // Create and execute necessary SQL statements from data
    PMA_buildSQL($db_name, $tables, $analyses, $create, $options);

    unset($tables, $analyses);
}
/**
 * Replaces all instances of the '||' separator found inside quoted
 * attribute values of delimited tags in a given string
 *
 * The text is scanned character by character. A tag starts at
 * $start_delim and ends at $end_delim; inside a tag, a value quoted with
 * " or ' is an attribute. Every "||" inside such an attribute is replaced
 * by $replace, while "||" anywhere else is copied unchanged. A single '|'
 * is always copied unchanged.
 *
 * @param string $start_delim start delimiter (a single character)
 * @param string $end_delim   end delimiter (a single character)
 * @param string $replace     the string to be replaced with
 * @param string $subject     the text to be replaced
 *
 * @return string with replacements
 */
private function _delimiterReplace($start_delim, $end_delim, $replace, $subject)
{
    // String that will be returned
    $cleaned = "";
    // Possible states of current character
    $inside_tag = false;
    $inside_attribute = false;
    // Attributes can be declared with either " or '
    $start_attribute_character = false;

    // The full separator is "||";
    // This remembers if the previous character was '|'
    $partial_separator = false;

    // Parse text char by char; the length does not change while scanning
    $subject_length = strlen($subject);
    for ($i = 0; $i < $subject_length; $i ++) {
        $cur_char = $subject[$i];
        // Check for separators
        if ($cur_char == '|') {
            if (! $inside_attribute) {
                // Outside an attribute this is part of a real separator,
                // so we append it to the current segment
                $cleaned .= $cur_char;
                if ($partial_separator) {
                    // A complete real separator ends any open tag state
                    $inside_tag = false;
                    $inside_attribute = false;
                }
            } elseif ($partial_separator) {
                // Second '|' of a pair inside an attribute: the pair is
                // replaced by the placeholder (the first '|' was held back)
                $cleaned .= $replace;
            }

            // If the previous character was also '|', then this ends a
            // full separator. If not, this may be the beginning of one
            $partial_separator = ! $partial_separator;
        } else {
            // If we're inside a tag attribute and the current character is
            // not '|', but the previous one was, it means that the single '|'
            // was not appended, so we append it now
            if ($partial_separator && $inside_attribute) {
                $cleaned .= "|";
            }
            // If the char is different from "|", no separator can be formed
            $partial_separator = false;

            // any other character should be appended to the current segment
            $cleaned .= $cur_char;

            if ($cur_char === $start_delim && ! $inside_attribute) {
                // start of a tag
                $inside_tag = true;
            } elseif ($cur_char === $end_delim && ! $inside_attribute) {
                // end of a tag
                $inside_tag = false;
            } elseif (($cur_char == '"' || $cur_char == "'") && $inside_tag) {
                // start or end of an attribute
                if (! $inside_attribute) {
                    $inside_attribute = true;
                    // remember the attribute`s declaration character (" or ')
                    $start_attribute_character = $cur_char;
                } elseif ($cur_char == $start_attribute_character) {
                    $inside_attribute = false;
                    // unset attribute declaration character
                    $start_attribute_character = false;
                }
            }
        }
    } // end for each character in $subject

    // A single '|' held back inside an attribute at the very end of the
    // text has no following character to flush it, so flush it here
    if ($partial_separator && $inside_attribute) {
        $cleaned .= "|";
    }

    return $cleaned;
}
/**
 * Splits a line into cells on the '||' separator, similarly to explode
 *
 * '||' is the standard cell separator of the mediawiki format. Instances
 * of it that $this->_delimiterReplace() finds inside markup tags are not
 * treated as separators and are kept in the resulting items.
 * Used in parsing buffer lines containing data cells
 *
 * @param string $text text to be split
 *
 * @return array
 */
private function _explodeMarkup($text)
{
    $separator = "||";
    $placeholder = "\x00";

    // The placeholder must not occur in the input, so strip it first
    $without_placeholder = str_replace($placeholder, '', $text);

    // Hide the separators that sit inside HTML-like tags
    $protected = $this->_delimiterReplace(
        "<", ">", $placeholder, $without_placeholder
    );

    // Split on the remaining separators and restore the hidden ones;
    // str_replace() handles every array element and keeps the keys
    return str_replace(
        $placeholder,
        $separator,
        explode($separator, $protected)
    );
}
/* ~~~~~~~~~~~~~~~~~~~~ Getters and Setters ~~~~~~~~~~~~~~~~~~~~ */
/**
 * Tells whether imported tables should be analyzed
 *
 * @return bool the value last stored with _setAnalyze()
 */
private function _getAnalyze()
{
    return $this->_analyze;
}
/**
 * Stores whether imported tables should be analyzed
 *
 * @param bool $analyze true to analyze the tables, false otherwise
 *
 * @return void
 */
private function _setAnalyze($analyze)
{
    $this->_analyze = $analyze;
}
}
|