Index: trunk/parsers.php =================================================================== --- trunk/parsers.php (revision 0) +++ trunk/parsers.php (revision 0) @@ -0,0 +1,559 @@ +parse( $file ); + } +} + +/** + * WXR Parser that makes use of the SimpleXML PHP extension + */ +class WXR_Parser_SimpleXML { + function parse( $file ) { + $authors = $posts = $categories = $tags = $terms = array(); + + $internal_errors = libxml_use_internal_errors(true); + $xml = simplexml_load_file( $file ); + // halt if loading produces an error + if ( ! $xml ) + return new WP_Error( 'WXR_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ) ); + + $wxr_version = $xml->xpath('/rss/channel/wp:wxr_version'); + if ( ! $wxr_version ) + return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) ); + + $wxr_version = (string) trim( $wxr_version[0] ); + // confirm that we are dealing with the correct file format + if ( ! preg_match( '/^\d\.\d$/', $wxr_version ) ) + return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) ); + + $base_url = $xml->xpath('/rss/channel/wp:base_site_url'); + $base_url = (string) trim( $base_url[0] ); + + $namespaces = $xml->getDocNamespaces(); + if ( ! isset( $namespaces['wp'] ) ) + $namespaces['wp'] = 'http://wordpress.org/export/1.1/'; + if ( ! 
isset( $namespaces['excerpt'] ) ) + $namespaces['excerpt'] = 'http://wordpress.org/export/1.1/excerpt/'; + + // grab authors + foreach ( $xml->xpath('/rss/channel/wp:author') as $author_arr ) { + $a = $author_arr->children( $namespaces['wp'] ); + $login = (string) $a->author_login; + $authors[$login] = array( + 'author_login' => $login, + 'author_email' => (string) $a->author_email, + 'author_display_name' => (string) $a->author_display_name, + 'author_first_name' => (string) $a->author_first_name, + 'author_last_name' => (string) $a->author_last_name + ); + } + + // grab cats, tags and terms + foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) { + $t = $term_arr->children( $namespaces['wp'] ); + $categories[] = array( + 'term_id' => (int) $t->term_id, + 'category_nicename' => (string) $t->category_nicename, + 'category_parent' => (string) $t->category_parent, + 'cat_name' => (string) $t->cat_name, + 'category_description' => (string) $t->category_description + ); + } + + foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) { + $t = $term_arr->children( $namespaces['wp'] ); + $tags[] = array( + 'term_id' => (int) $t->term_id, + 'tag_slug' => (string) $t->tag_slug, + 'tag_name' => (string) $t->tag_name, + 'tag_description' => (string) $t->tag_description + ); + } + + foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) { + $t = $term_arr->children( $namespaces['wp'] ); + $terms[] = array( + 'term_id' => (int) $t->term_id, + 'term_taxonomy' => (string) $t->term_taxonomy, + 'slug' => (string) $t->term_slug, + 'term_parent' => (string) $t->term_parent, + 'term_name' => (string) $t->term_name, + 'term_description' => (string) $t->term_description + ); + } + + // grab posts + foreach ( $xml->channel->item as $item ) { + $post = array( + 'post_title' => (string) $item->title, + 'guid' => (string) $item->guid, + ); + + $dc = $item->children( 'http://purl.org/dc/elements/1.1/' ); + $post['post_author'] = (string) $dc->creator; + + $content = 
$item->children( 'http://purl.org/rss/1.0/modules/content/' ); + $excerpt = $item->children( $namespaces['excerpt'] ); + $post['post_content'] = (string) $content->encoded; + $post['post_excerpt'] = (string) $excerpt->encoded; + + $wp = $item->children( $namespaces['wp'] ); + $post['post_id'] = (int) $wp->post_id; + $post['post_date'] = (string) $wp->post_date; + $post['post_date_gmt'] = (string) $wp->post_date_gmt; + $post['comment_status'] = (string) $wp->comment_status; + $post['ping_status'] = (string) $wp->ping_status; + $post['post_name'] = (string) $wp->post_name; + $post['status'] = (string) $wp->status; + $post['post_parent'] = (int) $wp->post_parent; + $post['menu_order'] = (int) $wp->menu_order; + $post['post_type'] = (string) $wp->post_type; + $post['post_password'] = (string) $wp->post_password; + $post['is_sticky'] = (int) $wp->is_sticky; + + foreach ( $item->category as $c ) { + $att = $c->attributes(); + if ( isset( $att['nicename'] ) ) + $post['terms'][] = array( + 'name' => (string) $c, + 'slug' => (string) $att['nicename'], + 'domain' => (string) $att['domain'] + ); + } + + foreach ( $wp->postmeta as $meta ) { + $post['postmeta'][] = array( + 'key' => (string) $meta->meta_key, + 'value' => (string) $meta->meta_value, + ); + } + + foreach ( $wp->comment as $comment ) { + $post['comments'][] = array( + 'comment_id' => (int) $comment->comment_id, + 'comment_author' => (string) $comment->comment_author, + 'comment_author_email' => (string) $comment->comment_author_email, + 'comment_author_IP' => (string) $comment->comment_author_IP, + 'comment_author_url' => (string) $comment->comment_author_url, + 'comment_date' => (string) $comment->comment_date, + 'comment_date_gmt' => (string) $comment->comment_date_gmt, + 'comment_content' => (string) $comment->comment_content, + 'comment_approved' => (string) $comment->comment_approved, + 'comment_type' => (string) $comment->comment_type, + 'comment_parent' => (string) $comment->comment_parent, + 
'comment_user_id' => (int) $comment->comment_user_id, + ); + } + + $posts[] = $post; + } + + return array( + 'authors' => $authors, + 'posts' => $posts, + 'categories' => $categories, + 'tags' => $tags, + 'terms' => $terms, + 'base_url' => $base_url + ); + } +} + +/** + * WXR Parser that makes use of the XML Parser PHP extension + * + * @todo wxr checking + */ +class WXR_Parser_XML { + var $wp_tags = array( + 'wp:post_id', 'wp:post_date', 'wp:post_date_gmt', 'wp:comment_status', 'wp:ping_status', + 'wp:status', 'wp:post_name', 'wp:post_parent', 'wp:menu_order', 'wp:post_type', 'wp:post_password', + 'wp:is_sticky', 'wp:term_id', 'wp:category_nicename', 'wp:category_parent', 'wp:cat_name', 'wp:category_description', + 'wp:tag_slug', 'wp:tag_name', 'wp:tag_description', 'wp:term_taxonomy', 'wp:term_parent', + 'wp:term_name', 'wp:term_description', 'wp:author_login', 'wp:author_email', 'wp:author_display_name', + 'wp:author_first_name', 'wp:author_last_name', + ); + var $wp_sub_tags = array( + 'wp:comment_id', 'wp:comment_author', 'wp:comment_author_email', 'wp:comment_author_url', + 'wp:comment_author_IP', 'wp:comment_date', 'wp:comment_date_gmt', 'wp:comment_content', + 'wp:comment_approved', 'wp:comment_type', 'wp:comment_parent', 'wp:comment_user_id', + ); + + function parse( $file ) { + $this->is_wxr_file = $this->in_post = $this->cdata = $this->data = $this->sub_data = $this->in_tag = $this->in_sub_tag = false; + $this->authors = $this->posts = $this->term = $this->category = $this->tag = array(); + + $xml = xml_parser_create( 'UTF-8' ); + xml_parser_set_option( $xml, XML_OPTION_SKIP_WHITE, 1 ); + xml_parser_set_option( $xml, XML_OPTION_CASE_FOLDING, 0 ); + xml_set_object( $xml, $this ); + xml_set_character_data_handler( $xml, 'cdata' ); + xml_set_element_handler( $xml, 'tag_open', 'tag_close' ); + + if ( ! 
xml_parse( $xml, file_get_contents( $file ), true ) ) { + $error_code = xml_get_error_code( $xml ); + $error_string = xml_error_string( $error_code ); + return new WP_Error( 'WXR_parse_error', 'There was an error when reading this WXR file', array( $error_code, $error_string ) ); + } + xml_parser_free( $xml ); + + if ( ! $this->is_wxr_file ) + return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) ); + + return array( + 'authors' => $this->authors, + 'posts' => $this->posts, + 'categories' => $this->category, + 'tags' => $this->tag, + 'terms' => $this->term, + 'base_url' => $this->base_url + ); + } + + function tag_open( $parse, $tag, $attr ) { + if ( in_array( $tag, $this->wp_tags ) ) { + $this->in_tag = substr( $tag, 3 ); + return; + } + + if ( in_array( $tag, $this->wp_sub_tags ) ) { + $this->in_sub_tag = substr( $tag, 3 ); + return; + } + + switch ( $tag ) { + case 'category': + if ( isset($attr['domain'], $attr['nicename']) ) { + $this->sub_data['domain'] = $attr['domain']; + $this->sub_data['slug'] = $attr['nicename']; + } + break; + case 'item': $this->in_post = true; + case 'title': if ( $this->in_post ) $this->in_tag = 'post_title'; break; + case 'guid': $this->in_tag = 'guid'; break; + case 'dc:creator': $this->in_tag = 'post_author'; break; + case 'content:encoded': $this->in_tag = 'post_content'; break; + case 'excerpt:encoded': $this->in_tag = 'post_excerpt'; break; + + case 'wp:term_slug': $this->in_tag = 'slug'; break; + case 'wp:meta_key': $this->in_sub_tag = 'key'; break; + case 'wp:meta_value': $this->in_sub_tag = 'value'; break; + } + } + + function cdata( $parser, $cdata ) { + if ( ! trim( $cdata ) ) + return; + + $this->cdata .= trim( $cdata ); + } + + function tag_close( $parser, $tag ) { + switch ( $tag ) { + case 'wp:comment': + if ( ! 
empty( $this->sub_data ) ) + $this->data['comments'][] = $this->sub_data; + $this->sub_data = false; + break; + case 'category': + if ( ! empty( $this->sub_data ) ) { + $this->sub_data['name'] = $this->cdata; + $this->data['terms'][] = $this->sub_data; + } + $this->sub_data = false; + break; + case 'wp:postmeta': + if ( ! empty( $this->sub_data ) ) + $this->data['postmeta'][] = $this->sub_data; + $this->sub_data = false; + break; + case 'item': + $this->posts[] = $this->data; + $this->data = false; + break; + case 'wp:category': + case 'wp:tag': + case 'wp:term': + $n = substr( $tag, 3 ); + array_push( $this->$n, $this->data ); + $this->data = false; + break; + case 'wp:author': + if ( ! empty($this->data['author_login']) ) + $this->authors[$this->data['author_login']] = $this->data; + $this->data = false; + break; + case 'wp:base_site_url': + $this->base_url = $this->cdata; + break; + case 'wp:wxr_version': + $this->is_wxr_file = preg_match( '/\d+\.\d+/', $this->cdata ); + break; + + default: + if ( $this->in_sub_tag ) { + $this->sub_data[$this->in_sub_tag] = ! empty( $this->cdata ) ? $this->cdata : ''; + $this->in_sub_tag = false; + } else if ( $this->in_tag ) { + $this->data[$this->in_tag] = ! empty( $this->cdata ) ? $this->cdata : ''; + $this->in_tag = false; + } + } + + $this->cdata = false; + } +} + +class WXR_Parser_Regex { + function WXR_Parser_Regex() { + $this->__construct(); + } + + function __construct() { + $this->has_gzip = is_callable( 'gzopen' ); + } + + function parse( $file ) { + $is_wxr = $in_post = false; + + $fp = $this->fopen( $file, 'r' ); + if ( $fp ) { + while ( ! $this->feof( $fp ) ) { + $importline = rtrim( $this->fgets( $fp ) ); + + if ( ! $is_wxr && preg_match( '|\d+\.\d+|', $importline ) ) + $is_wxr = true; + + if ( false !== strpos( $importline, '' ) ) { + preg_match( '|(.*?)|is', $importline, $url ); + $this->base_url = $url[1]; //esc_url (?) 
+ continue; + } + if ( false !== strpos( $importline, '' ) ) { + preg_match( '|(.*?)|is', $importline, $category ); + $this->categories[] = $this->process_category( $category[1] ); + continue; + } + if ( false !== strpos( $importline, '' ) ) { + preg_match( '|(.*?)|is', $importline, $tag ); + $this->tags[] = $this->process_tag( $tag[1] ); + continue; + } + if ( false !== strpos( $importline, '' ) ) { + preg_match( '|(.*?)|is', $importline, $term ); + $this->terms[] = $this->process_term( $term[1] ); + continue; + } + if ( false !== strpos( $importline, '' ) ) { + preg_match( '|(.*?)|is', $importline, $author ); + $a = $this->process_author( $author[1] ); + $this->authors[$a['author_login']] = $a; + continue; + } + if ( false !== strpos( $importline, '' ) ) { + $post = ''; + $in_post = true; + continue; + } + if ( false !== strpos( $importline, '' ) ) { + $in_post = false; + $this->posts[] = $this->process_post( $post ); + continue; + } + if ( $in_post ) { + $post .= $importline . "\n"; + } + } + + $this->fclose($fp); + } + + if ( ! 
$is_wxr ) + return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) ); + + return array( + 'authors' => $this->authors, + 'posts' => $this->posts, + 'categories' => $this->categories, + 'tags' => $this->tags, + 'terms' => $this->terms, + 'base_url' => $this->base_url + ); + } + + function get_tag( $string, $tag ) { + global $wpdb; + preg_match( "|<$tag.*?>(.*?)|is", $string, $return ); + if ( isset( $return[1] ) ) { + $return = preg_replace( '|^$|s', '$1', $return[1] ); + $return = $wpdb->escape( trim( $return ) ); + } else { + $return = ''; + } + return $return; + } + + function process_category( $c ) { + return array( + 'term_id' => $this->get_tag( $c, 'wp:term_id' ), + 'cat_name' => $this->get_tag( $c, 'wp:cat_name' ), + 'category_nicename' => $this->get_tag( $c, 'wp:category_nicename' ), + 'category_parent' => $this->get_tag( $c, 'wp:category_parent' ), + 'category_description' => $this->get_tag( $c, 'wp:category_description' ), + ); + } + + function process_tag( $t ) { + return array( + 'term_id' => $this->get_tag( $t, 'wp:term_id' ), + 'tag_name' => $this->get_tag( $t, 'wp:tag_name' ), + 'tag_slug' => $this->get_tag( $t, 'wp:tag_slug' ), + 'tag_description' => $this->get_tag( $t, 'wp:tag_description' ), + ); + } + + function process_term( $t ) { + return array( + 'term_id' => $this->get_tag( $t, 'wp:term_id' ), + 'term_taxonomy' => $this->get_tag( $t, 'wp:term_taxonomy' ), + 'slug' => $this->get_tag( $t, 'wp:term_slug' ), + 'term_parent' => $this->get_tag( $t, 'wp:term_parent' ), + 'term_name' => $this->get_tag( $t, 'wp:term_name' ), + 'term_description' => $this->get_tag( $t, 'wp:term_description' ), + ); + } + + function process_author( $a ) { + return array( + 'author_login' => $this->get_tag( $a, 'wp:author_login' ), + 'author_email' => $this->get_tag( $a, 'wp:author_email' ), + 'author_display_name' => $this->get_tag( $a, 'wp:author_display_name' ), + 
'author_first_name' => $this->get_tag( $a, 'wp:author_first_name' ), + 'author_last_name' => $this->get_tag( $a, 'wp:author_last_name' ), + ); + } + + function process_post( $post ) { + $post_id = $this->get_tag( $post, 'wp:post_id' ); + $post_title = $this->get_tag( $post, 'title' ); + $post_date = $this->get_tag( $post, 'wp:post_date' ); + $post_date_gmt = $this->get_tag( $post, 'wp:post_date_gmt' ); + $comment_status = $this->get_tag( $post, 'wp:comment_status' ); + $ping_status = $this->get_tag( $post, 'wp:ping_status' ); + $status = $this->get_tag( $post, 'wp:status' ); + $post_name = $this->get_tag( $post, 'wp:post_name' ); + $post_parent = $this->get_tag( $post, 'wp:post_parent' ); + $menu_order = $this->get_tag( $post, 'wp:menu_order' ); + $post_type = $this->get_tag( $post, 'wp:post_type' ); + $post_password = $this->get_tag( $post, 'wp:post_password' ); + $is_sticky = $this->get_tag( $post, 'wp:is_sticky' ); + $guid = $this->get_tag( $post, 'guid' ); + $post_author = $this->get_tag( $post, 'dc:creator' ); + + $post_excerpt = $this->get_tag( $post, 'excerpt:encoded' ); + $post_excerpt = preg_replace_callback( '|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_excerpt ); + $post_excerpt = str_replace( '
', '
', $post_excerpt ); + $post_excerpt = str_replace( '
', '
', $post_excerpt ); + + $post_content = $this->get_tag( $post, 'content:encoded' ); + $post_content = preg_replace_callback( '|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_content ); + $post_content = str_replace( '
', '
', $post_content ); + $post_content = str_replace( '
', '
', $post_content ); + + $postdata = compact( 'post_id', 'post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt', + 'post_title', 'status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent', + 'menu_order', 'post_type', 'post_password', 'is_sticky' + ); + + preg_match_all( '|(.+?)|is', $post, $terms, PREG_SET_ORDER ); + foreach ( $terms as $t ) { + $post_terms[] = array( + 'slug' => $t[2], + 'domain' => $t[1], + 'name' => str_replace( array( '' ), '', $t[3] ), + ); + } + if ( ! empty( $post_terms ) ) $postdata['terms'] = $post_terms; + + preg_match_all( '|(.+?)|is', $post, $comments ); + $comments = $comments[1]; + if ( $comments ) { + foreach ( $comments as $comment ) { + $post_comments[] = array( + 'comment_id' => $this->get_tag( $comment, 'wp:comment_id' ), + 'comment_author' => $this->get_tag( $comment, 'wp:comment_author' ), + 'comment_author_email' => $this->get_tag( $comment, 'wp:comment_author_email' ), + 'comment_author_IP' => $this->get_tag( $comment, 'wp:comment_author_IP' ), + 'comment_author_url' => $this->get_tag( $comment, 'wp:comment_author_url' ), + 'comment_date' => $this->get_tag( $comment, 'wp:comment_date' ), + 'comment_date_gmt' => $this->get_tag( $comment, 'wp:comment_date_gmt' ), + 'comment_content' => $this->get_tag( $comment, 'wp:comment_content' ), + 'comment_approved' => $this->get_tag( $comment, 'wp:comment_approved' ), + 'comment_type' => $this->get_tag( $comment, 'wp:comment_type' ), + 'comment_parent' => $this->get_tag( $comment, 'wp:comment_parent' ), + ); + } + } + if ( ! empty( $post_comments ) ) $postdata['comments'] = $post_comments; + + preg_match_all( '|(.+?)|is', $post, $postmeta ); + $postmeta = $postmeta[1]; + if ( $postmeta) { + foreach ( $postmeta as $p ) { + $post_postmeta[] = array( + 'key' => $this->get_tag( $p, 'wp:meta_key' ), + 'value' => $this->get_tag( $p, 'wp:meta_value' ), + ); + } + } + if ( ! 
empty( $post_postmeta ) ) $postdata['postmeta'] = $post_postmeta; + + return $postdata; + } + + function _normalize_tag( $matches ) { + return '<' . strtolower( $matches[1] ); + } + + function fopen( $filename, $mode = 'r' ) { + if ( $this->has_gzip ) + return gzopen( $filename, $mode ); + return fopen( $filename, $mode ); + } + + function feof( $fp ) { + if ( $this->has_gzip ) + return gzeof( $fp ); + return feof( $fp ); + } + + function fgets( $fp, $len = 8192 ) { + if ( $this->has_gzip ) + return gzgets( $fp, $len ); + return fgets( $fp, $len ); + } + + function fclose( $fp ) { + if ( $this->has_gzip ) + return gzclose( $fp ); + return fclose( $fp ); + } +} Index: trunk/readme.txt =================================================================== --- trunk/readme.txt (revision 303281) +++ trunk/readme.txt (working copy) @@ -3,26 +3,38 @@ Donate link: Tags: importer, wordpress Requires at least: 3.0 -Tested up to: 3.0 +Tested up to: 3.0.1 Stable tag: 0.2 -Import posts, pages, comments, custom fields, categories, and tags from a WordPress export file. +Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file. == Description == -Import posts, pages, comments, custom fields, categories, and tags from a WordPress export file. +Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file. == Installation == 1. Upload the `wordpress-importer` folder to the `/wp-content/plugins/` directory 1. Activate the plugin through the 'Plugins' menu in WordPress -1. Go to the Tools -> Import screen, Click on WordPress +1. Go to the Tools -> Import screen, click on WordPress -== Frequently Asked Questions == +== Changelog == -== Screenshots == += 0.3 = +* Use an XML Parser if possible +* Proper import support for nav menus +* ... 
and more -== Changelog == - = 0.1 = * Initial release + +== Upgrade Notice == + += 0.3 = +Upgrade for a more robust and reliable experience when importing a WordPress export file. + +== Filters == + +The importer has a couple of filters to allow you to completely enable/block certain features: +* `import_allow_create_users`: return false if you only want to allow mapping to existing users +* `import_allow_fetch_attachments`: return false if you do not wish to allow importing and downloading of attachments Index: trunk/wordpress-importer.php =================================================================== --- trunk/wordpress-importer.php (revision 303281) +++ trunk/wordpress-importer.php (working copy) @@ -2,733 +2,603 @@ /* Plugin Name: WordPress Importer Plugin URI: http://wordpress.org/extend/plugins/wordpress-importer/ -Description: Import posts, pages, comments, custom fields, categories, and tags from a WordPress export file. +Description: Import posts, pages, comments, custom fields, categories, tags and more from a WordPress export file. Author: wordpressdotorg Author URI: http://wordpress.org/ -Version: 0.2 -Stable tag: 0.2 +Version: 0.3 License: GPL v2 - http://www.gnu.org/licenses/old-licenses/gpl-2.0.html */ -if ( !defined('WP_LOAD_IMPORTERS') ) +if ( ! defined( 'WP_LOAD_IMPORTERS' ) ) return; // Load Importer API require_once ABSPATH . 'wp-admin/includes/import.php'; -if ( !class_exists( 'WP_Importer' ) ) { +if ( ! class_exists( 'WP_Importer' ) ) { $class_wp_importer = ABSPATH . 'wp-admin/includes/class-wp-importer.php'; if ( file_exists( $class_wp_importer ) ) - require_once $class_wp_importer; + require $class_wp_importer; } +// include WXR file parsers +require dirname( __FILE__ ) . 
'/parsers.php'; + /** - * WordPress Importer + * WordPress Importer class for managing the import process of a WXR file * * @package WordPress * @subpackage Importer */ if ( class_exists( 'WP_Importer' ) ) { class WP_Import extends WP_Importer { - - var $post_ids_processed = array (); - var $orphans = array (); - var $file; var $id; - var $mtnames = array (); - var $newauthornames = array (); - var $allauthornames = array (); - var $author_ids = array (); - var $tags = array (); - var $categories = array (); - var $terms = array (); - var $authors = array (); + var $authors = array(); + var $posts = array(); + var $terms = array(); + var $categories = array(); + var $tags = array(); + var $base_url = ''; - var $j = -1; + var $processed_authors = array(); + var $processed_terms = array(); + var $processed_posts = array(); + var $post_orphans = array(); + var $processed_menu_items = array(); + var $menu_item_orphans = array(); + var $missing_menu_items = array(); + var $fetch_attachments = false; - var $url_remap = array (); + var $url_remap = array(); - function header() { - echo '
'; - screen_icon(); - echo '

'.__('Import WordPress', 'wordpress-importer').'

'; - } + function WP_Import() { /* nothing */ } - function footer() { - echo '
'; + function dispatch() { + $this->header(); + + $step = empty( $_GET['step'] ) ? 0 : (int) $_GET['step']; + switch ( $step ) { + case 0: + $this->greet(); + break; + case 1: + check_admin_referer( 'import-upload' ); + if ( $this->handle_upload() ) + $this->import_options(); + break; + case 2: + check_admin_referer( 'import-wordpress' ); + $this->fetch_attachments = ( ! empty( $_POST['fetch_attachments'] ) && $this->allow_fetch_attachments() ); + $this->id = (int) $_POST['import_id']; + $file = get_attached_file( $this->id ); + $this->import( $file ); + break; + } + + $this->footer(); } - function greet() { - echo '
'; - echo '

'.__('Howdy! Upload your WordPress eXtended RSS (WXR) file and we’ll import the posts, pages, comments, custom fields, categories, and tags into this site.', 'wordpress-importer').'

'; - echo '

'.__('Choose a WordPress WXR file to upload, then click Upload file and import.', 'wordpress-importer').'

'; - wp_import_upload_form("admin.php?import=wordpress&step=1"); - echo '
'; - } + function import( $file ) { + add_filter( 'import_post_meta_key', array( $this, 'is_valid_meta_key' ) ); - function get_tag( $string, $tag ) { - global $wpdb; - preg_match("|<$tag.*?>(.*?)|is", $string, $return); - if ( isset($return[1]) ) { - $return = preg_replace('|^$|s', '$1', $return[1]); - $return = $wpdb->escape( trim( $return ) ); - } else { - $return = ''; - } - return $return; - } + $this->import_start( $file ); - function has_gzip() { - return is_callable('gzopen'); - } + $this->get_author_mapping(); - function fopen($filename, $mode='r') { - if ( $this->has_gzip() ) - return gzopen($filename, $mode); - return fopen($filename, $mode); - } + wp_suspend_cache_invalidation( true ); + $this->process_categories(); + $this->process_tags(); + $this->process_terms(); + $this->process_posts(); + wp_suspend_cache_invalidation( false ); - function feof($fp) { - if ( $this->has_gzip() ) - return gzeof($fp); - return feof($fp); - } + // update items with missing/incorrect parent IDs + $this->backfill_parents(); + // update attachment references within posts and postmeta + $this->backfill_attachment_urls(); - function fgets($fp, $len=8192) { - if ( $this->has_gzip() ) - return gzgets($fp, $len); - return fgets($fp, $len); + $this->import_end(); } - function fclose($fp) { - if ( $this->has_gzip() ) - return gzclose($fp); - return fclose($fp); - } - - function get_entries($process_post_func=NULL) { - set_magic_quotes_runtime(0); - - $doing_entry = false; - $is_wxr_file = false; - - $fp = $this->fopen($this->file, 'r'); - if ($fp) { - while ( !$this->feof($fp) ) { - $importline = rtrim($this->fgets($fp)); - - // this doesn't check that the file is perfectly valid but will at least confirm that it's not the wrong format altogether - if ( !$is_wxr_file && preg_match('|xmlns:wp="http://wordpress[.]org/export/\d+[.]\d+/"|', $importline) ) - $is_wxr_file = true; - - if ( false !== strpos($importline, '') ) { - preg_match('|(.*?)|is', $importline, $url); - 
$this->base_url = $url[1]; - continue; - } - if ( false !== strpos($importline, '') ) { - preg_match('|(.*?)|is', $importline, $category); - $this->categories[] = $category[1]; - continue; - } - if ( false !== strpos($importline, '') ) { - preg_match('|(.*?)|is', $importline, $tag); - $this->tags[] = $tag[1]; - continue; - } - if ( false !== strpos($importline, '') ) { - preg_match('|(.*?)|is', $importline, $term); - $this->terms[] = $term[1]; - continue; - } - if ( false !== strpos($importline, '') ) { - preg_match('|(.*?)|is', $importline, $author); - $this->authors[] = $author[1]; - continue; - } - if ( false !== strpos($importline, '') ) { - $this->post = ''; - $doing_entry = true; - continue; - } - if ( false !== strpos($importline, '') ) { - $doing_entry = false; - if ($process_post_func) - call_user_func($process_post_func, $this->post); - continue; - } - if ( $doing_entry ) { - $this->post .= $importline . "\n"; - } - } - - $this->fclose($fp); + function import_start( $file ) { + $import_arr = $this->parse( $file ); + + if ( is_wp_error( $import_arr ) ) { + echo '

' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '

'; + echo '

' . esc_html( $import_arr->get_error_message() ) . '

'; + $this->footer(); + die(); } + + $this->authors = $import_arr['authors']; + $this->posts = $import_arr['posts']; + $this->terms = $import_arr['terms']; + $this->categories = $import_arr['categories']; + $this->tags = $import_arr['tags']; + $this->base_url = esc_url( $import_arr['base_url'] ); - return $is_wxr_file; + wp_defer_term_counting( true ); + wp_defer_comment_counting( true ); + do_action( 'import_start' ); } - function get_wp_authors() { - // We need to find unique values of author names, while preserving the order, so this function emulates the unique_value(); php function, without the sorting. - $temp = $this->allauthornames; - $authors[0] = array_shift($temp); - $y = count($temp) + 1; - for ($x = 1; $x < $y; $x ++) { - $next = array_shift($temp); - if (!(in_array($next, $authors))) - array_push($authors, $next); + function import_end() { + wp_import_cleanup( $this->id ); + + wp_cache_flush(); + foreach ( get_taxonomies() as $tax ) { + delete_option( "{$tax}_children" ); + _get_term_hierarchy( $tax ); } + + wp_defer_term_counting( false ); + wp_defer_comment_counting( false ); + + echo '

' . __( 'All done.' ) . ' ' . __( 'Have fun!' ) . '' . '

'; - return $authors; + do_action( 'import_end' ); } - function get_authors_from_post() { - global $current_user; + function handle_upload() { + $file = wp_import_handle_upload(); - // this will populate $this->author_ids with a list of author_names => user_ids + if ( isset( $file['error'] ) ) { + echo '

' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '

'; + echo '

' . esc_html( $file['error'] ) . '

'; + return false; + } - foreach ( (array) $_POST['author_in'] as $i => $in_author_name ) { - - if ( !empty($_POST['user_select'][$i]) ) { - // an existing user was selected in the dropdown list - $user = get_userdata( intval($_POST['user_select'][$i]) ); - if ( isset($user->ID) ) - $this->author_ids[$in_author_name] = $user->ID; - } - elseif ( $this->allow_create_users() ) { - // nothing was selected in the dropdown list, so we'll use the name in the text field - - $new_author_name = trim($_POST['user_create'][$i]); - // if the user didn't enter a name, assume they want to use the same name as in the import file - if ( empty($new_author_name) ) - $new_author_name = $in_author_name; - - $user_id = username_exists($new_author_name); - if ( !$user_id ) { - $user_id = wp_create_user($new_author_name, wp_generate_password()); - } - - if ( !is_wp_error( $user_id ) ) { - $this->author_ids[$in_author_name] = $user_id; - } - } - - // failsafe: if the user_id was invalid, default to the current user - if ( empty($this->author_ids[$in_author_name]) ) { - $this->author_ids[$in_author_name] = intval($current_user->ID); - } + $this->id = (int) $file['id']; + $import_data = $this->parse( $file['file'] ); + if ( is_wp_error( $import_data ) ) { + echo '

' . __( 'Sorry, there has been an error.', 'wordpress-importer' ) . '

'; + echo '

' . esc_html( $import_data->get_error_message() ) . '

'; + return false; } + $this->authors = $import_data['authors']; + return true; } - function wp_authors_form() { + function import_options() { + $j = 0; ?> -

-

admins entries.', 'wordpress-importer'); ?>

-allow_create_users() ) { - echo '

'.__('If a new user is created by WordPress, a password will be randomly generated. Manually change the user’s details if necessary.', 'wordpress-importer')."

\n"; - } +
+ + +authors ) ) : ?> +

+

admins entries.', 'wordpress-importer' ); ?>

+allow_create_users() ) : ?> +

+ +
    +authors as $author ) : ?> +
  1. author_select( $j++, $author ); ?>
  2. + +
+ - $authors = $this->get_wp_authors(); - echo ''; - wp_nonce_field('import-wordpress'); -?> -
    -'.__('Import author:', 'wordpress-importer').' '.$author.'
    '; - $this->users_form($j, $author); - echo ''; - } +allow_fetch_attachments() ) : ?> +

    +

    + + +

    + - if ( $this->allow_fetch_attachments() ) { -?> -
-

-

- - -

- +

+
'; - echo ''.'
'; - echo '

'; - echo ''; + function author_select( $n, $author ) { + if ( $this->allow_create_users() ) + printf( __( 'Import author %1$s or map to existing user', 'wordpress-importer' ), '' . esc_html( $author['author_display_name'] ) . '' ); + else + printf( __( 'Map author %1$s to existing user', 'wordpress-importer' ), '' . esc_html( $author['author_display_name'] ) . '' ); + $users = get_users_of_blog(); ?> + + +allow_create_users() ) { - printf('
'); - } - else { - echo __('Map to existing', 'wordpress-importer').'
'; - } + foreach ( (array) $_POST['imported_authors'] as $i => $login ) { + $login = sanitize_user( $login, true ); - // keep track of $n => $author name - echo ''; + if ( ! empty( $_POST['user_map'][$i] ) ) { + $user = get_userdata( intval($_POST['user_map'][$i]) ); + if ( isset( $user->ID ) ) + $this->processed_authors[$login] = $user->ID; + } else if ( $this->allow_create_users() ) { + $user_id = username_exists( $login ); + if ( ! $user_id ) { + $user_data = array( + 'user_login' => $login, + 'user_pass' => wp_generate_password(), + 'user_email' => $this->authors[$login]['author_email'], + 'display_name' => $this->authors[$login]['author_display_name'], + 'first_name' => $this->authors[$login]['author_first_name'], + 'last_name' => $this->authors[$login]['author_last_name'], + ); + $user_id = wp_insert_user( $user_data ); + } - $users = get_users_of_blog(); -?> - processed_authors[$login] = $user_id; + } - function select_authors() { - $is_wxr_file = $this->get_entries(array(&$this, 'process_author')); - if ( $is_wxr_file ) { - $this->wp_authors_form(); + // failsafe: if the user_id was invalid, default to the current user + if ( empty( $this->processed_authors[$login] ) ) + $this->processed_authors[$login] = (int) get_current_user_id(); } - else { - echo '

'.__('Invalid file', 'wordpress-importer').'

'; - echo '

'.__('Please upload a valid WXR (WordPress eXtended RSS) export file.', 'wordpress-importer').'

'; - } } - // fetch the user ID for a given author name, respecting the mapping preferences - function checkauthor($author) { - global $current_user; - - if ( !empty($this->author_ids[$author]) ) - return $this->author_ids[$author]; - - // failsafe: map to the current user - return $current_user->ID; - } - - - function process_categories() { - global $wpdb; + if ( empty( $this->categories ) ) + return; - $cat_names = (array) get_terms('category', array('fields' => 'names')); - - while ( $c = array_shift($this->categories) ) { - $cat_name = trim($this->get_tag( $c, 'wp:cat_name' )); - - // If the category exists we leave it alone - if ( in_array($cat_name, $cat_names) ) + foreach ( $this->categories as $cat ) { + // if the category already exists leave it alone + $term_id = term_exists( $cat['category_nicename'], 'category' ); + if ( $term_id ) { + if ( is_array($term_id) ) $term_id = $term_id['term_id']; + $this->processed_terms[intval($cat['term_id'])] = (int) $term_id; continue; + } - $category_nicename = $this->get_tag( $c, 'wp:category_nicename' ); - $category_description = $this->get_tag( $c, 'wp:category_description' ); - $posts_private = (int) $this->get_tag( $c, 'wp:posts_private' ); - $links_private = (int) $this->get_tag( $c, 'wp:links_private' ); + $category_parent = empty( $cat['category_parent'] ) ? 0 : category_exists( $cat['category_parent'] ); + $category_description = isset( $cat['category_description'] ) ? 
$cat['category_description'] : ''; + $catarr = array( + 'category_nicename' => $cat['category_nicename'], + 'category_parent' => $category_parent, + 'cat_name' => $cat['cat_name'], + 'category_description' => $category_description + ); - $parent = $this->get_tag( $c, 'wp:category_parent' ); - - if ( empty($parent) ) - $category_parent = '0'; - else - $category_parent = category_exists($parent); - - $catarr = compact('category_nicename', 'category_parent', 'posts_private', 'links_private', 'posts_private', 'cat_name', 'category_description'); - - print '' . sprintf( __( 'Importing category %s…' , 'wordpress-importer'), esc_html($cat_name) ) . '
' . "\n"; - $cat_ID = wp_insert_category($catarr); + $id = wp_insert_category( $catarr ); + if ( ! is_wp_error( $id ) ) { + $this->processed_terms[intval($cat['term_id'])] = $id; + } else { + echo 'Error importing category: ' . $id->get_error_message() . '
'; + continue; + } } } function process_tags() { - global $wpdb; + if ( empty( $this->tags ) ) + return; - $tag_names = (array) get_terms('post_tag', array('fields' => 'names')); - - while ( $c = array_shift($this->tags) ) { - $tag_name = trim($this->get_tag( $c, 'wp:tag_name' )); - - // If the category exists we leave it alone - if ( in_array($tag_name, $tag_names) ) + foreach ( $this->tags as $tag ) { + // if the tag already exists leave it alone + $term_id = term_exists( $tag['tag_slug'], 'post_tag' ); + if ( $term_id ) { + if ( is_array($term_id) ) $term_id = $term_id['term_id']; + $this->processed_terms[intval($tag['term_id'])] = (int) $term_id; continue; + } - $slug = $this->get_tag( $c, 'wp:tag_slug' ); - $description = $this->get_tag( $c, 'wp:tag_description' ); + $tag_desc = isset( $tag['tag_description'] ) ? $tag['tag_description'] : ''; + $tagarr = array( 'slug' => $tag['tag_slug'], 'description' => $tag_desc ); - $tagarr = compact('slug', 'description'); - - print '' . sprintf( __( 'Importing tag %s…' , 'wordpress-importer'), esc_html($tag_name) ) . '
' . "\n"; - $tag_ID = wp_insert_term($tag_name, 'post_tag', $tagarr); + $id = wp_insert_term( $tag['tag_name'], 'post_tag', $tagarr ); + if ( ! is_wp_error( $id ) ) { + $this->processed_terms[intval($tag['term_id'])] = $id['term_id']; + } else { + echo 'Error importing post tag: ' . $id->get_error_message() . '
'; + continue; + } } } function process_terms() { - global $wpdb, $wp_taxonomies; + if ( empty( $this->terms ) ) + return; - $custom_taxonomies = $wp_taxonomies; - // get rid of the standard taxonomies - unset( $custom_taxonomies['category'] ); - unset( $custom_taxonomies['post_tag'] ); - unset( $custom_taxonomies['link_category'] ); + foreach ( $this->terms as $term ) { + // if the term already exists in the correct taxonomy leave it alone + $term_id = term_exists( $term['slug'], $term['term_taxonomy'] ); + if ( $term_id ) { + if ( is_array($term_id) ) $term_id = $term_id['term_id']; + $this->processed_terms[intval($term['term_id'])] = (int) $term_id; + continue; + } - $custom_taxonomies = array_keys( $custom_taxonomies ); - $current_terms = (array) get_terms( $custom_taxonomies, array('get' => 'all') ); - $taxonomies = array(); - foreach ( $current_terms as $term ) { - if ( isset( $_terms[$term->taxonomy] ) ) { - $taxonomies[$term->taxonomy] = array_merge( $taxonomies[$term->taxonomy], array($term->name) ); + if ( empty( $term['term_parent'] ) ) { + $parent = 0; } else { - $taxonomies[$term->taxonomy] = array($term->name); + $parent = term_exists( $term['term_parent'], $term['term_taxonomy'] ); + if ( is_array( $parent ) ) $parent = $parent['term_id']; } - } + $description = isset( $term['term_description'] ) ? $term['term_description'] : ''; + $termarr = array( 'slug' => $term['slug'], 'description' => $description, 'parent' => intval($parent) ); - while ( $c = array_shift($this->terms) ) { - $term_name = trim($this->get_tag( $c, 'wp:term_name' )); - $term_taxonomy = trim($this->get_tag( $c, 'wp:term_taxonomy' )); - - // If the term exists in the taxonomy we leave it alone - if ( isset($taxonomies[$term_taxonomy] ) && in_array( $term_name, $taxonomies[$term_taxonomy] ) ) + $id = wp_insert_term( $term['term_name'], $term['term_taxonomy'], $termarr ); + if ( ! 
is_wp_error( $id ) ) { + $this->processed_terms[intval($term['term_id'])] = $id['term_id']; + } else { + echo 'Error importing term: ' . $id->get_error_message() . '
'; continue; - - $slug = $this->get_tag( $c, 'wp:term_slug' ); - $description = $this->get_tag( $c, 'wp:term_description' ); - - $termarr = compact('slug', 'description'); - - print '' . sprintf( __( 'Importing %s…' , 'wordpress-importer'), esc_html($term_name) ) . '
' . "\n"; - $term_ID = wp_insert_term($term_name, $this->get_tag( $c, 'wp:term_taxonomy' ), $termarr); + } } } - function process_author($post) { - $author = $this->get_tag( $post, 'dc:creator' ); - if ($author) - $this->allauthornames[] = $author; - } - + // extract and compact (?) function process_posts() { - echo '
    '; + foreach ( $this->posts as $post ) { + if ( isset( $this->processed_posts[$post['post_id']] ) ) + continue; - $this->get_entries(array(&$this, 'process_post')); + if ( 'nav_menu_item' == $post['post_type'] ) { + $this->process_menu_item( $post ); + continue; + } - echo '
'; + $post_exists = post_exists( $post['post_title'], '', $post['post_date'] ); + if ( $post_exists ) { + $comment_post_ID = $post_id = $post_exists; + } else { + $post_parent = (int) $post['post_parent']; + if ( $post_parent ) { + // if we already know the parent, map it to the new local ID + if ( isset( $this->processed_posts[$post_parent] ) ) { + $post_parent = $this->processed_posts[$post_parent]; + // otherwise record the parent for later + } else { + $this->post_orphans[intval($post['post_id'])] = $post_parent; + $post_parent = 0; + } + } - wp_import_cleanup($this->id); - do_action('import_done', 'wordpress'); + // map the post author + if ( isset( $this->processed_authors[$post['post_author']] ) ) + $author = $this->processed_authors[$post['post_author']]; + else + $author = (int) get_current_user_id(); - echo '

'.sprintf(__('All done.', 'wordpress-importer').' '.__('Have fun!', 'wordpress-importer').'', get_option('home')).'

'; - } + $postdata = array( + 'import_id' => $post['post_id'], 'post_author' => $author, 'post_date' => $post['post_date'], + 'post_date_gmt' => $post['post_date_gmt'], 'post_content' => $post['post_content'], + 'post_excerpt' => $post['post_excerpt'], 'post_title' => $post['post_title'], + 'post_status' => $post['status'], 'post_name' => $post['post_name'], + 'comment_status' => $post['comment_status'], 'ping_status' => $post['ping_status'], + 'guid' => $post['guid'], 'post_parent' => $post_parent, 'menu_order' => $post['menu_order'], + 'post_type' => $post['post_type'], 'post_password' => $post['post_password'] + ); + + if ( 'attachment' == $postdata['post_type'] ) { + $remote_url = ! empty($post['attachment_url']) ? $post['attachment_url'] : $post['guid']; + $comment_post_ID = $post_id = $this->process_attachment( $postdata, $remote_url ); + } else { + $comment_post_ID = $post_id = wp_insert_post( $postdata, true ); + } - function _normalize_tag( $matches ) { - return '<' . strtolower( $matches[1] ); - } - - function process_post($post) { - global $wpdb; - - $post_ID = (int) $this->get_tag( $post, 'wp:post_id' ); - if ( $post_ID && !empty($this->post_ids_processed[$post_ID]) ) // Processed already - return 0; - - set_time_limit( 60 ); - - // There are only ever one of these - $post_title = $this->get_tag( $post, 'title' ); - $post_date = $this->get_tag( $post, 'wp:post_date' ); - $post_date_gmt = $this->get_tag( $post, 'wp:post_date_gmt' ); - $comment_status = $this->get_tag( $post, 'wp:comment_status' ); - $ping_status = $this->get_tag( $post, 'wp:ping_status' ); - $post_status = $this->get_tag( $post, 'wp:status' ); - $post_name = $this->get_tag( $post, 'wp:post_name' ); - $post_parent = $this->get_tag( $post, 'wp:post_parent' ); - $menu_order = $this->get_tag( $post, 'wp:menu_order' ); - $post_type = $this->get_tag( $post, 'wp:post_type' ); - $post_password = $this->get_tag( $post, 'wp:post_password' ); - $is_sticky = $this->get_tag( $post, 'wp:is_sticky' ); 
- $guid = $this->get_tag( $post, 'guid' ); - $post_author = $this->get_tag( $post, 'dc:creator' ); - - $post_excerpt = $this->get_tag( $post, 'excerpt:encoded' ); - $post_excerpt = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_excerpt); - $post_excerpt = str_replace('
', '
', $post_excerpt); - $post_excerpt = str_replace('
', '
', $post_excerpt); - - $post_content = $this->get_tag( $post, 'content:encoded' ); - $post_content = preg_replace_callback('|<(/?[A-Z]+)|', array( &$this, '_normalize_tag' ), $post_content); - $post_content = str_replace('
', '
', $post_content); - $post_content = str_replace('
', '
', $post_content); - - preg_match_all('|(.*?)|is', $post, $tags); - $tags = $tags[1]; - - $tag_index = 0; - foreach ($tags as $tag) { - $tags[$tag_index] = $wpdb->escape( html_entity_decode( str_replace(array( '' ), '', $tag ) ) ); - $tag_index++; - } - - preg_match_all('|(.*?)|is', $post, $categories); - $categories = $categories[1]; - - $cat_index = 0; - foreach ($categories as $category) { - $categories[$cat_index] = $wpdb->escape( html_entity_decode( str_replace( array( '' ), '', $category ) ) ); - $cat_index++; - } - - $post_exists = post_exists($post_title, '', $post_date); - - if ( $post_exists ) { - echo '
  • '; - printf(__('Post %s already exists.', 'wordpress-importer'), stripslashes($post_title)); - $comment_post_ID = $post_id = $post_exists; - } else { - - // If it has parent, process parent first. - $post_parent = (int) $post_parent; - if ($post_parent) { - // if we already know the parent, map it to the local ID - if ( isset( $this->post_ids_processed[$post_parent] ) ) { - $post_parent = $this->post_ids_processed[$post_parent]; // new ID of the parent + if ( is_wp_error( $post_id ) ) { + echo 'Error importing post object: ' . $post_id->get_error_message() . '
    '; + continue; } - else { - // record the parent for later - $this->orphans[intval($post_ID)] = $post_parent; - } - } - echo '
  • '; - - $post_author = $this->checkauthor($post_author); //just so that if a post already exists, new users are not created by checkauthor - - $postdata = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt', 'post_title', 'post_status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent', 'menu_order', 'post_type', 'post_password'); - $postdata['import_id'] = $post_ID; - if ($post_type == 'attachment') { - $remote_url = $this->get_tag( $post, 'wp:attachment_url' ); - if ( !$remote_url ) - $remote_url = $guid; - - $comment_post_ID = $post_id = $this->process_attachment($postdata, $remote_url); - if ( !$post_id or is_wp_error($post_id) ) - return $post_id; - } - else { - printf(__('Importing post %s...', 'wordpress-importer') . "\n", stripslashes($post_title)); - $comment_post_ID = $post_id = wp_insert_post($postdata); - if ( $post_id && $is_sticky == 1 ) + if ( $post['is_sticky'] == 1 ) stick_post( $post_id ); - } - if ( is_wp_error( $post_id ) ) - return $post_id; + // map pre-import ID to local ID + $this->processed_posts[intval($post['post_id'])] = (int) $post_id; - // Memorize old and new ID. - if ( $post_id && $post_ID ) { - $this->post_ids_processed[intval($post_ID)] = intval($post_id); - } - - // Add categories. - if (count($categories) > 0) { - $post_cats = array(); - foreach ($categories as $category) { - if ( '' == $category ) - continue; - $slug = sanitize_term_field('slug', $category, 0, 'category', 'db'); - $cat = get_term_by('slug', $slug, 'category'); - $cat_ID = 0; - if ( ! empty($cat) ) - $cat_ID = $cat->term_id; - if ($cat_ID == 0) { - $category = $wpdb->escape($category); - $cat_ID = wp_insert_category(array('cat_name' => $category)); - if ( is_wp_error($cat_ID) ) + // add categories, tags and other terms + if ( ! empty( $post['terms'] ) ) { + foreach ( $post['terms'] as $term ) { + $term_exists = term_exists( $term['slug'], $term['domain'] ); + $term_id = is_array( $term_exists ) ? 
$term_exists['term_id'] : $term_exists; + if ( ! $term_id ) { + $t = wp_insert_term( $term['name'], $term['domain'], array( 'slug' => $term['slug'] ) ); + if ( ! is_wp_error( $t ) ) { + $term_id = $t['term_id']; + } else { + echo $term['name'] . ' :: ' . $t->get_error_message(); continue; + } } - $post_cats[] = $cat_ID; + $terms_to_set[$term['domain']][] = intval( $term_id ); } - wp_set_post_categories($post_id, $post_cats); + + foreach ( $terms_to_set as $tax => $ids ) { + $tt_ids = wp_set_post_terms( $post_id, $ids, $tax ); + } + unset( $post['terms'], $terms_to_set ); } - // Add tags. - if (count($tags) > 0) { - $post_tags = array(); - foreach ($tags as $tag) { - if ( '' == $tag ) - continue; - $slug = sanitize_term_field('slug', $tag, 0, 'post_tag', 'db'); - $tag_obj = get_term_by('slug', $slug, 'post_tag'); - $tag_id = 0; - if ( ! empty($tag_obj) ) - $tag_id = $tag_obj->term_id; - if ( $tag_id == 0 ) { - $tag = $wpdb->escape($tag); - $tag_id = wp_insert_term($tag, 'post_tag'); - if ( is_wp_error($tag_id) ) - continue; - $tag_id = $tag_id['term_id']; + // add/update comments + if ( ! 
empty( $post['comments'] ) ) { + $num_comments = 0; + $inserted_comments = array(); + foreach ( $post['comments'] as $comment ) { + $comment_id = $comment['comment_id']; + $newcomments[$comment_id]['comment_post_ID'] = $comment_post_ID; + $newcomments[$comment_id]['comment_author'] = $comment['comment_author']; + $newcomments[$comment_id]['comment_author_email'] = $comment['comment_author_email']; + $newcomments[$comment_id]['comment_author_IP'] = $comment['comment_author_IP']; + $newcomments[$comment_id]['comment_author_url'] = $comment['comment_author_url']; + $newcomments[$comment_id]['comment_date'] = $comment['comment_date']; + $newcomments[$comment_id]['comment_date_gmt'] = $comment['comment_date_gmt']; + $newcomments[$comment_id]['comment_content'] = $comment['comment_content']; + $newcomments[$comment_id]['comment_approved'] = $comment['comment_approved']; + $newcomments[$comment_id]['comment_type'] = ! empty( $comment['comment_type'] ) ? $comment['comment_type'] : 'comment'; + $newcomments[$comment_id]['comment_parent'] = $comment['comment_parent']; + } + ksort( $newcomments ); + + foreach ( $newcomments as $key => $comment ) { + // if this is a new post we can skip the comment_exists() check + if ( ! $post_exists || ! 
comment_exists( $comment['comment_author'], $comment['comment_date'] ) ) { + if ( isset( $inserted_comments[$comment['comment_parent']] ) ) + $comment['comment_parent'] = $inserted_comments[$comment['comment_parent']]; + $comment = wp_filter_comment( $comment ); + $inserted_comments[$key] = wp_insert_comment( $comment ); + $num_comments++; } - $post_tags[] = intval($tag_id); } - wp_set_post_tags($post_id, $post_tags); + unset( $newcomments, $inserted_comments, $post['comments'] ); } - } - // Now for comments - preg_match_all('|(.*?)|is', $post, $comments); - $comments = $comments[1]; - $num_comments = 0; - $inserted_comments = array(); - if ( $comments) { - foreach ($comments as $comment) { - $comment_id = $this->get_tag( $comment, 'wp:comment_id'); - $newcomments[$comment_id]['comment_post_ID'] = $comment_post_ID; - $newcomments[$comment_id]['comment_author'] = $this->get_tag( $comment, 'wp:comment_author'); - $newcomments[$comment_id]['comment_author_email'] = $this->get_tag( $comment, 'wp:comment_author_email'); - $newcomments[$comment_id]['comment_author_IP'] = $this->get_tag( $comment, 'wp:comment_author_IP'); - $newcomments[$comment_id]['comment_author_url'] = $this->get_tag( $comment, 'wp:comment_author_url'); - $newcomments[$comment_id]['comment_date'] = $this->get_tag( $comment, 'wp:comment_date'); - $newcomments[$comment_id]['comment_date_gmt'] = $this->get_tag( $comment, 'wp:comment_date_gmt'); - $newcomments[$comment_id]['comment_content'] = $this->get_tag( $comment, 'wp:comment_content'); - $newcomments[$comment_id]['comment_approved'] = $this->get_tag( $comment, 'wp:comment_approved'); - $newcomments[$comment_id]['comment_type'] = $this->get_tag( $comment, 'wp:comment_type'); - $newcomments[$comment_id]['comment_parent'] = $this->get_tag( $comment, 'wp:comment_parent'); - } - // Sort by comment ID, to make sure comment parents exist (if there at all) - ksort($newcomments); - foreach ($newcomments as $key => $comment) { - // if this is a new post we 
can skip the comment_exists() check - if ( !$post_exists || !comment_exists($comment['comment_author'], $comment['comment_date']) ) { - if (isset($inserted_comments[$comment['comment_parent']])) - $comment['comment_parent'] = $inserted_comments[$comment['comment_parent']]; - $comment = wp_filter_comment($comment); - $inserted_comments[$key] = wp_insert_comment($comment); - $num_comments++; + // add/update post meta + if ( isset( $post['postmeta'] ) ) { + foreach ( $post['postmeta'] as $meta ) { + $key = apply_filters( 'import_post_meta_key', $meta['key'] ); + if ( $key ) { + update_post_meta( $post_id, $key, $meta['value'] ); + do_action( 'import_post_meta', $post_id, $key, $meta['value'] ); + } } } } - - if ( $num_comments ) - printf(' '._n('(%s comment)', '(%s comments)', $num_comments, 'wordpress-importer'), $num_comments); - - // Now for post meta - preg_match_all('|(.*?)|is', $post, $postmeta); - $postmeta = $postmeta[1]; - if ( $postmeta) { foreach ($postmeta as $p) { - $key = $this->get_tag( $p, 'wp:meta_key' ); - $value = $this->get_tag( $p, 'wp:meta_value' ); - - $this->process_post_meta($post_id, $key, $value); - - } } - - do_action('import_post_added', $post_id); - print "
  • \n"; } - function process_post_meta($post_id, $key, $value) { - // the filter can return false to skip a particular metadata key - $_key = apply_filters('import_post_meta_key', $key); - if ( $_key ) { - add_post_meta( $post_id, $_key, $value ); - do_action('import_post_meta', $post_id, $_key, $value); - } - } - - function process_attachment($postdata, $remote_url) { - if ($this->fetch_attachments and $remote_url) { - printf( __('Importing attachment %s... ', 'wordpress-importer'), htmlspecialchars($remote_url) ); - - // If the URL is absolute, but does not contain http, upload it assuming the base_site_url variable - if ( preg_match('/^\/[\w\W]+$/', $remote_url) ) - $remote_url = rtrim($this->base_url,'/').$remote_url; - - $upload = $this->fetch_remote_file($postdata, $remote_url); - if ( is_wp_error($upload) ) { - printf( __('Remote file error: %s', 'wordpress-importer'), htmlspecialchars($upload->get_error_message()) ); - return $upload; - } - else { - print '('.size_format(filesize($upload['file'])).')'; - } - - if ( 0 == filesize( $upload['file'] ) ) { - print __( "Zero length file, deleting" , 'wordpress-importer') . "\n"; - unlink( $upload['file'] ); + function process_menu_item( $item ) { + if ( isset( $item['terms'][0]['slug'] ) ) { + $menu_id = term_exists( $item['terms'][0]['slug'], 'nav_menu' ); + if ( ! $menu_id ) { + echo 'Menu item skipped due to invalid menu slug'; return; + } else { + $menu_id = is_array( $menu_id ) ? 
$menu_id['term_id'] : $menu_id; } + } else { + echo 'Menu item skipped due to missing menu slug'; + return; + } - if ( $info = wp_check_filetype($upload['file']) ) { - $postdata['post_mime_type'] = $info['type']; - } - else { - print __('Invalid file type', 'wordpress-importer'); - return; - } + foreach ( $item['postmeta'] as $meta ) + $$meta['key'] = $meta['value']; - $postdata['guid'] = $upload['url']; + if ( 'taxonomy' == $_menu_item_type && isset( $this->processed_terms[intval($_menu_item_object_id)] ) ) { + $_menu_item_object_id = $this->processed_terms[intval($_menu_item_object_id)]; + } else if ( 'post_type' == $_menu_item_type && isset( $this->processed_posts[intval($_menu_item_object_id)] ) ) { + $_menu_item_object_id = $this->processed_posts[intval($_menu_item_object_id)]; + } else if ( 'custom' != $_menu_item_type ) { + // associated object is missing or not imported yet, we'll retry later + $this->missing_menu_items[] = $item; + return; + } - // as per wp-admin/includes/upload.php - $post_id = wp_insert_attachment($postdata, $upload['file']); - wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) ); + if ( isset( $this->processed_menu_items[intval($_menu_item_menu_item_parent)] ) ) { + $_menu_item_menu_item_parent = $this->processed_menu_items[intval($_menu_item_menu_item_parent)]; + } else if ( $_menu_item_menu_item_parent ) { + $this->menu_item_orphans[intval($item['post_id'])] = (int) $_menu_item_menu_item_parent; + $_menu_item_menu_item_parent = 0; + } - // remap the thumbnail url. this isn't perfect because we're just guessing the original url. - if ( preg_match('@^image/@', $info['type']) && $thumb_url = wp_get_attachment_thumb_url($post_id) ) { - $parts = pathinfo($remote_url); - $ext = $parts['extension']; - $name = basename($parts['basename'], ".{$ext}"); - $this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . 
$ext] = $thumb_url; - } + $args = array( + 'menu-item-object-id' => $_menu_item_object_id, + 'menu-item-object' => $_menu_item_object, + 'menu-item-parent-id' => $_menu_item_menu_item_parent, + 'menu-item-position' => intval( $item['menu_order'] ), + 'menu-item-type' => $_menu_item_type, + 'menu-item-title' => $item['post_title'], + 'menu-item-url' => $_menu_item_url, + 'menu-item-description' => $item['post_content'], + 'menu-item-attr-title' => $item['post_excerpt'], + 'menu-item-target' => $_menu_item_target, + 'menu-item-classes' => $_menu_item_classes, + 'menu-item-xfn' => $_menu_item_xfn, + 'menu-item-status' => $item['status'] + ); - return $post_id; - } - else { - printf( __('Skipping attachment %s', 'wordpress-importer'), htmlspecialchars($remote_url) ); - } + $id = wp_update_nav_menu_item( $menu_id, 0, $args ); + if ( $id && ! is_wp_error( $id ) ) + $this->processed_menu_items[intval($item['post_id'])] = (int) $id; } + + function process_attachment( $post, $url ) { + if ( ! ( $this->fetch_attachments && $url ) ) + return new WP_Error( 'attachment_processing_error', + __( 'Fetching attachments is not allowed or an empty URL was provided', 'wordpress-importer' ) ); + + // if the URL is absolute, but does not contain address, then upload it assuming base_site_url + if ( preg_match( '|^/[\w\W]+$|', $url ) ) + $url = rtrim( $this->base_url, '/' ) . 
$url; + + $upload = $this->fetch_remote_file( $url, $post ); + if ( is_wp_error( $upload ) ) + return $upload; + + if ( $info = wp_check_filetype( $upload['file'] ) ) + $post['post_mime_type'] = $info['type']; + else + return new WP_Error( 'attachment_processing_error', __('Invalid file type', 'wordpress-importer') ); + + $post['guid'] = $upload['url']; - function fetch_remote_file( $post, $url ) { - add_filter( 'http_request_timeout', array( &$this, 'bump_request_timeout' ) ); + // as per wp-admin/includes/upload.php + $post_id = wp_insert_attachment( $post, $upload['file'] ); + wp_update_attachment_metadata( $post_id, wp_generate_attachment_metadata( $post_id, $upload['file'] ) ); - $upload = wp_upload_dir($post['post_date']); + // remap the thumbnail url. this isn't perfect because we're just guessing the original url. + if ( preg_match( '@^image/@', $info['type'] ) && $thumb_url = wp_get_attachment_thumb_url( $post_id ) ) { + $parts = pathinfo( $url ); + $ext = $parts['extension']; + $name = basename($parts['basename'], ".{$ext}"); + $this->url_remap[$parts['dirname'] . '/' . $name . '.thumbnail.' . 
$ext] = $thumb_url; + } + return $post_id; + } + + function fetch_remote_file( $url, $post ) { + add_filter( 'http_request_timeout', array( &$this, 'bump_request_timeout' ) ); + // extract the file name and extension from the url - $file_name = basename($url); - - // get placeholder file in the upload dir with a unique sanitized filename - $upload = wp_upload_bits( $file_name, 0, '', $post['post_date']); - if ( $upload['error'] ) { - echo $upload['error']; + $file_name = basename( $url ); + + // get placeholder file in the upload dir with a unique, sanitized filename + $upload = wp_upload_bits( $file_name, 0, '', $post['post_date'] ); + if ( $upload['error'] ) return new WP_Error( 'upload_dir_error', $upload['error'] ); - } // fetch the remote url and write it to the placeholder file - $headers = wp_get_http($url, $upload['file']); + $headers = wp_get_http( $url, $upload['file'] ); - //Request failed + // request failed if ( ! $headers ) { - @unlink($upload['file']); + @unlink( $upload['file'] ); return new WP_Error( 'import_file_error', __('Remote server did not respond', 'wordpress-importer') ); } // make sure the fetch was successful if ( $headers['response'] != '200' ) { - @unlink($upload['file']); - return new WP_Error( 'import_file_error', sprintf(__('Remote file returned error response %1$d %2$s', 'wordpress-importer'), $headers['response'], get_status_header_desc($headers['response']) ) ); + @unlink( $upload['file'] ); + return new WP_Error( 'import_file_error', sprintf( __('Remote server returned error response %1$d %2$s', 'wordpress-importer'), $headers['response'], get_status_header_desc($headers['response']) ) ); } - elseif ( isset($headers['content-length']) && filesize($upload['file']) != $headers['content-length'] ) { - @unlink($upload['file']); + + $filesize = filesize( $upload['file'] ); + + if ( isset( $headers['content-length'] ) && $filesize != $headers['content-length'] ) { + @unlink( $upload['file'] ); return new WP_Error( 'import_file_error', 
__('Remote file is incorrect size', 'wordpress-importer') ); } + + if ( 0 == $filesize ) { + @unlink( $upload['file'] ); + return new WP_Error( 'import_file_error', __('Zero size file downloaded', 'wordpress-importer') ); + } - $max_size = $this->max_attachment_size(); - if ( !empty($max_size) and filesize($upload['file']) > $max_size ) { - @unlink($upload['file']); + $max_size = (int) $this->max_attachment_size(); + if ( ! empty( $max_size ) && $filesize > $max_size ) { + @unlink( $upload['file'] ); return new WP_Error( 'import_file_error', sprintf(__('Remote file is too large, limit is %s', size_format($max_size), 'wordpress-importer')) ); } @@ -736,170 +606,109 @@ $this->url_remap[$url] = $upload['url']; $this->url_remap[$post['guid']] = $upload['url']; // if the remote url is redirected somewhere else, keep track of the destination too - if ( $headers['x-final-location'] != $url ) + if ( isset($headers['x-final-location']) && $headers['x-final-location'] != $url ) $this->url_remap[$headers['x-final-location']] = $upload['url']; - return $upload; - + return $upload; } - /** - * Bump up the request timeout for http requests - * - * @param int $val - * @return int - */ - function bump_request_timeout( $val ) { - return 60; - } + function backfill_parents() { + global $wpdb; - // sort by strlen, longest string first - function cmpr_strlen($a, $b) { - return strlen($b) - strlen($a); + foreach ( $this->post_orphans as $child_id => $parent_id ) { + $local_child_id = $local_parent_id = false; + if ( isset( $this->processed_posts[$child_id] ) ) + $local_child_id = $this->processed_posts[$child_id]; + if ( isset( $this->processed_posts[$parent_id] ) ) + $local_parent_id = $this->processed_posts[$parent_id]; + + if ( $local_child_id && $local_parent_id ) + $wpdb->update( $wpdb->posts, array( 'post_parent' => $local_parent_id ), array( 'ID' => $local_child_id ), '%d', '%d' ); + } + + // all other posts/terms are imported, retry menu items with missing associated object + 
$missing_menu_items = $this->missing_menu_items; + foreach ( $missing_menu_items as $item ) + $this->process_menu_item( $item ); + + foreach ( $this->menu_item_orphans as $child_id => $parent_id ) { + $local_child_id = $local_parent_id = 0; + if ( isset( $this->processed_menu_items[$child_id] ) ) + $local_child_id = $this->processed_menu_items[$child_id]; + if ( isset( $this->processed_menu_items[$parent_id] ) ) + $local_parent_id = $this->processed_menu_items[$parent_id]; + + if ( $local_child_id && $local_parent_id ) + update_post_meta( $local_child_id, '_menu_item_menu_item_parent', (int) $local_parent_id ); + } } - // update url references in post bodies to point to the new local files function backfill_attachment_urls() { - + global $wpdb; + // make sure we do the longest urls first, in case one is a substring of another - uksort($this->url_remap, array(&$this, 'cmpr_strlen')); + uksort( $this->url_remap, array(&$this, 'cmpr_strlen') ); - global $wpdb; - foreach ($this->url_remap as $from_url => $to_url) { + foreach ( $this->url_remap as $from_url => $to_url ) { // remap urls in post_content - $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, '%s', '%s')", $from_url, $to_url) ); + $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->posts} SET post_content = REPLACE(post_content, %s, %s)", $from_url, $to_url) ); // remap enclosure urls - $result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, '%s', '%s') WHERE meta_key='enclosure'", $from_url, $to_url) ); + $result = $wpdb->query( $wpdb->prepare("UPDATE {$wpdb->postmeta} SET meta_value = REPLACE(meta_value, %s, %s) WHERE meta_key='enclosure'", $from_url, $to_url) ); } + } + + function parse( $file ) { + $parser = new WXR_Parser(); + return $parser->parse( $file ); } - // update the post_parent of orphans now that we know the local id's of all parents - function backfill_parents() { - global $wpdb; + function header() { + echo '
    '; + screen_icon(); + echo '

    ' . __( 'Import WordPress', 'wordpress-importer' ) . '

    '; + } - foreach ($this->orphans as $child_id => $parent_id) { - $local_child_id = $local_parent_id = false; - if ( isset( $this->post_ids_processed[$child_id] ) ) - $local_child_id = $this->post_ids_processed[$child_id]; - if ( isset( $this->post_ids_processed[$parent_id] ) ) - $local_parent_id = $this->post_ids_processed[$parent_id]; + function footer() { + echo '
    '; + } - if ($local_child_id and $local_parent_id) { - $wpdb->update($wpdb->posts, array('post_parent' => $local_parent_id), array('ID' => $local_child_id) ); - } - } + function greet() { + echo '
    '; + echo '

    '.__( 'Howdy! Upload your WordPress eXtended RSS (WXR) file and we’ll import the posts, pages, comments, custom fields, categories, and tags into this site.', 'wordpress-importer' ).'

    '; + echo '

    '.__( 'Choose a WXR file to upload, then click Upload file and import.', 'wordpress-importer' ).'

    '; + wp_import_upload_form( 'admin.php?import=wordpress&step=1' ); + echo '
    '; } - function is_valid_meta_key($key) { + function is_valid_meta_key( $key ) { // skip attachment metadata since we'll regenerate it from scratch - if ( $key == '_wp_attached_file' || $key == '_wp_attachment_metadata' ) + // skip _edit_lock and _edit_last + if ( in_array( $key, array( '_wp_attached_file', '_wp_attachment_metadata', '_edit_lock', '_edit_last' ) ) ) return false; return $key; } // give the user the option of creating new users to represent authors in the import file? function allow_create_users() { - return apply_filters('import_allow_create_users', true); + return apply_filters( 'import_allow_create_users', true ); } // give the user the option of downloading and importing attached files function allow_fetch_attachments() { - return apply_filters('import_allow_fetch_attachments', true); + return apply_filters( 'import_allow_fetch_attachments', true ); } - + + function bump_request_timeout() { + return 60; + } + function max_attachment_size() { - // can be overridden with a filter - 0 means no limit - return apply_filters('import_attachment_size_limit', 0); + return apply_filters( 'import_attachment_size_limit', 0 ); } - - function import_start() { - wp_defer_term_counting(true); - wp_defer_comment_counting(true); - do_action('import_start'); - } - - function import_end() { - do_action('import_end'); - - // clear the caches after backfilling - foreach ($this->post_ids_processed as $post_id) - clean_post_cache($post_id); - - wp_defer_term_counting(false); - wp_defer_comment_counting(false); - } - - function import($id, $fetch_attachments = false) { - $this->id = (int) $id; - $this->fetch_attachments = ($this->allow_fetch_attachments() && (bool) $fetch_attachments); - - add_filter('import_post_meta_key', array($this, 'is_valid_meta_key')); - $file = get_attached_file($this->id); - $this->import_file($file); - } - - function import_file($file) { - $this->file = $file; - - $this->import_start(); - $this->get_authors_from_post(); - 
wp_suspend_cache_invalidation(true); - $this->get_entries(); - $this->process_categories(); - $this->process_tags(); - $this->process_terms(); - $result = $this->process_posts(); - wp_suspend_cache_invalidation(false); - $this->backfill_parents(); - $this->backfill_attachment_urls(); - $this->import_end(); - - if ( is_wp_error( $result ) ) - return $result; - } - - function handle_upload() { - $file = wp_import_handle_upload(); - if ( isset($file['error']) ) { - echo '

    '.__('Sorry, there has been an error.', 'wordpress-importer').'

    '; - echo '

    ' . $file['error'] . '

    '; - return false; - } - $this->file = $file['file']; - $this->id = (int) $file['id']; - return true; - } - - function dispatch() { - if (empty ($_GET['step'])) - $step = 0; - else - $step = (int) $_GET['step']; - - $this->header(); - switch ($step) { - case 0 : - $this->greet(); - break; - case 1 : - check_admin_referer('import-upload'); - if ( $this->handle_upload() ) - $this->select_authors(); - break; - case 2: - check_admin_referer('import-wordpress'); - $fetch_attachments = ! empty( $_POST['attachments'] ); - $result = $this->import( $_GET['id'], $fetch_attachments); - if ( is_wp_error( $result ) ) - echo $result->get_error_message(); - break; - } - $this->footer(); - } - - function WP_Import() { - // Nothing. - } + + function cmpr_strlen( $a, $b ) { + return strlen($b) - strlen($a); + } } /**