Software: Apache. PHP/5.5.15 uname -a: Windows NT SVR-DMZ 6.1 build 7600 (Windows Server 2008 R2 Enterprise Edition) i586 SYSTEM Safe-mode: OFF (not secure) C:\Extranet\C\Archivos de programa\Mozilla Firefox\components\ drwxrwxrwx |
Viewing file: FeedProcessor.js (64.66 KB) -rw-rw-rw- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is Robert Sayre. * Portions created by the Initial Developer are Copyright (C) 2006 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Ben Goodger <beng@google.com> * Myk Melez <myk@mozilla.org> * Michael Ventnor <m.ventnor@gmail.com> * Will Guaraldi <will.guaraldi@pculture.org> * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ function LOG(str) { dump("*** " + str + "\n"); } const Ci = Components.interfaces; const Cc = Components.classes; const Cr = Components.results; Components.utils.import("resource://gre/modules/XPCOMUtils.jsm"); Components.utils.import("resource://gre/modules/ISO8601DateUtils.jsm"); const FP_CONTRACTID = "@mozilla.org/feed-processor;1"; const FP_CLASSID = Components.ID("{26acb1f0-28fc-43bc-867a-a46aabc85dd4}"); const FP_CLASSNAME = "Feed Processor"; const FR_CONTRACTID = "@mozilla.org/feed-result;1"; const FR_CLASSID = Components.ID("{072a5c3d-30c6-4f07-b87f-9f63d51403f2}"); const FR_CLASSNAME = "Feed Result"; const FEED_CONTRACTID = "@mozilla.org/feed;1"; const FEED_CLASSID = Components.ID("{5d0cfa97-69dd-4e5e-ac84-f253162e8f9a}"); const FEED_CLASSNAME = "Feed"; const ENTRY_CONTRACTID = "@mozilla.org/feed-entry;1"; const ENTRY_CLASSID = Components.ID("{8e4444ff-8e99-4bdd-aa7f-fb3c1c77319f}"); const ENTRY_CLASSNAME = "Feed Entry"; const TEXTCONSTRUCT_CONTRACTID = "@mozilla.org/feed-textconstruct;1"; const TEXTCONSTRUCT_CLASSID = Components.ID("{b992ddcd-3899-4320-9909-924b3e72c922}"); const TEXTCONSTRUCT_CLASSNAME = "Feed Text Construct"; const GENERATOR_CONTRACTID = "@mozilla.org/feed-generator;1"; const GENERATOR_CLASSID = Components.ID("{414af362-9ad8-4296-898e-62247f25a20e}"); const GENERATOR_CLASSNAME = "Feed Generator"; const PERSON_CONTRACTID = "@mozilla.org/feed-person;1"; const PERSON_CLASSID = Components.ID("{95c963b7-20b2-11db-92f6-001422106990}"); const PERSON_CLASSNAME = "Feed Person"; const IO_CONTRACTID = "@mozilla.org/network/io-service;1" const BAG_CONTRACTID = "@mozilla.org/hash-property-bag;1" const ARRAY_CONTRACTID = "@mozilla.org/array;1"; const SAX_CONTRACTID = "@mozilla.org/saxparser/xmlreader;1"; const UNESCAPE_CONTRACTID = "@mozilla.org/feed-unescapehtml;1"; var gIoService = null; const XMLNS = "http://www.w3.org/XML/1998/namespace"; const RSS090NS = "http://my.netscape.com/rdf/simple/0.9/"; const WAIROLE_NS = "http://www.w3.org/2005/01/wai-rdf/GUIRoleTaxonomy#"; /***** Some general utils *****/ function strToURI(link, base) { var base = base || null; if (!gIoService) gIoService = Cc[IO_CONTRACTID].getService(Ci.nsIIOService); try { return gIoService.newURI(link, null, base); } catch(e) { return null; } } function isArray(a) { return isObject(a) && a.constructor == Array; } function isObject(a) { return (a && typeof a == "object") || isFunction(a); } function isFunction(a) { return typeof a == "function"; } function isIID(a, iid) { var rv = false; try { a.QueryInterface(iid); rv = true; } catch(e) { } return rv; } function isIArray(a) { return isIID(a, Ci.nsIArray); } function isIFeedContainer(a) { return isIID(a, Ci.nsIFeedContainer); } function stripTags(someHTML) { return someHTML.replace(/<[^>]+>/g,""); } /** * Searches through an array of links and returns a JS array * of matching property bags. */ const IANA_URI = "http://www.iana.org/assignments/relation/"; function findAtomLinks(rel, links) { var rvLinks = []; for (var i = 0; i < links.length; ++i) { var linkElement = links.queryElementAt(i, Ci.nsIPropertyBag2); // atom:link MUST have @href if (bagHasKey(linkElement, "href")) { var relAttribute = null; if (bagHasKey(linkElement, "rel")) relAttribute = linkElement.getPropertyAsAString("rel") if ((!relAttribute && rel == "alternate") || relAttribute == rel) { rvLinks.push(linkElement); continue; } // catch relations specified by IANA URI if (relAttribute == IANA_URI + rel) { rvLinks.push(linkElement); } } } return rvLinks; } function xmlEscape(s) { s = s.replace(/&/g, "&"); s = s.replace(/>/g, ">"); s = s.replace(/</g, "<"); s = s.replace(/"/g, """); s = s.replace(/'/g, "'"); return s; } function arrayContains(array, element) { for (var i = 0; i < array.length; ++i) { if (array[i] == element) { return true; } } return false; } // XXX add hasKey to nsIPropertyBag function bagHasKey(bag, key) { try { bag.getProperty(key); return true; } catch (e) { return false; } } function makePropGetter(key) { return function FeedPropGetter(bag) { try { return value = bag.getProperty(key); } catch(e) { } return null; } } function W3CToIETFDate(dateString) { var date = ISO8601DateUtils.parse(dateString); return date.toUTCString(); } const RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; // namespace map var gNamespaces = { "http://webns.net/mvcb/":"admin", "http://backend.userland.com/rss":"", "http://blogs.law.harvard.edu/tech/rss":"", "http://www.w3.org/2005/Atom":"atom", "http://purl.org/atom/ns#":"atom03", "http://purl.org/rss/1.0/modules/content/":"content", "http://purl.org/dc/elements/1.1/":"dc", "http://purl.org/dc/terms/":"dcterms", "http://www.w3.org/1999/02/22-rdf-syntax-ns#":"rdf", "http://purl.org/rss/1.0/":"rss1", "http://my.netscape.com/rdf/simple/0.9/":"rss1", "http://wellformedweb.org/CommentAPI/":"wfw", "http://purl.org/rss/1.0/modules/wiki/":"wiki", "http://www.w3.org/XML/1998/namespace":"xml", "http://search.yahoo.com/mrss/":"media", "http://search.yahoo.com/mrss":"media" } // We allow a very small set of namespaces in XHTML content, // for attributes only var gAllowedXHTMLNamespaces = { "http://www.w3.org/XML/1998/namespace":"xml", "http://www.w3.org/TR/xhtml2":"xhtml2", "http://www.w3.org/2005/07/aaa":"aaa", // if someone ns qualifies XHTML, we have to prefix it to avoid an // attribute collision. "http://www.w3.org/1999/xhtml":"xhtml" } function FeedResult() {} FeedResult.prototype = { bozo: false, doc: null, version: null, headers: null, uri: null, stylesheet: null, registerExtensionPrefix: function FR_registerExtensionPrefix(ns, prefix) { throw Cr.NS_ERROR_NOT_IMPLEMENTED; }, // XPCOM stuff classDescription: FR_CLASSNAME, classID: FR_CLASSID, contractID: FR_CONTRACTID, QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedResult]) } function Feed() { this.subtitle = null; this.title = null; this.items = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray); this.link = null; this.id = null; this.generator = null; this.authors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray); this.contributors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray); this.baseURI = null; this.enclosureCount = 0; this.type = Ci.nsIFeed.TYPE_FEED; } Feed.prototype = { searchLists: { title: ["title", "rss1:title", "atom03:title", "atom:title"], subtitle: ["description","dc:description","rss1:description", "atom03:tagline","atom:subtitle"], items: ["items","atom03_entries","entries"], id: ["atom:id","rdf:about"], generator: ["generator"], authors : ["authors"], contributors: ["contributors"], title: ["title","rss1:title", "atom03:title","atom:title"], link: [["link",strToURI],["rss1:link",strToURI]], categories: ["categories", "dc:subject"], rights: ["atom03:rights","atom:rights"], cloud: ["cloud"], image: ["image", "rss1:image", "atom:logo"], textInput: ["textInput", "rss1:textinput"], skipDays: ["skipDays"], skipHours: ["skipHours"], updated: ["pubDate", "lastBuildDate", "atom03:modified", "dc:date", "dcterms:modified", "atom:updated"] }, normalize: function Feed_normalize() { fieldsToObj(this, this.searchLists); if (this.skipDays) this.skipDays = this.skipDays.getProperty("days"); if (this.skipHours) this.skipHours = this.skipHours.getProperty("hours"); if (this.updated) this.updated = dateParse(this.updated); // Assign Atom link if needed if (bagHasKey(this.fields, "links")) this._atomLinksToURI(); this._calcEnclosureCountAndFeedType(); // Resolve relative image links if (this.image && bagHasKey(this.image, "url")) this._resolveImageLink(); this._resetBagMembersToRawText([this.searchLists.subtitle, this.searchLists.title]); }, _calcEnclosureCountAndFeedType: function Feed_calcEnclosureCountAndFeedType() { var entries_with_enclosures = 0; var audio_count = 0; var image_count = 0; var video_count = 0; var other_count = 0; for (var i = 0; i < this.items.length; ++i) { var entry = this.items.queryElementAt(i, Ci.nsIFeedEntry); entry.QueryInterface(Ci.nsIFeedContainer); if (entry.enclosures && entry.enclosures.length > 0) { ++entries_with_enclosures; for (var e = 0; e < entry.enclosures.length; ++e) { var enc = entry.enclosures.queryElementAt(e, Ci.nsIWritablePropertyBag2); if (enc.hasKey("type")) { var enctype = enc.get("type"); if (/^audio/.test(enctype)) { ++audio_count; } else if (/^image/.test(enctype)) { ++image_count; } else if (/^video/.test(enctype)) { ++video_count; } else { ++other_count; } } else { ++other_count; } } } } var feedtype = Ci.nsIFeed.TYPE_FEED; // For a feed to be marked as TYPE_VIDEO, TYPE_AUDIO and TYPE_IMAGE, // we enforce two things: // // 1. all entries must have at least one enclosure // 2. all enclosures must be video for TYPE_VIDEO, audio for TYPE_AUDIO or image // for TYPE_IMAGE // // Otherwise it's a TYPE_FEED. if (entries_with_enclosures == this.items.length && other_count == 0) { if (audio_count > 0 && !video_count && !image_count) { feedtype = Ci.nsIFeed.TYPE_AUDIO; } else if (image_count > 0 && !audio_count && !video_count) { feedtype = Ci.nsIFeed.TYPE_IMAGE; } else if (video_count > 0 && !audio_count && !image_count) { feedtype = Ci.nsIFeed.TYPE_VIDEO; } } this.type = feedtype; this.enclosureCount = other_count + video_count + audio_count + image_count; }, _atomLinksToURI: function Feed_linkToURI() { var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray); var alternates = findAtomLinks("alternate", links); if (alternates.length > 0) { var href = alternates[0].getPropertyAsAString("href"); var base; if (bagHasKey(alternates[0], "xml:base")) base = alternates[0].getPropertyAsAString("xml:base"); this.link = this._resolveURI(href, base); } }, _resolveImageLink: function Feed_resolveImageLink() { var base; if (bagHasKey(this.image, "xml:base")) base = this.image.getPropertyAsAString("xml:base"); var url = this._resolveURI(this.image.getPropertyAsAString("url"), base); if (url) this.image.setPropertyAsAString("url", url.spec); }, _resolveURI: function Feed_resolveURI(linkSpec, baseSpec) { var uri = null; try { var base = baseSpec ? strToURI(baseSpec, this.baseURI) : this.baseURI; uri = strToURI(linkSpec, base); } catch(e) { LOG(e); } return uri; }, // reset the bag to raw contents, not text constructs _resetBagMembersToRawText: function Feed_resetBagMembers(fieldLists) { for (var i=0; i<fieldLists.length; i++) { for (var j=0; j<fieldLists[i].length; j++) { if (bagHasKey(this.fields, fieldLists[i][j])) { var textConstruct = this.fields.getProperty(fieldLists[i][j]); this.fields.setPropertyAsAString(fieldLists[i][j], textConstruct.text); } } } }, // XPCOM stuff classDescription: FEED_CLASSNAME, classID: FEED_CLASSID, contractID: FEED_CONTRACTID, QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeed, Ci.nsIFeedContainer]) } function Entry() { this.summary = null; this.content = null; this.title = null; this.fields = Cc["@mozilla.org/hash-property-bag;1"]. createInstance(Ci.nsIWritablePropertyBag2); this.link = null; this.id = null; this.baseURI = null; this.updated = null; this.published = null; this.authors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray); this.contributors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray); } Entry.prototype = { fields: null, enclosures: null, mediaContent: null, searchLists: { title: ["title", "rss1:title", "atom03:title", "atom:title"], link: [["link",strToURI],["rss1:link",strToURI]], id: [["guid", makePropGetter("guid")], "rdf:about", "atom03:id", "atom:id"], authors : ["authors"], contributors: ["contributors"], summary: ["description", "rss1:description", "dc:description", "atom03:summary", "atom:summary"], content: ["content:encoded","atom03:content","atom:content"], rights: ["atom03:rights","atom:rights"], published: ["pubDate", "atom03:issued", "dcterms:issued", "atom:published"], updated: ["pubDate", "atom03:modified", "dc:date", "dcterms:modified", "atom:updated"] }, normalize: function Entry_normalize() { fieldsToObj(this, this.searchLists); // Assign Atom link if needed if (bagHasKey(this.fields, "links")) this._atomLinksToURI(); // Populate enclosures array this._populateEnclosures(); // The link might be a guid w/ permalink=true if (!this.link && bagHasKey(this.fields, "guid")) { var guid = this.fields.getProperty("guid"); var isPermaLink = true; if (bagHasKey(guid, "isPermaLink")) isPermaLink = guid.getProperty("isPermaLink").toLowerCase() != "false"; if (guid && isPermaLink) this.link = strToURI(guid.getProperty("guid")); } if (this.updated) this.updated = dateParse(this.updated); if (this.published) this.published = dateParse(this.published); this._resetBagMembersToRawText([this.searchLists.content, this.searchLists.summary, this.searchLists.title]); }, _populateEnclosures: function Entry_populateEnclosures() { if (bagHasKey(this.fields, "links")) this._atomLinksToEnclosures(); // Add RSS2 enclosure to enclosures if (bagHasKey(this.fields, "enclosure")) this._enclosureToEnclosures(); // Add media:content to enclosures if (bagHasKey(this.fields, "mediacontent")) this._mediacontentToEnclosures(); // Add media:content in media:group to enclosures if (bagHasKey(this.fields, "mediagroup")) this._mediagroupToEnclosures(); }, __enclosure_map: null, _addToEnclosures: function Entry_addToEnclosures(new_enc) { // items we add to the enclosures array get displayed in the FeedWriter and // they must have non-empty urls. if (!bagHasKey(new_enc, "url") || new_enc.getPropertyAsAString("url") == "") return; if (this.__enclosure_map == null) this.__enclosure_map = {}; var previous_enc = this.__enclosure_map[new_enc.getPropertyAsAString("url")]; if (previous_enc != undefined) { previous_enc.QueryInterface(Ci.nsIWritablePropertyBag2); if (!bagHasKey(previous_enc, "type") && bagHasKey(new_enc, "type")) previous_enc.setPropertyAsAString("type", new_enc.getPropertyAsAString("type")); if (!bagHasKey(previous_enc, "length") && bagHasKey(new_enc, "length")) previous_enc.setPropertyAsAString("length", new_enc.getPropertyAsAString("length")); return; } if (this.enclosures == null) { this.enclosures = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray); this.enclosures.QueryInterface(Ci.nsIMutableArray); } this.enclosures.appendElement(new_enc, false); this.__enclosure_map[new_enc.getPropertyAsAString("url")] = new_enc; }, _atomLinksToEnclosures: function Entry_linkToEnclosure() { var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray); var enc_links = findAtomLinks("enclosure", links); if (enc_links.length == 0) return; for (var i = 0; i < enc_links.length; ++i) { var link = enc_links[i]; // an enclosure must have an href if (!(link.getProperty("href"))) return; var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); // copy Atom bits over to equivalent enclosure bits enc.setPropertyAsAString("url", link.getPropertyAsAString("href")); if (bagHasKey(link, "type")) enc.setPropertyAsAString("type", link.getPropertyAsAString("type")); if (bagHasKey(link, "length")) enc.setPropertyAsAString("length", link.getPropertyAsAString("length")); this._addToEnclosures(enc); } }, _enclosureToEnclosures: function Entry_enclosureToEnclosures() { var enc = this.fields.getPropertyAsInterface("enclosure", Ci.nsIPropertyBag2); if (!(enc.getProperty("url"))) return; this._addToEnclosures(enc); }, _mediacontentToEnclosures: function Entry_mediacontentToEnclosures() { var mediacontent = this.fields.getPropertyAsInterface("mediacontent", Ci.nsIArray); for (var i = 0; i < mediacontent.length; ++i) { var contentElement = mediacontent.queryElementAt(i, Ci.nsIWritablePropertyBag2); // media:content don't require url, but if it's not there, we should // skip it. if (!bagHasKey(contentElement, "url")) continue; var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); // copy media:content bits over to equivalent enclosure bits enc.setPropertyAsAString("url", contentElement.getPropertyAsAString("url")); if (bagHasKey(contentElement, "type")) { enc.setPropertyAsAString("type", contentElement.getPropertyAsAString("type")); } if (bagHasKey(contentElement, "fileSize")) { enc.setPropertyAsAString("length", contentElement.getPropertyAsAString("fileSize")); } this._addToEnclosures(enc); } }, _mediagroupToEnclosures: function Entry_mediagroupToEnclosures() { var group = this.fields.getPropertyAsInterface("mediagroup", Ci.nsIPropertyBag2); var content = group.getPropertyAsInterface("mediacontent", Ci.nsIArray); for (var i = 0; i < content.length; ++i) { var contentElement = content.queryElementAt(i, Ci.nsIWritablePropertyBag2); // media:content don't require url, but if it's not there, we should // skip it. if (!bagHasKey(contentElement, "url")) continue; var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); // copy media:content bits over to equivalent enclosure bits enc.setPropertyAsAString("url", contentElement.getPropertyAsAString("url")); if (bagHasKey(contentElement, "type")) { enc.setPropertyAsAString("type", contentElement.getPropertyAsAString("type")); } if (bagHasKey(contentElement, "fileSize")) { enc.setPropertyAsAString("length", contentElement.getPropertyAsAString("fileSize")); } this._addToEnclosures(enc); } }, // XPCOM stuff classDescription: ENTRY_CLASSNAME, classID: ENTRY_CLASSID, contractID: ENTRY_CONTRACTID, QueryInterface: XPCOMUtils.generateQI( [Ci.nsIFeedEntry, Ci.nsIFeedContainer] ) } Entry.prototype._atomLinksToURI = Feed.prototype._atomLinksToURI; Entry.prototype._resolveURI = Feed.prototype._resolveURI; Entry.prototype._resetBagMembersToRawText = Feed.prototype._resetBagMembersToRawText; // TextConstruct represents and element that could contain (X)HTML function TextConstruct() { this.lang = null; this.base = null; this.type = "text"; this.text = null; this.unescapeHTML = Cc[UNESCAPE_CONTRACTID]. getService(Ci.nsIScriptableUnescapeHTML); } TextConstruct.prototype = { plainText: function TC_plainText() { if (this.type != "text") { return this.unescapeHTML.unescape(stripTags(this.text)); } return this.text; }, createDocumentFragment: function TC_createDocumentFragment(element) { if (this.type == "text") { var doc = element.ownerDocument; var docFragment = doc.createDocumentFragment(); var node = doc.createTextNode(this.text); docFragment.appendChild(node); return docFragment; } var isXML; if (this.type == "xhtml") isXML = true else if (this.type == "html") isXML = false; else return null; return this.unescapeHTML.parseFragment(this.text, isXML, this.base, element); }, // XPCOM stuff classDescription: TEXTCONSTRUCT_CLASSNAME, classID: TEXTCONSTRUCT_CLASSID, contractID: TEXTCONSTRUCT_CONTRACTID, QueryInterface: XPCOMUtils.generateQI([Ci.nsIFeedTextConstruct]) } // Generator represents the software that produced the feed function Generator() { this.lang = null; this.agent = null; this.version = null; this.uri = null; // nsIFeedElementBase this._attributes = null; this.baseURI = null; } Generator.prototype = { get attributes() { return this._attributes; }, set attributes(value) { this._attributes = value; this.version = this._attributes.getValueFromName("","version"); var uriAttribute = this._attributes.getValueFromName("","uri") || this._attributes.getValueFromName("","url"); this.uri = strToURI(uriAttribute, this.baseURI); // RSS1 uriAttribute = this._attributes.getValueFromName(RDF_NS,"resource"); if (uriAttribute) { this.agent = uriAttribute; this.uri = strToURI(uriAttribute, this.baseURI); } }, // XPCOM stuff classDescription: GENERATOR_CLASSNAME, classID: GENERATOR_CLASSID, contractID: GENERATOR_CONTRACTID, QueryInterface: XPCOMUtils.generateQI( [Ci.nsIFeedGenerator, Ci.nsIFeedElementBase] ) } function Person() { this.name = null; this.uri = null; this.email = null; // nsIFeedElementBase this.attributes = null; this.baseURI = null; } Person.prototype = { // XPCOM stuff classDescription: PERSON_CLASSNAME, classID: PERSON_CLASSID, contractID: PERSON_CONTRACTID, QueryInterface: XPCOMUtils.generateQI( [Ci.nsIFeedPerson, Ci.nsIFeedElementBase] ) } /** * Map a list of fields into properties on a container. * * @param container An nsIFeedContainer * @param fields A list of fields to search for. List members can * be a list, in which case the second member is * transformation function (like parseInt). */ function fieldsToObj(container, fields) { var props,prop,field,searchList; for (var key in fields) { searchList = fields[key]; for (var i=0; i < searchList.length; ++i) { props = searchList[i]; prop = null; field = isArray(props) ? props[0] : props; try { prop = container.fields.getProperty(field); } catch(e) { } if (prop) { prop = isArray(props) ? props[1](prop) : prop; container[key] = prop; } } } } /** * Lower cases an element's localName property * @param element A DOM element. * * @returns The lower case localName property of the specified element */ function LC(element) { return element.localName.toLowerCase(); } // TODO move these post-processor functions // create a generator element function atomGenerator(s, generator) { generator.QueryInterface(Ci.nsIFeedGenerator); generator.agent = trimString(s); return generator; } // post-process atom:logo to create an RSS2-like structure function atomLogo(s, logo) { logo.setPropertyAsAString("url", trimString(s)); } // post-process an RSS category, map it to the Atom fields. function rssCatTerm(s, cat) { // add slash handling? cat.setPropertyAsAString("term", trimString(s)); return cat; } // post-process a GUID function rssGuid(s, guid) { guid.setPropertyAsAString("guid", trimString(s)); return guid; } // post-process an RSS author element // // It can contain a field like this: // // <author>lawyer@boyer.net (Lawyer Boyer)</author> // // or, delightfully, a field like this: // // <dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator> // // We want to split this up and assign it to corresponding Atom // fields. // function rssAuthor(s,author) { author.QueryInterface(Ci.nsIFeedPerson); // check for RSS2 string format var chars = trimString(s); var matches = chars.match(/(.*)\((.*)\)/); var emailCheck = /^([a-zA-Z0-9_\.\-])+\@(([a-zA-Z0-9\-])+\.)+([a-zA-Z0-9]{2,4})+$/; if (matches) { var match1 = trimString(matches[1]); var match2 = trimString(matches[2]); if (match2.indexOf("mailto:") == 0) match2 = match2.substring(7); if (emailCheck.test(match1)) { author.email = match1; author.name = match2; } else if (emailCheck.test(match2)) { author.email = match2; author.name = match1; } else { // put it back together author.name = match1 + " (" + match2 + ")"; } } else { author.name = chars; if (chars.indexOf('@')) author.email = chars; } return author; } // // skipHours and skipDays map to arrays, so we need to change the // string to an nsISupports in order to stick it in there. // function rssArrayElement(s) { var str = Cc["@mozilla.org/supports-string;1"]. createInstance(Ci.nsISupportsString); str.data = s; str.QueryInterface(Ci.nsISupportsString); return str; } /***** Some feed utils from TBird *****/ /** * Tests a RFC822 date against a regex. * @param aDateStr A string to test as an RFC822 date. * * @returns A boolean indicating whether the string is a valid RFC822 date. */ function isValidRFC822Date(aDateStr) { var regex = new RegExp(RFC822_RE); return regex.test(aDateStr); } /** * Removes leading and trailing whitespace from a string. * @param s The string to trim. * * @returns A new string with whitespace stripped. */ function trimString(s) { return(s.replace(/^\s+/, "").replace(/\s+$/, "")); } // Regular expression matching RFC822 dates const RFC822_RE = "^((Mon|Tue|Wed|Thu|Fri|Sat|Sun)([a-z]+)?,? *)?\\d\\d?" + " +(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)([a-z]+)?" + " +\\d\\d(\\d\\d)? +\\d?\\d:\\d\\d(:\\d\\d)?" + " +([+-]?\\d\\d\\d\\d|GMT|UT[C]?|(E|C|M|P)(ST|DT)|[A-IK-Z])$"; /** * XXX -- need to decide what this should return. * XXX -- Is there a Date class usable from C++? * * Tries tries parsing various date formats. * @param dateString * A string that is supposedly an RFC822 or RFC3339 date. * @returns A Date.toString XXX--fixme */ function dateParse(dateString) { var date = trimString(dateString); if (date.search(/^\d\d\d\d/) != -1) //Could be a ISO8601/W3C date return W3CToIETFDate(dateString); if (isValidRFC822Date(date)) return date; if (!isNaN(parseInt(date, 10))) { //It's an integer, so maybe it's a timestamp var d = new Date(parseInt(date, 10) * 1000); var now = new Date(); var yeardiff = now.getFullYear() - d.getFullYear(); if ((yeardiff >= 0) && (yeardiff < 3)) { // it's quite likely the correct date. 3 years is an arbitrary cutoff, // but this is an invalid date format, and there's no way to verify // its correctness. return d.toString(); } } // Can't help. return null; } const XHTML_NS = "http://www.w3.org/1999/xhtml"; // The XHTMLHandler handles inline XHTML found in things like atom:summary function XHTMLHandler(processor, isAtom, waiPrefixes) { this._buf = ""; this._processor = processor; this._depth = 0; this._isAtom = isAtom; // a stack of lists tracking in-scope namespaces this._inScopeNS = []; this._waiPrefixes = waiPrefixes; } // The fidelity can be improved here, to allow handling of stuff like // SVG and MathML. XXX XHTMLHandler.prototype = { // look back up at the declared namespaces // we always use the same prefixes for our safe stuff _isInScope: function XH__isInScope(ns) { for (var i in this._inScopeNS) { for (var uri in this._inScopeNS[i]) { if (this._inScopeNS[i][uri] == ns) return true; } } return false; }, startDocument: function XH_startDocument() { }, endDocument: function XH_endDocument() { }, startElement: function XH_startElement(uri, localName, qName, attributes) { ++this._depth; this._inScopeNS.push([]); // RFC4287 requires XHTML to be wrapped in a div that is *not* part of // the content. This prevents people from screwing up namespaces, but // we need to skip it here. if (this._isAtom && this._depth == 1 && localName == "div") return; // If it's an XHTML element, record it. Otherwise, it's ignored. if (uri == XHTML_NS) { this._buf += "<" + localName; var uri; for (var i=0; i < attributes.length; ++i) { uri = attributes.getURI(i); // XHTML attributes aren't in a namespace if (uri == "") { this._buf += (" " + attributes.getLocalName(i) + "='" + xmlEscape(attributes.getValue(i)) + "'"); } else { // write a small set of allowed attribute namespaces var prefix = gAllowedXHTMLNamespaces[uri]; if (prefix != null) { // The attribute value we'll attempt to write var attributeValue = xmlEscape(attributes.getValue(i)); // More QName abuse from W3C var rolePrefix = ""; if (attributes.getLocalName(i) == "role") { for (var aPrefix in this._waiPrefixes) { if (attributeValue.indexOf(aPrefix + ":") == 0) { // Now, due to the terrible layer mismatch // that is QNames in content, we have to see // if the attribute value clashes with our // namespace declarations. var isCollision = false; for (var uriKey in gAllowedXHTMLNamespaces) { if (gAllowedXHTMLNamespaces[uriKey] == aPrefix) isCollision = true; } if (isCollision) { rolePrefix = aPrefix + i; attributeValue = rolePrefix + ":" + attributeValue.substring(aPrefix.length + 1); } else { rolePrefix = aPrefix; } break; } } if (rolePrefix) this._buf += (" xmlns:" + rolePrefix + "='" + WAIROLE_NS + "'"); } // it's an allowed attribute NS. // write the attribute this._buf += (" " + prefix + ":" + attributes.getLocalName(i) + "='" + attributeValue + "'"); // write an xmlns declaration if necessary if (prefix != "xml" && !this._isInScope(uri)) { this._inScopeNS[this._inScopeNS.length - 1].push(uri); this._buf += " xmlns:" + prefix + "='" + uri + "'"; } } } } this._buf += ">"; } }, endElement: function XH_endElement(uri, localName, qName) { --this._depth; this._inScopeNS.pop(); // We need to skip outer divs in Atom. See comment in startElement. if (this._isAtom && this._depth == 0 && localName == "div") return; // When we peek too far, go back to the main processor if (this._depth < 0) { this._processor.returnFromXHTMLHandler(trimString(this._buf), uri, localName, qName); return; } // If it's an XHTML element, record it. Otherwise, it's ignored. if (uri == XHTML_NS) { this._buf += "</" + localName + ">"; } }, characters: function XH_characters(data) { this._buf += xmlEscape(data); }, startPrefixMapping: function XH_startPrefixMapping(prefix, uri) { if (prefix && uri == WAIROLE_NS) this._waiPrefixes[prefix] = WAIROLE_NS; }, endPrefixMapping: function FP_endPrefixMapping(prefix) { if (prefix) delete this._waiPrefixes[prefix]; }, processingInstruction: function XH_processingInstruction() { }, } /** * The ExtensionHandler deals with elements we haven't explicitly * added to our transition table in the FeedProcessor. */ function ExtensionHandler(processor) { this._buf = ""; this._depth = 0; this._hasChildElements = false; // The FeedProcessor this._processor = processor; // Fields of the outermost extension element. this._localName = null; this._uri = null; this._qName = null; this._attrs = null; } ExtensionHandler.prototype = { startDocument: function EH_startDocument() { }, endDocument: function EH_endDocument() { }, startElement: function EH_startElement(uri, localName, qName, attrs) { ++this._depth; var prefix = gNamespaces[uri] ? gNamespaces[uri] + ":" : ""; var key = prefix + localName; if (this._depth == 1) { this._uri = uri; this._localName = localName; this._qName = qName; this._attrs = attrs; } // if we descend into another element, we won't send text this._hasChildElements = (this._depth > 1); }, endElement: function EH_endElement(uri, localName, qName) { --this._depth; if (this._depth == 0) { var text = this._hasChildElements ? null : trimString(this._buf); this._processor.returnFromExtHandler(this._uri, this._localName, text, this._attrs); } }, characters: function EH_characters(data) { if (!this._hasChildElements) this._buf += data; }, startPrefixMapping: function EH_startPrefixMapping() { }, endPrefixMapping: function EH_endPrefixMapping() { }, processingInstruction: function EH_processingInstruction() { }, }; /** * ElementInfo is a simple container object that describes * some characteristics of a feed element. For example, it * says whether an element can be expected to appear more * than once inside a given entry or feed. */ function ElementInfo(fieldName, containerClass, closeFunc, isArray) { this.fieldName = fieldName; this.containerClass = containerClass; this.closeFunc = closeFunc; this.isArray = isArray; this.isWrapper = false; } /** * FeedElementInfo represents a feed element, usually the root. */ function FeedElementInfo(fieldName, feedVersion) { this.isWrapper = false; this.fieldName = fieldName; this.feedVersion = feedVersion; } /** * Some feed formats include vestigial wrapper elements that we don't * want to include in our object model, but we do need to keep track * of during parsing. */ function WrapperElementInfo(fieldName) { this.isWrapper = true; this.fieldName = fieldName; } /***** The Processor *****/ function FeedProcessor() { this._reader = Cc[SAX_CONTRACTID].createInstance(Ci.nsISAXXMLReader); this._buf = ""; this._feed = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); this._handlerStack = []; this._xmlBaseStack = []; // sparse array keyed to nesting depth this._depth = 0; this._state = "START"; this._result = null; this._extensionHandler = null; this._xhtmlHandler = null; this._haveSentResult = false; // http://www.w3.org/WAI/PF/GUI/ uses QNames in content :( this._waiPrefixes = {}; // The nsIFeedResultListener waiting for the parse results this.listener = null; // These elements can contain (X)HTML or plain text. // We keep a table here that contains their default treatment this._textConstructs = {"atom:title":"text", "atom:summary":"text", "atom:rights":"text", "atom:content":"text", "atom:subtitle":"text", "description":"html", "rss1:description":"html", "dc:description":"html", "content:encoded":"html", "title":"text", "rss1:title":"text", "atom03:title":"text", "atom03:tagline":"text", "atom03:summary":"text", "atom03:content":"text"}; this._stack = []; this._trans = { "START": { //If we hit a root RSS element, treat as RSS2. "rss": new FeedElementInfo("RSS2", "rss2"), // If we hit an RDF element, if could be RSS1, but we can't // verify that until we hit a rss1:channel element. "rdf:RDF": new WrapperElementInfo("RDF"), // If we hit a Atom 1.0 element, treat as Atom 1.0. "atom:feed": new FeedElementInfo("Atom", "atom"), // Treat as Atom 0.3 "atom03:feed": new FeedElementInfo("Atom03", "atom03"), }, /********* RSS2 **********/ "IN_RSS2": { "channel": new WrapperElementInfo("channel") }, "IN_CHANNEL": { "item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true), "managingEditor": new ElementInfo("authors", Cc[PERSON_CONTRACTID], rssAuthor, true), "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID], rssAuthor, true), "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], rssAuthor, true), "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], rssAuthor, true), "category": new ElementInfo("categories", null, rssCatTerm, true), "cloud": new ElementInfo("cloud", null, null, false), "image": new ElementInfo("image", null, null, false), "textInput": new ElementInfo("textInput", null, null, false), "skipDays": new ElementInfo("skipDays", null, null, false), "skipHours": new ElementInfo("skipHours", null, null, false), "generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID], atomGenerator, false), }, "IN_ITEMS": { "author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], rssAuthor, true), "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID], rssAuthor, true), "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], rssAuthor, true), "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], rssAuthor, true), "category": new ElementInfo("categories", null, rssCatTerm, true), "enclosure": new ElementInfo("enclosure", null, null, false), "media:content": new ElementInfo("mediacontent", null, null, true), "media:group": new ElementInfo("mediagroup", null, null, false), "guid": new ElementInfo("guid", null, rssGuid, false) }, "IN_SKIPDAYS": { "day": new ElementInfo("days", null, rssArrayElement, true) }, "IN_SKIPHOURS":{ "hour": new ElementInfo("hours", null, rssArrayElement, true) }, "IN_MEDIAGROUP": { "media:content": new ElementInfo("mediacontent", null, null, true) }, /********* RSS1 **********/ "IN_RDF": { // If we hit a rss1:channel, we can verify that we have RSS1 "rss1:channel": new FeedElementInfo("rdf_channel", "rss1"), "rss1:image": new ElementInfo("image", null, null, false), "rss1:textinput": new ElementInfo("textInput", null, null, false), "rss1:item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true), }, "IN_RDF_CHANNEL": { "admin:generatorAgent": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID], null, false), "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID], rssAuthor, true), "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], rssAuthor, true), "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], rssAuthor, true), }, /********* ATOM 1.0 **********/ "IN_ATOM": { "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], null, true), "atom:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID], atomGenerator, false), "atom:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], null, true), "atom:link": new ElementInfo("links", null, null, true), "atom:logo": new ElementInfo("atom:logo", null, atomLogo, false), "atom:entry": new ElementInfo("entries", Cc[ENTRY_CONTRACTID], null, true) }, "IN_ENTRIES": { "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], null, true), "atom:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], null, true), "atom:link": new ElementInfo("links", null, null, true), }, /********* ATOM 0.3 **********/ "IN_ATOM03": { "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], null, true), "atom03:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], null, true), "atom03:link": new ElementInfo("links", null, null, true), "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID], null, true), "atom03:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID], atomGenerator, false), }, "IN_ATOM03_ENTRIES": { "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], null, true), "atom03:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], null, true), "atom03:link": new ElementInfo("links", null, null, true), "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID], null, true) } } } // See startElement for a long description of how feeds are processed. FeedProcessor.prototype = { // Set ourselves as the SAX handler, and set the base URI _init: function FP_init(uri) { this._reader.contentHandler = this; this._reader.errorHandler = this; this._result = Cc[FR_CONTRACTID].createInstance(Ci.nsIFeedResult); if (uri) { this._result.uri = uri; this._reader.baseURI = uri; this._xmlBaseStack[0] = uri; } }, // This function is called once we figure out what type of feed // we're dealing with. Some feed types require digging a bit further // than the root. _docVerified: function FP_docVerified(version) { this._result.doc = Cc[FEED_CONTRACTID].createInstance(Ci.nsIFeed); this._result.doc.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1]; this._result.doc.fields = this._feed; this._result.version = version; }, // When we're done with the feed, let the listener know what // happened. _sendResult: function FP_sendResult() { this._haveSentResult = true; try { // Can be null when a non-feed is fed to us if (this._result.doc) this._result.doc.normalize(); } catch (e) { LOG("FIXME: " + e); } try { if (this.listener != null) this.listener.handleResult(this._result); } finally { this._result = null; } }, // Parsing functions parseFromStream: function FP_parseFromStream(stream, uri) { this._init(uri); this._reader.parseFromStream(stream, null, stream.available(), "application/xml"); this._reader = null; }, parseFromString: function FP_parseFromString(inputString, uri) { this._init(uri); this._reader.parseFromString(inputString, "application/xml"); this._reader = null; }, parseAsync: function FP_parseAsync(requestObserver, uri) { this._init(uri); this._reader.parseAsync(requestObserver); }, // nsIStreamListener // The XMLReader will throw sensible exceptions if these get called // out of order. onStartRequest: function FP_onStartRequest(request, context) { // this will throw if the request is not a channel, but so will nsParser. var channel = request.QueryInterface(Ci.nsIChannel); channel.contentType = "application/vnd.mozilla.maybe.feed"; this._reader.onStartRequest(request, context); }, onStopRequest: function FP_onStopRequest(request, context, statusCode) { try { this._reader.onStopRequest(request, context, statusCode); } finally { this._reader = null; } }, onDataAvailable: function FP_onDataAvailable(request, context, inputStream, offset, count) { this._reader.onDataAvailable(request, context, inputStream, offset, count); }, // nsISAXErrorHandler // We only care about fatal errors. When this happens, we may have // parsed through the feed metadata and some number of entries. The // listener can still show some of that data if it wants, and we'll // set the bozo bit to indicate we were unable to parse all the way // through. fatalError: function FP_reportError() { this._result.bozo = true; //XXX need to QI to FeedProgressListener if (!this._haveSentResult) this._sendResult(); }, // nsISAXContentHandler startDocument: function FP_startDocument() { //LOG("----------"); }, endDocument: function FP_endDocument() { if (!this._haveSentResult) this._sendResult(); }, // The transitions defined above identify elements that contain more // than just text. For example RSS items contain many fields, and so // do Atom authors. The only commonly used elements that contain // mixed content are Atom Text Constructs of type="xhtml", which we // delegate to another handler for cleaning. That leaves a couple // different types of elements to deal with: those that should occur // only once, such as title elements, and those that can occur // multiple times, such as the RSS category element and the Atom // link element. Most of the RSS1/DC elements can occur multiple // times in theory, but in practice, the only ones that do have // analogues in Atom. // // Some elements are also groups of attributes or sub-elements, // while others are simple text fields. For the most part, we don't // have to pay explicit attention to the simple text elements, // unless we want to post-process the resulting string to transform // it into some richer object like a Date or URI. // // Elements that have more sophisticated content models still end up // being dictionaries, whether they are based on attributes like RSS // cloud, sub-elements like Atom author, or even items and // entries. These elements are treated as "containers". It's // theoretically possible for a container to have an attribute with // the same universal name as a sub-element, but none of the feed // formats allow this by default, and I don't of any extension that // works this way. // startElement: function FP_startElement(uri, localName, qName, attributes) { this._buf = ""; ++this._depth; var elementInfo; //LOG("<" + localName + ">"); // Check for xml:base var base = attributes.getValueFromName(XMLNS, "base"); if (base) { this._xmlBaseStack[this._depth] = strToURI(base, this._xmlBaseStack[this._xmlBaseStack.length - 1]); } // To identify the element we're dealing with, we look up the // namespace URI in our gNamespaces dictionary, which will give us // a "canonical" prefix for a namespace URI. For example, this // allows Dublin Core "creator" elements to be consistently mapped // to "dc:creator", for easy field access by consumer code. This // strategy also happens to shorten up our state table. var key = this._prefixForNS(uri) + localName; // Check to see if we need to hand this off to our XHTML handler. // The elements we're dealing with will look like this: // // <title type="xhtml"> // <div xmlns="http://www.w3.org/1999/xhtml"> // A title with <b>bold</b> and <i>italics</i>. // </div> // </title> // // When it returns in returnFromXHTMLHandler, the handler should // give us back a string like this: // // "A title with <b>bold</b> and <i>italics</i>." // // The Atom spec explicitly says the div is not part of the content, // and explicitly allows whitespace collapsing. // if ((this._result.version == "atom" || this._result.version == "atom03") && this._textConstructs[key] != null) { var type = attributes.getValueFromName("","type"); if (type != null && type.indexOf("xhtml") >= 0) { this._xhtmlHandler = new XHTMLHandler(this, (this._result.version == "atom"), this._waiPrefixes); this._reader.contentHandler = this._xhtmlHandler; return; } } // Check our current state, and see if that state has a defined // transition. For example, this._trans["atom:entry"]["atom:author"] // will have one, and it tells us to add an item to our authors array. if (this._trans[this._state] && this._trans[this._state][key]) { elementInfo = this._trans[this._state][key]; } else { // If we don't have a transition, hand off to extension handler this._extensionHandler = new ExtensionHandler(this); this._reader.contentHandler = this._extensionHandler; this._extensionHandler.startElement(uri, localName, qName, attributes); return; } // This distinguishes wrappers like 'channel' from elements // we'd actually like to do something with (which will test true). this._handlerStack[this._depth] = elementInfo; if (elementInfo.isWrapper) { this._state = "IN_" + elementInfo.fieldName.toUpperCase(); this._stack.push([this._feed, this._state]); } else if (elementInfo.feedVersion) { this._state = "IN_" + elementInfo.fieldName.toUpperCase(); // Check for the older RSS2 variants if (elementInfo.feedVersion == "rss2") elementInfo.feedVersion = this._findRSSVersion(attributes); else if (uri == RSS090NS) elementInfo.feedVersion = "rss090"; this._docVerified(elementInfo.feedVersion); this._stack.push([this._feed, this._state]); this._mapAttributes(this._feed, attributes); } else { this._state = this._processComplexElement(elementInfo, attributes); } }, // In the endElement handler, we decrement the stack and look // for cleanup/transition functions to execute. The second part // of the state transition works as above in startElement, but // the state we're looking for is prefixed with an underscore // to distinguish endElement events from startElement events. endElement: function FP_endElement(uri, localName, qName) { var elementInfo = this._handlerStack[this._depth]; //LOG("</" + localName + ">"); if (elementInfo && !elementInfo.isWrapper) this._closeComplexElement(elementInfo); // cut down xml:base context if (this._xmlBaseStack.length == this._depth + 1) this._xmlBaseStack = this._xmlBaseStack.slice(0, this._depth); // our new state is whatever is at the top of the stack now if (this._stack.length > 0) this._state = this._stack[this._stack.length - 1][1]; this._handlerStack = this._handlerStack.slice(0, this._depth); --this._depth; }, // Buffer up character data. The buffer is cleared with every // opening element. characters: function FP_characters(data) { this._buf += data; }, // TODO: It would be nice to check new prefixes here, and if they // don't conflict with the ones we've defined, throw them in a // dictionary to check. startPrefixMapping: function FP_startPrefixMapping(prefix, uri) { // Thanks for QNames in content, W3C // This will even be a perf hit for every single feed // http://www.w3.org/WAI/PF/GUI/ if (prefix && uri == WAIROLE_NS) this._waiPrefixes[prefix] = WAIROLE_NS; }, endPrefixMapping: function FP_endPrefixMapping(prefix) { if (prefix) delete this._waiPrefixes[prefix]; }, processingInstruction: function FP_processingInstruction(target, data) { if (target == "xml-stylesheet") { var hrefAttribute = data.match(/href=[\"\'](.*?)[\"\']/); if (hrefAttribute && hrefAttribute.length == 2) this._result.stylesheet = strToURI(hrefAttribute[1], this._result.uri); } }, // end of nsISAXContentHandler // Handle our more complicated elements--those that contain // attributes and child elements. _processComplexElement: function FP__processComplexElement(elementInfo, attributes) { var obj, key, prefix; // If the container is an entry/item, it'll need to have its // more esoteric properties put in the 'fields' property bag. if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) { obj = elementInfo.containerClass.createInstance(Ci.nsIFeedEntry); obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1]; this._mapAttributes(obj.fields, attributes); } else if (elementInfo.containerClass) { obj = elementInfo.containerClass.createInstance(Ci.nsIFeedElementBase); obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1]; obj.attributes = attributes; // just set the SAX attributes } else { obj = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); this._mapAttributes(obj, attributes); } // We should have a container/propertyBag that's had its // attributes processed. Now we need to attach it to its // container. var newProp; // First we'll see what's on top of the stack. var container = this._stack[this._stack.length - 1][0]; // Check to see if it has the property var prop; try { prop = container.getProperty(elementInfo.fieldName); } catch(e) { } if (elementInfo.isArray) { if (!prop) { container.setPropertyAsInterface(elementInfo.fieldName, Cc[ARRAY_CONTRACTID]. createInstance(Ci.nsIMutableArray)); } newProp = container.getProperty(elementInfo.fieldName); // XXX This QI should not be necessary, but XPConnect seems to fly // off the handle in the browser, and loses track of the interface // on large files. Bug 335638. newProp.QueryInterface(Ci.nsIMutableArray); newProp.appendElement(obj,false); // If new object is an nsIFeedContainer, we want to deal with // its member nsIPropertyBag instead. if (isIFeedContainer(obj)) newProp = obj.fields; } else { // If it doesn't, set it. if (!prop) { container.setPropertyAsInterface(elementInfo.fieldName,obj); } newProp = container.getProperty(elementInfo.fieldName); } // make our new state name, and push the property onto the stack var newState = "IN_" + elementInfo.fieldName.toUpperCase(); this._stack.push([newProp, newState, obj]); return newState; }, // Sometimes we need reconcile the element content with the object // model for a given feed. We use helper functions to do the // munging, but we need to identify array types here, so the munging // happens only to the last element of an array. _closeComplexElement: function FP__closeComplexElement(elementInfo) { var stateTuple = this._stack.pop(); var container = stateTuple[0]; var containerParent = stateTuple[2]; var element = null; var isArray = isIArray(container); // If it's an array and we have to post-process, // grab the last element if (isArray) element = container.queryElementAt(container.length - 1, Ci.nsISupports); else element = container; // Run the post-processing function if there is one. if (elementInfo.closeFunc) element = elementInfo.closeFunc(this._buf, element); // If an nsIFeedContainer was on top of the stack, // we need to normalize it if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) containerParent.normalize(); // If it's an array, re-set the last element if (isArray) container.replaceElementAt(element, container.length - 1, false); }, _prefixForNS: function FP_prefixForNS(uri) { if (!uri) return ""; var prefix = gNamespaces[uri]; if (prefix) return prefix + ":"; if (uri.toLowerCase().indexOf("http://backend.userland.com") == 0) return ""; else return null; }, _mapAttributes: function FP__mapAttributes(bag, attributes) { // Cycle through the attributes, and set our properties using the // prefix:localNames we find in our namespace dictionary. for (var i = 0; i < attributes.length; ++i) { var key = this._prefixForNS(attributes.getURI(i)) + attributes.getLocalName(i); var val = attributes.getValue(i); bag.setPropertyAsAString(key, val); } }, // Only for RSS2esque formats _findRSSVersion: function FP__findRSSVersion(attributes) { var versionAttr = trimString(attributes.getValueFromName("", "version")); var versions = { "0.91":"rss091", "0.92":"rss092", "0.93":"rss093", "0.94":"rss094" } if (versions[versionAttr]) return versions[versionAttr]; if (versionAttr.substr(0,2) != "2.") return "rssUnknown"; return "rss2"; }, // unknown element values are returned here. See startElement above // for how this works. returnFromExtHandler: function FP_returnExt(uri, localName, chars, attributes) { --this._depth; // take control of the SAX events this._reader.contentHandler = this; if (localName == null && chars == null) return; // we don't take random elements inside rdf:RDF if (this._state == "IN_RDF") return; // Grab the top of the stack var top = this._stack[this._stack.length - 1]; if (!top) return; var container = top[0]; // Grab the last element if it's an array if (isIArray(container)) { var contract = this._handlerStack[this._depth].containerClass; // check if it's something specific, but not an entry if (contract && contract != Cc[ENTRY_CONTRACTID]) { var el = container.queryElementAt(container.length - 1, Ci.nsIFeedElementBase); // XXX there must be a way to flatten these interfaces if (contract == Cc[PERSON_CONTRACTID]) el.QueryInterface(Ci.nsIFeedPerson); else return; // don't know about this interface var propName = localName; var prefix = gNamespaces[uri]; // synonyms if ((uri == "" || prefix && ((prefix.indexOf("atom") > -1) || (prefix.indexOf("rss") > -1))) && (propName == "url" || propName == "href")) propName = "uri"; try { if (el[propName] !== "undefined") { var propValue = chars; // convert URI-bearing values to an nsIURI if (propName == "uri") { var base = this._xmlBaseStack[this._xmlBaseStack.length - 1]; propValue = strToURI(chars, base); } el[propName] = propValue; } } catch(e) { // ignore XPConnect errors } // the rest of the function deals with entry- and feed-level stuff return; } else { container = container.queryElementAt(container.length - 1, Ci.nsIWritablePropertyBag2); } } // Make the buffer our new property var propName = this._prefixForNS(uri) + localName; // But, it could be something containing HTML. If so, // we need to know about that. if (this._textConstructs[propName] != null && this._handlerStack[this._depth].containerClass !== null) { var newProp = Cc[TEXTCONSTRUCT_CONTRACTID]. createInstance(Ci.nsIFeedTextConstruct); newProp.text = chars; // Look up the default type in our table var type = this._textConstructs[propName]; var typeAttribute = attributes.getValueFromName("","type"); if (this._result.version == "atom" && typeAttribute != null) { type = typeAttribute; } else if (this._result.version == "atom03" && typeAttribute != null) { if (typeAttribute.toLowerCase().indexOf("xhtml") >= 0) { type = "xhtml"; } else if (typeAttribute.toLowerCase().indexOf("html") >= 0) { type = "html"; } else if (typeAttribute.toLowerCase().indexOf("text") >= 0) { type = "text"; } } // If it's rss feed-level description, it's not supposed to have html if (this._result.version.indexOf("rss") >= 0 && this._handlerStack[this._depth].containerClass != ENTRY_CONTRACTID) { type = "text"; } newProp.type = type; newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1]; container.setPropertyAsInterface(propName, newProp); } else { container.setPropertyAsAString(propName, chars); } }, // Sometimes, we'll hand off SAX handling duties to an XHTMLHandler // (see above) that will scrape out non-XHTML stuff, normalize // namespaces, and remove the wrapper div from Atom 1.0. When the // XHTMLHandler is done, it'll callback here. returnFromXHTMLHandler: function FP_returnFromXHTMLHandler(chars, uri, localName, qName) { // retake control of the SAX content events this._reader.contentHandler = this; // Grab the top of the stack var top = this._stack[this._stack.length - 1]; if (!top) return; var container = top[0]; // Assign the property var newProp = newProp = Cc[TEXTCONSTRUCT_CONTRACTID]. createInstance(Ci.nsIFeedTextConstruct); newProp.text = chars; newProp.type = "xhtml"; newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1]; container.setPropertyAsInterface(this._prefixForNS(uri) + localName, newProp); // XHTML will cause us to peek too far. The XHTML handler will // send us an end element to call. RFC4287-valid feeds allow a // more graceful way to handle this. Unfortunately, we can't count // on compliance at this point. this.endElement(uri, localName, qName); }, // XPCOM stuff classDescription: FP_CLASSNAME, classID: FP_CLASSID, contractID: FP_CONTRACTID, QueryInterface: XPCOMUtils.generateQI( [Ci.nsIFeedProcessor, Ci.nsISAXContentHandler, Ci.nsISAXErrorHandler, Ci.nsIStreamListener, Ci.nsIRequestObserver] ) } var components = [FeedProcessor, FeedResult, Feed, Entry, TextConstruct, Generator, Person]; function NSGetModule(compMgr, fileSpec) { return XPCOMUtils.generateModule(components); } |
:: Command execute :: | |
--[ c99shell v. 1.0 pre-release build #13 powered by Captain Crunch Security Team | http://ccteam.ru | Generation time: 0.0156 ]-- |