feedparser - drop request lib, use new parser (breaking)

This commit is contained in:
Dave Conway-Jones
2025-12-26 16:01:01 +00:00
parent 5de96ca8fd
commit 6e6a100663
6 changed files with 93 additions and 67 deletions

View File

@@ -1,4 +1,8 @@
<script type="text/html" data-template-name="feedparse">
<div class="form-row">
<label for="node-input-name"><i class="fa fa-tag"></i> <span data-i18n="node-red:common.label.name"></span></label>
<input type="text" id="node-input-name" data-i18n="[placeholder]node-red:common.label.name">
</div>
<div class="form-row">
<label for="node-input-url"><i class="fa fa-globe"></i> <span data-i18n="feedparse.label.feedurl"></span></label>
<input type="text" id="node-input-url">
@@ -7,15 +11,16 @@
<label for="node-input-interval"><i class="fa fa-repeat"></i> <span data-i18n="feedparse.label.refresh"></span></label>
<input type="text" id="node-input-interval" style="width:60px"> <span data-i18n="feedparse.label.minutes"></span>
</div>
<div class="form-row">
<div class="form-row">
<label> </label>
<input type="checkbox" id="node-input-sendarray" style="display:inline-block; width:20px; vertical-align:baseline;">
<span data-i18n="feedparse.label.sendarray"></span>
</div>
<div class="form-row" id="node-input-ignorefirst-row">
<label> </label>
<input type="checkbox" id="node-input-ignorefirst" style="display:inline-block; width:20px; vertical-align:baseline;">
<span data-i18n="feedparse.label.ignorefirst"></span>
</div>
<div class="form-row">
<label for="node-input-name"><i class="fa fa-tag"></i> <span data-i18n="node-red:common.label.name"></span></label>
<input type="text" id="node-input-name" data-i18n="[placeholder]node-red:common.label.name">
</div>
</script>
<script type="text/javascript">
@@ -26,7 +31,8 @@
name: {value:""},
url: {value:"", required:true},
interval: { value:15, required:true, validate:function(v) {return (!isNaN(parseInt(v)) && (parseInt(v) <= 35790))} },
ignorefirst: { value:false }
ignorefirst: { value:false },
sendarray: { value:false }
},
inputs:0,
outputs:1,
@@ -39,6 +45,15 @@
},
labelStyle: function() {
return this.name?"node_label_italic":"";
},
oneditprepare: function() {
$('#node-input-sendarray').on('change', function() {
if ($(this).is(':checked')) {
$('#node-input-ignorefirst-row').hide()
} else {
$('#node-input-ignorefirst-row').show()
}
});
}
});
</script>

View File

@@ -1,9 +1,7 @@
module.exports = function(RED) {
"use strict";
var FeedParser = require("feedparser");
var request = require("request");
var url = require('url');
const { parseFeed } = require('@rowanmanning/feed-parser');
function FeedParseNode(n) {
RED.nodes.createNode(this,n);
@@ -12,58 +10,49 @@ module.exports = function(RED) {
this.interval = (parseInt(n.interval)||15) * 60000;
this.interval_id = null;
this.ignorefirst = n.ignorefirst || false;
this.sendarray = n.sendarray || false;
this.seen = {};
this.donefirst = false;
var node = this;
var parsedUrl = url.parse(this.url);
if (!(parsedUrl.host || (parsedUrl.hostname && parsedUrl.port)) && !parsedUrl.isUnix) {
node.error(RED._("feedparse.errors.invalidurl"),RED._("feedparse.errors.invalidurl"));
}
else {
var getFeed = function() {
var req = request(node.url, {timeout:10000, pool:false});
//req.setMaxListeners(50);
req.setHeader('user-agent', 'Mozilla/5.0 (Node-RED)');
req.setHeader('accept', 'application/rss+xml,text/html,application/xhtml+xml,application/xml');
var feedparser = new FeedParser();
/**
 * Poll the configured feed once and emit what it contains.
 *
 * Fetches node.url and parses the body with parseFeed. When node.sendarray is
 * true the whole parsed feed is sent as a single message; otherwise one message
 * is sent per article that has not been seen before, or whose published time
 * differs from the one recorded in node.seen.
 *
 * Every failure (network error, non-200 status, unparseable body) is reported
 * through node.error and node.status. The returned promise never rejects, so
 * the function can be handed straight to setInterval / setTimeout.
 *
 * @returns {Promise<void>} resolves once the poll has been fully processed.
 */
async function getFeed() {
    // Log the failure and show it on the node. A msg object is passed as the second
    // argument of node.error so that the error can be routed to Catch nodes.
    const fail = (logMessage, statusText, extra) => {
        node.error(logMessage, { feed: node.url, ...extra });
        node.status({fill:"red",shape:"dot",text:statusText});
    };

    let body;
    try {
        const response = await fetch(node.url);
        if (response.status !== 200) {
            fail("Bad Feed: "+node.url, response.status+": "+RED._("feedparse.errors.badstatuscode"), { statusCode: response.status });
            return;
        }
        body = await response.text();
    }
    catch (err) {
        // fetch rejects on network failures and on URLs it cannot parse.
        fail(err, String((err && err.message) || err));
        return;
    }

    let feed;
    try {
        feed = parseFeed(body);
    }
    catch (err) {
        fail(err, RED._("feedparse.errors.badparse"));
        return;
    }

    if (node.sendarray === true) {
        // JSON round trip yields a plain, serialisable copy of the parsed feed.
        node.send(JSON.parse(JSON.stringify(feed)));
    }
    else {
        for (const article of feed.items || []) {
            // 0 stands for "no usable published time"; an unparseable date would give NaN,
            // which never compares equal and would make the article re-send on every poll.
            const parsedTime = article.published ? new Date(article.published).getTime() : 0;
            const published = Number.isNaN(parsedTime) ? 0 : parsedTime;

            // Own-property check so ids such as "constructor" are not mistaken for seen articles.
            const known = Object.prototype.hasOwnProperty.call(node.seen, article.id);
            if (known && (node.seen[article.id] === 0 || node.seen[article.id] === published)) {
                continue;
            }
            node.seen[article.id] = published;

            // With ignorefirst set, articles found before node.donefirst becomes true
            // are only recorded in node.seen, not sent.
            if (node.ignorefirst === true && node.donefirst === false) {
                continue;
            }

            const msg = { article: JSON.parse(JSON.stringify(article)) };
            msg.topic = msg.article.title;
            msg.payload = msg.article.description;
            msg.link = msg.article.url;
            msg.feed = node.url;
            node.send(msg);
        }
    }
    node.status({fill:"green",shape:"dot",text:""});
}
node.interval_id = setInterval(function() { node.donefirst = true; getFeed(); }, node.interval);
setTimeout(getFeed, 2000);
node.on("close", function() {
if (this.interval_id != null) {
clearInterval(this.interval_id);

View File

@@ -10,11 +10,25 @@ Run the following command in your Node-RED user directory - typically `~/.node-r
npm install node-red-node-feedparser
Usage
-----
### Input
Monitors an RSS/atom feed for new entries.
You can set the polling time in minutes. Defaults to 15 minutes.
**Breaking change** - v1.0 - the node has been rewritten to remove the deprecated
request library, and now uses a different parsing library. As a result the returned data is
slightly different, so the message properties do not exactly match those of previous versions.
### Outputs
- topic - *string* - Title of article.
- payload - *string* - Description of article.
- link - *string* - URL link to article.
- feed - *string* - Top level feed link, as configured.
- article - *object* - Complete article object.
The <code>msg.article</code> property contains the complete article object,
which has properties such as <code>.title</code>, <code>.description</code>,
<code>.image</code> and so on.
If you select to return a single object - the only thing returned is the
complete original response, which has different properties from those listed above.
You can set the polling time in minutes. Defaults to 15 minutes. The refresh interval cannot be greater than 35790 minutes (approx 24.8 days).

View File

@@ -3,13 +3,20 @@
<h3>Outputs</h3>
<dl class="message-properties">
<dt>topic <span class="property-type">string</span></dt>
<dd>Original article link</dd>
<dd>Title of article.</dd>
<dt>payload <span class="property-type">string</span></dt>
<dd>Description</dd>
<dd>Description of article.</dd>
<dt>link <span class="property-type">string</span></dt>
<dd>URL link to article.</dd>
<dt>feed <span class="property-type">string</span></dt>
<dd>Top level feed link, as configured.</dd>
<dt>article <span class="property-type">object</span></dt>
<dd>Complete article object</dd>
<dd>Complete article object.</dd>
</dl>
<p>The <code>msg.article</code> property contains the complete article object,
which has properties such as <code>.title</code>, <code>.summary</code>, <code>.date</code> and so on.</p>
which has properties such as <code>.title</code>, <code>.description</code>,
<code>.image</code> and so on.</p>
<p>If you select to return a single object - the only thing returned is the
complete original response, which has different properties from those listed above.</p>
<p>The refresh interval cannot be greater than 35790 minutes (approx 24.8 days).</p>
</script>

View File

@@ -5,12 +5,14 @@
"feedurl": "Feed url",
"refresh": "Refresh",
"minutes": "minutes",
"ignorefirst": "Ignore any stories older than restart"
"ignorefirst": "Ignore any stories older than restart",
"sendarray": "Send response as a single object"
},
"errors": {
"badstatuscode": "error - Bad status code",
"invalidurl": "Invalid url",
"invalidinterval": "Repeat interval too large"
"invalidinterval": "Repeat interval too large",
"badparse": "error - Bad feed parse"
}
}
}

View File

@@ -1,10 +1,9 @@
{
"name": "node-red-node-feedparser",
"version": "0.3.0",
"version": "1.0.0",
"description": "A Node-RED node to get RSS Atom feeds.",
"dependencies": {
"feedparser": "^2.2.10",
"request": "^2.88.2"
"@rowanmanning/feed-parser": "^2.1.1"
},
"repository": {
"type": "git",