1
0
mirror of https://github.com/node-red/node-red-nodes.git synced 2023-10-10 13:36:58 +02:00

mlsentiment: allow custom tokenizers (#1026)

* allow custom tokenizers

allows use of a tokens object to specify custom tokenizers

updated mlsentiment to v2.0.0 (with temporary fix for comparative score applied)

updated documentation

* update unit tests
This commit is contained in:
Chuan Khoo 2023-09-01 06:25:00 +10:00 committed by GitHub
parent 8a29f9e379
commit b67452d569
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 138 additions and 90 deletions

View File

@ -4,14 +4,16 @@
<h3>Outputs</h3> <h3>Outputs</h3>
<dl class="message-properties"> <dl class="message-properties">
<dt>sentiment <span class="property-type">object</span></dt> <dt>sentiment <span class="property-type">object</span></dt>
<dd>contains the resulting AFINN-111 sentiment.</dd> <dd>contains the resulting AFINN-165 sentiment.</dd>
<dt>sentiment.score <span class="property-type">number</span></dt> <dt>sentiment.score <span class="property-type">number</span></dt>
<dd>the sentiment score.</dd> <dd>the sentiment score.</dd>
</dl> </dl>
<h3>Inputs</h3> <h3>Inputs</h3>
<dl class="message-properties"> <dl class="message-properties">
<dt>words <span class="property-type">object</span></dt> <dt>words <span class="property-type">object</span></dt>
<dd>an object of words and scores to override or add words can be supplied - <code>{ word:score,... }</code>.</dd> <dd>besides sending a raw string in msg.payload, an object of words and scores to override or add words can be supplied - <code>{ word:score,... }</code>.</dd>
<dt>tokens <span class="property-type">object</span></dt>
<dd>a <code>tokens</code> array of pre-split tokens can be supplied to bypass the default tokenizer, which may be required by some languages - <code>['世界',...]</code>.</dd>
</dl> </dl>
<dl class="message-properties"> <dl class="message-properties">
<dt>lang <span class="property-type">string</span></dt> <dt>lang <span class="property-type">string</span></dt>

View File

@ -4,14 +4,16 @@
<h3>出力</h3> <h3>出力</h3>
<dl class="message-properties"> <dl class="message-properties">
<dt>sentiment <span class="property-type">オブジェクト</span></dt> <dt>sentiment <span class="property-type">オブジェクト</span></dt>
<dd>AFINN-111による感情分析の結果</dd> <dd>AFINN-165による感情分析の結果</dd>
<dt>sentiment.score <span class="property-type">数値</span></dt> <dt>sentiment.score <span class="property-type">数値</span></dt>
<dd>感情分析スコア</dd> <dd>感情分析スコア</dd>
</dl> </dl>
<h3>入力</h3> <h3>入力</h3>
<dl class="message-properties"> <dl class="message-properties">
<dt>overrides <span class="property-type">オブジェクト</span></dt> <dt>words <span class="property-type">オブジェクト</span></dt>
<dd>単語スコアの上書きをするためのオブジェクト - <code>{ word:score,... }</code></dd> <dd>単語スコアの上書きをするためのオブジェクト - <code>{ word:score,... }</code></dd>
<dt>tokens <span class="property-type">object</span></dt>
<dd>一部の言語で必要となるカスタムトークナイザーが可能になります - <code>['世界',...]</code></dd>
</dl> </dl>
<h3>詳細</h3> <h3>詳細</h3>
<p>ゼロ以上のスコアはポジティブ、ゼロ以下はネガティブを意味します。</p> <p>ゼロ以上のスコアはポジティブ、ゼロ以下はネガティブを意味します。</p>

View File

@ -12,12 +12,9 @@ module.exports = function(RED) {
this.on("input", function (msg) { this.on("input", function (msg) {
var value = RED.util.getMessageProperty(msg, node.property); var value = RED.util.getMessageProperty(msg, node.property);
if (value !== undefined) { if (value !== undefined) {
if (msg.hasOwnProperty("overrides")) { multilangSentiment(value, node.lang || msg.lang || 'en', { 'words': msg.words || null, 'tokens': msg.tokens || null }, function (err, result) {
msg.extras = msg.overrides;
delete msg.overrides;
}
multilangsentiment(value, node.lang || msg.lang || 'en', {words: msg.extras || null}, function (err, result) {
msg.sentiment = result; msg.sentiment = result;
msg.sentiment.comparative = msg.sentiment.score / msg.sentiment.tokens.length; // temporarily addresses an issue in v2.0.0: https://github.com/marcellobarile/multilang-sentiment/issues/10
node.send(msg); node.send(msg);
}); });
} }

View File

@ -1,9 +1,9 @@
{ {
"name": "node-red-node-multilang-sentiment", "name": "node-red-node-multilang-sentiment",
"version" : "0.1.0", "version": "0.2.0",
"description" : "A Node-RED node that uses the AFINN-165 wordlists for sentiment analysis of words translated into multiple languages including emojis.", "description": "A Node-RED node that uses the AFINN-165 wordlists for sentiment analysis of words translated into multiple languages including emoji.",
"dependencies": { "dependencies": {
"multilang-sentiment" : "^1.2.0" "multilang-sentiment": "^2.0.0"
}, },
"repository": { "repository": {
"type": "git", "type": "git",
@ -11,7 +11,13 @@
"directory": "tree/master/analysis/mlsentiment" "directory": "tree/master/analysis/mlsentiment"
}, },
"license": "Apache-2.0", "license": "Apache-2.0",
"keywords": [ "node-red", "sentiment", "anaylsis", "AFINN" ], "keywords": [
"node-red",
"sentiment",
"anaylsis",
"AFINN",
"emoji"
],
"node-red": { "node-red": {
"nodes": { "nodes": {
"mlsentiment": "mlsentiment.js" "mlsentiment": "mlsentiment.js"
@ -21,5 +27,8 @@
"name": "Dave Conway-Jones", "name": "Dave Conway-Jones",
"email": "ceejay@vnet.ibm.com", "email": "ceejay@vnet.ibm.com",
"url": "http://nodered.org" "url": "http://nodered.org"
} },
"contributors" : [
"Chuan Khoo <khoo.chuan@gmail.com> (https://chuank.com)"
]
} }

View File

@ -173,8 +173,8 @@ describe('mlsentiment Node', function() {
done(); done();
}); });
var testString = 'sick, wicked'; var testString = 'sick, wicked';
var overrides = {'sick': 10, 'wicked': 10 }; var wordOverrides = { 'sick': 10, 'wicked': 10 };
jn1.receive({payload:testString,overrides:overrides}); jn1.receive({ payload: testString, words: wordOverrides });
}); });
}); });
@ -192,8 +192,46 @@ describe('mlsentiment Node', function() {
done(); done();
}); });
var testString = 'sick, wicked'; var testString = 'sick, wicked';
var overrides = {'sick': 10, 'wicked': 10 }; var wordOverrides = { 'sick': 10, 'wicked': 10 };
jn1.receive({foo:testString,overrides:overrides}); jn1.receive({ foo: testString, words: wordOverrides });
});
});
// Checks that a message carrying a pre-split token array on msg.tokens
// produces a numeric sentiment score, asserted here to be exactly -3.
it('should allow you to use custom tokens', function (done) {
// Minimal flow: the mlsentiment node (jn1) is wired to a helper node (jn2) that receives its output.
var flow = [{ id: "jn1", type: "mlsentiment", wires: [["jn2"]] },
{ id: "jn2", type: "helper" }];
helper.load(sentimentNode, flow, function () {
var jn1 = helper.getNode("jn1");
var jn2 = helper.getNode("jn2");
// Assertions run when the helper receives the message sent on by jn1.
jn2.on("input", function (msg) {
msg.should.have.property('sentiment');
msg.sentiment.should.have.property('score');
msg.sentiment.score.should.be.a.Number();
msg.sentiment.score.should.equal(-3);
done();
});
// The text goes in msg.payload as one unbroken string; tokenOverrides is the
// same text split into individual tokens and is sent as msg.tokens.
var testString = '世界就是一个疯子的囚笼';
var tokenOverrides = ['世界', '就', '是', '一个', '疯子', '的', '囚笼'];
jn1.receive({ payload: testString, tokens: tokenOverrides });
});
});
// Same check as the custom-tokens test, but the node is configured with
// property "foo", so the text is supplied on msg.foo instead of msg.payload.
it('should allow you to use custom tokens - alternative property', function (done) {
// Minimal flow: the mlsentiment node (jn1, reading property "foo") is wired to a helper node (jn2).
var flow = [{ id: "jn1", type: "mlsentiment", property: "foo", wires: [["jn2"]] },
{ id: "jn2", type: "helper" }];
helper.load(sentimentNode, flow, function () {
var jn1 = helper.getNode("jn1");
var jn2 = helper.getNode("jn2");
// Assertions run when the helper receives the message sent on by jn1.
jn2.on("input", function (msg) {
msg.should.have.property('sentiment');
msg.sentiment.should.have.property('score');
msg.sentiment.score.should.be.a.Number();
msg.sentiment.score.should.equal(-3);
done();
});
// The text goes in msg.foo as one unbroken string; tokenOverrides is the
// same text split into individual tokens and is sent as msg.tokens.
var testString = '世界就是一个疯子的囚笼';
var tokenOverrides = ['世界', '就', '是', '一个', '疯子', '的', '囚笼'];
jn1.receive({ foo: testString, tokens: tokenOverrides });
}); });
}); });