feat: Complete zCode CLI X with Telegram bot integration

- Add full Telegram bot functionality with Z.AI API integration
- Implement 4 tools: Bash, FileEdit, WebSearch, Git
- Add 3 agents: Code Reviewer, Architect, DevOps Engineer
- Add 6 skills for common coding tasks
- Add systemd service file for 24/7 operation
- Add nginx configuration for HTTPS webhook
- Add comprehensive documentation
- Implement WebSocket server for real-time updates
- Add logging system with Winston
- Add environment validation

🤖 zCode CLI X - Agentic coder with Z.AI + Telegram integration
This commit is contained in:
admin
2026-05-05 09:01:26 +00:00
Unverified
parent 4a7035dd92
commit 875c7f9b91
24688 changed files with 3224957 additions and 221 deletions

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017 Dom Christie
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,232 @@
# Turndown
Convert HTML into Markdown with JavaScript.
## Project Updates
* `to-markdown` has been renamed to Turndown. See the [migration guide](https://github.com/domchristie/to-markdown/wiki/Migrating-from-to-markdown-to-Turndown) for details.
* Turndown repository has changed its URL to https://github.com/mixmark-io/turndown.
## Installation
npm:
```
npm install turndown
```
Browser:
```html
<script src="https://unpkg.com/turndown/dist/turndown.js"></script>
```
For usage with RequireJS, UMD versions are located in `lib/turndown.umd.js` (for Node.js) and `lib/turndown.browser.umd.js` for browser usage. These files are generated when the npm package is published. To generate them manually, clone this repo and run `npm run build`.
## Usage
```js
// For Node.js
var TurndownService = require('turndown')
var turndownService = new TurndownService()
var markdown = turndownService.turndown('<h1>Hello world!</h1>')
```
Turndown also accepts DOM nodes as input (either element nodes, document nodes, or document fragment nodes):
```js
var markdown = turndownService.turndown(document.getElementById('content'))
```
## Options
Options can be passed in to the constructor on instantiation. For example:
```js
var turndownService = new TurndownService({ option: 'value' })
```
| Option | Valid values | Default |
| :-------------------- | :------------ | :------ |
| `headingStyle` | `setext` or `atx` | `setext` |
| `hr` | Any [Thematic break](http://spec.commonmark.org/0.27/#thematic-breaks) | `* * *` |
| `bulletListMarker` | `-`, `+`, or `*` | `*` |
| `codeBlockStyle` | `indented` or `fenced` | `indented` |
| `fence` | ` ``` ` or `~~~` | ` ``` ` |
| `emDelimiter` | `_` or `*` | `_` |
| `strongDelimiter` | `**` or `__` | `**` |
| `linkStyle` | `inlined` or `referenced` | `inlined` |
| `linkReferenceStyle` | `full`, `collapsed`, or `shortcut` | `full` |
| `preformattedCode` | `false` or [`true`](https://github.com/lucthev/collapse-whitespace/issues/16) | `false` |
### Advanced Options
| Option | Valid values | Default |
| :-------------------- | :------------ | :------ |
| `blankReplacement` | rule replacement function | See **Special Rules** below |
| `keepReplacement` | rule replacement function | See **Special Rules** below |
| `defaultReplacement` | rule replacement function | See **Special Rules** below |
## Methods
### `addRule(key, rule)`
The `key` parameter is a unique name for the rule for easy reference. Example:
```js
turndownService.addRule('strikethrough', {
filter: ['del', 's', 'strike'],
replacement: function (content) {
return '~' + content + '~'
}
})
```
`addRule` returns the `TurndownService` instance for chaining.
See **Extending with Rules** below.
### `keep(filter)`
Determines which elements are to be kept and rendered as HTML. By default, Turndown does not keep any elements. The filter parameter works like a rule filter (see section on filters belows). Example:
```js
turndownService.keep(['del', 'ins'])
turndownService.turndown('<p>Hello <del>world</del><ins>World</ins></p>') // 'Hello <del>world</del><ins>World</ins>'
```
This will render `<del>` and `<ins>` elements as HTML when converted.
`keep` can be called multiple times, with the newly added keep filters taking precedence over older ones. Keep filters will be overridden by the standard CommonMark rules and any added rules. To keep elements that are normally handled by those rules, add a rule with the desired behaviour.
`keep` returns the `TurndownService` instance for chaining.
### `remove(filter)`
Determines which elements are to be removed altogether i.e. converted to an empty string. By default, Turndown does not remove any elements. The filter parameter works like a rule filter (see section on filters belows). Example:
```js
turndownService.remove('del')
turndownService.turndown('<p>Hello <del>world</del><ins>World</ins></p>') // 'Hello World'
```
This will remove `<del>` elements (and contents).
`remove` can be called multiple times, with the newly added remove filters taking precedence over older ones. Remove filters will be overridden by the keep filters, standard CommonMark rules, and any added rules. To remove elements that are normally handled by those rules, add a rule with the desired behaviour.
`remove` returns the `TurndownService` instance for chaining.
### `use(plugin|array)`
Use a plugin, or an array of plugins. Example:
```js
// Import plugins from turndown-plugin-gfm
var turndownPluginGfm = require('turndown-plugin-gfm')
var gfm = turndownPluginGfm.gfm
var tables = turndownPluginGfm.tables
var strikethrough = turndownPluginGfm.strikethrough
// Use the gfm plugin
turndownService.use(gfm)
// Use the table and strikethrough plugins only
turndownService.use([tables, strikethrough])
```
`use` returns the `TurndownService` instance for chaining.
See **Plugins** below.
## Extending with Rules
Turndown can be extended by adding **rules**. A rule is a plain JavaScript object with `filter` and `replacement` properties. For example, the rule for converting `<p>` elements is as follows:
```js
{
filter: 'p',
replacement: function (content) {
return '\n\n' + content + '\n\n'
}
}
```
The filter selects `<p>` elements, and the replacement function returns the `<p>` contents separated by two new lines.
### `filter` String|Array|Function
The filter property determines whether or not an element should be replaced with the rule's `replacement`. DOM nodes can be selected simply using a tag name or an array of tag names:
* `filter: 'p'` will select `<p>` elements
* `filter: ['em', 'i']` will select `<em>` or `<i>` elements
The tag names in the `filter` property are expected in lowercase, regardless of their form in the document.
Alternatively, the filter can be a function that returns a boolean depending on whether a given node should be replaced. The function is passed a DOM node as well as the `TurndownService` options. For example, the following rule selects `<a>` elements (with an `href`) when the `linkStyle` option is `inlined`:
```js
filter: function (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
}
```
### `replacement` Function
The replacement function determines how an element should be converted. It should return the Markdown string for a given node. The function is passed the node's content, the node itself, and the `TurndownService` options.
The following rule shows how `<em>` elements are converted:
```js
rules.emphasis = {
filter: ['em', 'i'],
replacement: function (content, node, options) {
return options.emDelimiter + content + options.emDelimiter
}
}
```
### Special Rules
**Blank rule** determines how to handle blank elements. It overrides every rule (even those added via `addRule`). A node is blank if it only contains whitespace, and it's not an `<a>`, `<td>`,`<th>` or a void element. Its behaviour can be customised using the `blankReplacement` option.
**Keep rules** determine how to handle the elements that should not be converted, i.e. rendered as HTML in the Markdown output. By default, no elements are kept. Block-level elements will be separated from surrounding content by blank lines. Its behaviour can be customised using the `keepReplacement` option.
**Remove rules** determine which elements to remove altogether. By default, no elements are removed.
**Default rule** handles nodes which are not recognised by any other rule. By default, it outputs the node's text content (separated by blank lines if it is a block-level element). Its behaviour can be customised with the `defaultReplacement` option.
### Rule Precedence
Turndown iterates over the set of rules, and picks the first one that matches the `filter`. The following list describes the order of precedence:
1. Blank rule
2. Added rules (optional)
3. Commonmark rules
4. Keep rules
5. Remove rules
6. Default rule
## Plugins
The plugin API provides a convenient way for developers to apply multiple extensions. A plugin is just a function that is called with the `TurndownService` instance.
## Escaping Markdown Characters
Turndown uses backslashes (`\`) to escape Markdown characters in the HTML input. This ensures that these characters are not interpreted as Markdown when the output is compiled back to HTML. For example, the contents of `<h1>1. Hello world</h1>` needs to be escaped to `1\. Hello world`, otherwise it will be interpreted as a list item rather than a heading.
To avoid the complexity and the performance implications of parsing the content of every HTML element as Markdown, Turndown uses a group of regular expressions to escape potential Markdown syntax. As a result, the escaping rules can be quite aggressive.
### Overriding `TurndownService.prototype.escape`
If you are confident in doing so, you may want to customise the escaping behaviour to suit your needs. This can be done by overriding `TurndownService.prototype.escape`. `escape` takes the text of each HTML element and should return a version with the Markdown characters escaped.
Note: text in code elements is never passed to`escape`.
## License
turndown is copyright © 2017+ Dom Christie and released under the MIT license.

View File

@@ -0,0 +1,976 @@
var TurndownService = (function () {
'use strict';
function extend (destination) {
for (var i = 1; i < arguments.length; i++) {
var source = arguments[i];
for (var key in source) {
if (source.hasOwnProperty(key)) destination[key] = source[key];
}
}
return destination
}
function repeat (character, count) {
return Array(count + 1).join(character)
}
function trimLeadingNewlines (string) {
return string.replace(/^\n*/, '')
}
function trimTrailingNewlines (string) {
// avoid match-at-end regexp bottleneck, see #370
var indexEnd = string.length;
while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
return string.substring(0, indexEnd)
}
function trimNewlines (string) {
return trimTrailingNewlines(trimLeadingNewlines(string))
}
var blockElements = [
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
];
function isBlock (node) {
return is(node, blockElements)
}
var voidElements = [
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
];
function isVoid (node) {
return is(node, voidElements)
}
function hasVoid (node) {
return has(node, voidElements)
}
var meaningfulWhenBlankElements = [
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
'AUDIO', 'VIDEO'
];
function isMeaningfulWhenBlank (node) {
return is(node, meaningfulWhenBlankElements)
}
function hasMeaningfulWhenBlank (node) {
return has(node, meaningfulWhenBlankElements)
}
function is (node, tagNames) {
return tagNames.indexOf(node.nodeName) >= 0
}
function has (node, tagNames) {
return (
node.getElementsByTagName &&
tagNames.some(function (tagName) {
return node.getElementsByTagName(tagName).length
})
)
}
var rules = {};
rules.paragraph = {
filter: 'p',
replacement: function (content) {
return '\n\n' + content + '\n\n'
}
};
rules.lineBreak = {
filter: 'br',
replacement: function (content, node, options) {
return options.br + '\n'
}
};
rules.heading = {
filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
replacement: function (content, node, options) {
var hLevel = Number(node.nodeName.charAt(1));
if (options.headingStyle === 'setext' && hLevel < 3) {
var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
return (
'\n\n' + content + '\n' + underline + '\n\n'
)
} else {
return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
}
}
};
rules.blockquote = {
filter: 'blockquote',
replacement: function (content) {
content = trimNewlines(content).replace(/^/gm, '> ');
return '\n\n' + content + '\n\n'
}
};
rules.list = {
filter: ['ul', 'ol'],
replacement: function (content, node) {
var parent = node.parentNode;
if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
return '\n' + content
} else {
return '\n\n' + content + '\n\n'
}
}
};
rules.listItem = {
filter: 'li',
replacement: function (content, node, options) {
var prefix = options.bulletListMarker + ' ';
var parent = node.parentNode;
if (parent.nodeName === 'OL') {
var start = parent.getAttribute('start');
var index = Array.prototype.indexOf.call(parent.children, node);
prefix = (start ? Number(start) + index : index + 1) + '. ';
}
var isParagraph = /\n$/.test(content);
content = trimNewlines(content) + (isParagraph ? '\n' : '');
content = content.replace(/\n/gm, '\n' + ' '.repeat(prefix.length)); // indent
return (
prefix + content + (node.nextSibling ? '\n' : '')
)
}
};
rules.indentedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'indented' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
return (
'\n\n ' +
node.firstChild.textContent.replace(/\n/g, '\n ') +
'\n\n'
)
}
};
rules.fencedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'fenced' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
var className = node.firstChild.getAttribute('class') || '';
var language = (className.match(/language-(\S+)/) || [null, ''])[1];
var code = node.firstChild.textContent;
var fenceChar = options.fence.charAt(0);
var fenceSize = 3;
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
var match;
while ((match = fenceInCodeRegex.exec(code))) {
if (match[0].length >= fenceSize) {
fenceSize = match[0].length + 1;
}
}
var fence = repeat(fenceChar, fenceSize);
return (
'\n\n' + fence + language + '\n' +
code.replace(/\n$/, '') +
'\n' + fence + '\n\n'
)
}
};
rules.horizontalRule = {
filter: 'hr',
replacement: function (content, node, options) {
return '\n\n' + options.hr + '\n\n'
}
};
rules.inlineLink = {
filter: function (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node) {
var href = node.getAttribute('href');
if (href) href = href.replace(/([()])/g, '\\$1');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
return '[' + content + '](' + href + title + ')'
}
};
rules.referenceLink = {
filter: function (node, options) {
return (
options.linkStyle === 'referenced' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node, options) {
var href = node.getAttribute('href');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title + '"';
var replacement;
var reference;
switch (options.linkReferenceStyle) {
case 'collapsed':
replacement = '[' + content + '][]';
reference = '[' + content + ']: ' + href + title;
break
case 'shortcut':
replacement = '[' + content + ']';
reference = '[' + content + ']: ' + href + title;
break
default:
var id = this.references.length + 1;
replacement = '[' + content + '][' + id + ']';
reference = '[' + id + ']: ' + href + title;
}
this.references.push(reference);
return replacement
},
references: [],
append: function (options) {
var references = '';
if (this.references.length) {
references = '\n\n' + this.references.join('\n') + '\n\n';
this.references = []; // Reset references
}
return references
}
};
rules.emphasis = {
filter: ['em', 'i'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.emDelimiter + content + options.emDelimiter
}
};
rules.strong = {
filter: ['strong', 'b'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.strongDelimiter + content + options.strongDelimiter
}
};
rules.code = {
filter: function (node) {
var hasSiblings = node.previousSibling || node.nextSibling;
var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
return node.nodeName === 'CODE' && !isCodeBlock
},
replacement: function (content) {
if (!content) return ''
content = content.replace(/\r?\n|\r/g, ' ');
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
var delimiter = '`';
var matches = content.match(/`+/gm) || [];
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
return delimiter + extraSpace + content + extraSpace + delimiter
}
};
rules.image = {
filter: 'img',
replacement: function (content, node) {
var alt = cleanAttribute(node.getAttribute('alt'));
var src = node.getAttribute('src') || '';
var title = cleanAttribute(node.getAttribute('title'));
var titlePart = title ? ' "' + title + '"' : '';
return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
}
};
function cleanAttribute (attribute) {
return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''
}
/**
* Manages a collection of rules used to convert HTML to Markdown
*/
function Rules (options) {
this.options = options;
this._keep = [];
this._remove = [];
this.blankRule = {
replacement: options.blankReplacement
};
this.keepReplacement = options.keepReplacement;
this.defaultRule = {
replacement: options.defaultReplacement
};
this.array = [];
for (var key in options.rules) this.array.push(options.rules[key]);
}
Rules.prototype = {
add: function (key, rule) {
this.array.unshift(rule);
},
keep: function (filter) {
this._keep.unshift({
filter: filter,
replacement: this.keepReplacement
});
},
remove: function (filter) {
this._remove.unshift({
filter: filter,
replacement: function () {
return ''
}
});
},
forNode: function (node) {
if (node.isBlank) return this.blankRule
var rule;
if ((rule = findRule(this.array, node, this.options))) return rule
if ((rule = findRule(this._keep, node, this.options))) return rule
if ((rule = findRule(this._remove, node, this.options))) return rule
return this.defaultRule
},
forEach: function (fn) {
for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
}
};
function findRule (rules, node, options) {
for (var i = 0; i < rules.length; i++) {
var rule = rules[i];
if (filterValue(rule, node, options)) return rule
}
return void 0
}
function filterValue (rule, node, options) {
var filter = rule.filter;
if (typeof filter === 'string') {
if (filter === node.nodeName.toLowerCase()) return true
} else if (Array.isArray(filter)) {
if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
} else if (typeof filter === 'function') {
if (filter.call(rule, node, options)) return true
} else {
throw new TypeError('`filter` needs to be a string, array, or function')
}
}
/**
* The collapseWhitespace function is adapted from collapse-whitespace
* by Luc Thevenard.
*
* The MIT License (MIT)
*
* Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* collapseWhitespace(options) removes extraneous whitespace from an the given element.
*
* @param {Object} options
*/
function collapseWhitespace (options) {
var element = options.element;
var isBlock = options.isBlock;
var isVoid = options.isVoid;
var isPre = options.isPre || function (node) {
return node.nodeName === 'PRE'
};
if (!element.firstChild || isPre(element)) return
var prevText = null;
var keepLeadingWs = false;
var prev = null;
var node = next(prev, element, isPre);
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ');
if ((!prevText || / $/.test(prevText.data)) &&
!keepLeadingWs && text[0] === ' ') {
text = text.substr(1);
}
// `text` might be empty at this point.
if (!text) {
node = remove(node);
continue
}
node.data = text;
prevText = node;
} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
if (isBlock(node) || node.nodeName === 'BR') {
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
}
prevText = null;
keepLeadingWs = false;
} else if (isVoid(node) || isPre(node)) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null;
keepLeadingWs = true;
} else if (prevText) {
// Drop protection if set previously.
keepLeadingWs = false;
}
} else {
node = remove(node);
continue
}
var nextNode = next(prev, node, isPre);
prev = node;
node = nextNode;
}
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
if (!prevText.data) {
remove(prevText);
}
}
}
/**
* remove(node) removes the given node from the DOM and returns the
* next node in the sequence.
*
* @param {Node} node
* @return {Node} node
*/
function remove (node) {
var next = node.nextSibling || node.parentNode;
node.parentNode.removeChild(node);
return next
}
/**
* next(prev, current, isPre) returns the next node in the sequence, given the
* current and previous nodes.
*
* @param {Node} prev
* @param {Node} current
* @param {Function} isPre
* @return {Node}
*/
function next (prev, current, isPre) {
if ((prev && prev.parentNode === current) || isPre(current)) {
return current.nextSibling || current.parentNode
}
return current.firstChild || current.nextSibling || current.parentNode
}
/*
* Set up window for Node.js
*/
var root = (typeof window !== 'undefined' ? window : {});
/*
* Parsing HTML strings
*/
function canParseHTMLNatively () {
var Parser = root.DOMParser;
var canParse = false;
// Adapted from https://gist.github.com/1129031
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if (new Parser().parseFromString('', 'text/html')) {
canParse = true;
}
} catch (e) {}
return canParse
}
function createHTMLParser () {
var Parser = function () {};
{
if (shouldUseActiveX()) {
Parser.prototype.parseFromString = function (string) {
var doc = new window.ActiveXObject('htmlfile');
doc.designMode = 'on'; // disable on-page scripts
doc.open();
doc.write(string);
doc.close();
return doc
};
} else {
Parser.prototype.parseFromString = function (string) {
var doc = document.implementation.createHTMLDocument('');
doc.open();
doc.write(string);
doc.close();
return doc
};
}
}
return Parser
}
function shouldUseActiveX () {
var useActiveX = false;
try {
document.implementation.createHTMLDocument('').open();
} catch (e) {
if (root.ActiveXObject) useActiveX = true;
}
return useActiveX
}
var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
function RootNode (input, options) {
var root;
if (typeof input === 'string') {
var doc = htmlParser().parseFromString(
// DOM parsers arrange elements in the <head> and <body>.
// Wrapping in a custom element ensures elements are reliably arranged in
// a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>',
'text/html'
);
root = doc.getElementById('turndown-root');
} else {
root = input.cloneNode(true);
}
collapseWhitespace({
element: root,
isBlock: isBlock,
isVoid: isVoid,
isPre: options.preformattedCode ? isPreOrCode : null
});
return root
}
var _htmlParser;
function htmlParser () {
_htmlParser = _htmlParser || new HTMLParser();
return _htmlParser
}
function isPreOrCode (node) {
return node.nodeName === 'PRE' || node.nodeName === 'CODE'
}
function Node (node, options) {
node.isBlock = isBlock(node);
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
node.isBlank = isBlank(node);
node.flankingWhitespace = flankingWhitespace(node, options);
return node
}
function isBlank (node) {
return (
!isVoid(node) &&
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
}
function flankingWhitespace (node, options) {
if (node.isBlock || (options.preformattedCode && node.isCode)) {
return { leading: '', trailing: '' }
}
var edges = edgeWhitespace(node.textContent);
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
edges.leading = edges.leadingNonAscii;
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
edges.trailing = edges.trailingNonAscii;
}
return { leading: edges.leading, trailing: edges.trailing }
}
function edgeWhitespace (string) {
var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
return {
leading: m[1], // whole string for whitespace-only strings
leadingAscii: m[2],
leadingNonAscii: m[3],
trailing: m[4], // empty for whitespace-only strings
trailingNonAscii: m[5],
trailingAscii: m[6]
}
}
function isFlankedByWhitespace (side, node, options) {
var sibling;
var regExp;
var isFlanked;
if (side === 'left') {
sibling = node.previousSibling;
regExp = / $/;
} else {
sibling = node.nextSibling;
regExp = /^ /;
}
if (sibling) {
if (sibling.nodeType === 3) {
isFlanked = regExp.test(sibling.nodeValue);
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
isFlanked = false;
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
isFlanked = regExp.test(sibling.textContent);
}
}
return isFlanked
}
var reduce = Array.prototype.reduce;
var escapes = [
[/\\/g, '\\\\'],
[/\*/g, '\\*'],
[/^-/g, '\\-'],
[/^\+ /g, '\\+ '],
[/^(=+)/g, '\\$1'],
[/^(#{1,6}) /g, '\\$1 '],
[/`/g, '\\`'],
[/^~~~/g, '\\~~~'],
[/\[/g, '\\['],
[/\]/g, '\\]'],
[/^>/g, '\\>'],
[/_/g, '\\_'],
[/^(\d+)\. /g, '$1\\. ']
];
function TurndownService (options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
var defaults = {
rules: rules,
headingStyle: 'setext',
hr: '* * *',
bulletListMarker: '*',
codeBlockStyle: 'indented',
fence: '```',
emDelimiter: '_',
strongDelimiter: '**',
linkStyle: 'inlined',
linkReferenceStyle: 'full',
br: ' ',
preformattedCode: false,
blankReplacement: function (content, node) {
return node.isBlock ? '\n\n' : ''
},
keepReplacement: function (content, node) {
return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
},
defaultReplacement: function (content, node) {
return node.isBlock ? '\n\n' + content + '\n\n' : content
}
};
this.options = extend({}, defaults, options);
this.rules = new Rules(this.options);
}
TurndownService.prototype = {
/**
* The entry point for converting a string or DOM node to Markdown
* @public
* @param {String|HTMLElement} input The string or DOM node to convert
* @returns A Markdown representation of the input
* @type String
*/
turndown: function (input) {
if (!canConvert(input)) {
throw new TypeError(
input + ' is not a string, or an element/document/fragment node.'
)
}
if (input === '') return ''
var output = process.call(this, new RootNode(input, this.options));
return postProcess.call(this, output)
},
/**
* Add one or more plugins
* @public
* @param {Function|Array} plugin The plugin or array of plugins to add
* @returns The Turndown instance for chaining
* @type Object
*/
use: function (plugin) {
if (Array.isArray(plugin)) {
for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
} else if (typeof plugin === 'function') {
plugin(this);
} else {
throw new TypeError('plugin must be a Function or an Array of Functions')
}
return this
},
/**
* Adds a rule
* @public
* @param {String} key The unique key of the rule
* @param {Object} rule The rule
* @returns The Turndown instance for chaining
* @type Object
*/
addRule: function (key, rule) {
this.rules.add(key, rule);
return this
},
/**
* Keep a node (as HTML) that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
keep: function (filter) {
this.rules.keep(filter);
return this
},
/**
* Remove a node that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
remove: function (filter) {
this.rules.remove(filter);
return this
},
/**
* Escapes Markdown syntax
* @public
* @param {String} string The string to escape
* @returns A string with Markdown syntax escaped
* @type String
*/
escape: function (string) {
return escapes.reduce(function (accumulator, escape) {
return accumulator.replace(escape[0], escape[1])
}, string)
}
};
/**
* Reduces a DOM node down to its Markdown string equivalent
* @private
* @param {HTMLElement} parentNode The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function process (parentNode) {
var self = this;
return reduce.call(parentNode.childNodes, function (output, node) {
node = new Node(node, self.options);
var replacement = '';
if (node.nodeType === 3) {
replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
} else if (node.nodeType === 1) {
replacement = replacementForNode.call(self, node);
}
return join(output, replacement)
}, '')
}
/**
* Appends strings as each rule requires and trims the output
* @private
* @param {String} output The conversion output
* @returns A trimmed version of the ouput
* @type String
*/
function postProcess (output) {
var self = this;
this.rules.forEach(function (rule) {
if (typeof rule.append === 'function') {
output = join(output, rule.append(self.options));
}
});
return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
}
/**
* Converts an element node to its Markdown equivalent
* @private
* @param {HTMLElement} node The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function replacementForNode (node) {
var rule = this.rules.forNode(node);
var content = process.call(this, node);
var whitespace = node.flankingWhitespace;
if (whitespace.leading || whitespace.trailing) content = content.trim();
return (
whitespace.leading +
rule.replacement(content, node, this.options) +
whitespace.trailing
)
}
/**
* Joins replacement to the current output with appropriate number of new lines
* @private
* @param {String} output The current conversion output
* @param {String} replacement The string to append to the output
* @returns Joined output
* @type String
*/
function join (output, replacement) {
var s1 = trimTrailingNewlines(output);
var s2 = trimLeadingNewlines(replacement);
var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
var separator = '\n\n'.substring(0, nls);
return s1 + separator + s2
}
/**
* Determines whether an input can be converted
* @private
* @param {String|HTMLElement} input Describe this parameter
* @returns Describe what it returns
* @type String|Object|Array|Boolean|Number
*/
function canConvert (input) {
return (
input != null && (
typeof input === 'string' ||
(input.nodeType && (
input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
))
)
)
}
return TurndownService;
}());

View File

@@ -0,0 +1,973 @@
'use strict';
function extend (destination) {
for (var i = 1; i < arguments.length; i++) {
var source = arguments[i];
for (var key in source) {
if (source.hasOwnProperty(key)) destination[key] = source[key];
}
}
return destination
}
function repeat (character, count) {
return Array(count + 1).join(character)
}
function trimLeadingNewlines (string) {
return string.replace(/^\n*/, '')
}
function trimTrailingNewlines (string) {
// avoid match-at-end regexp bottleneck, see #370
var indexEnd = string.length;
while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
return string.substring(0, indexEnd)
}
function trimNewlines (string) {
return trimTrailingNewlines(trimLeadingNewlines(string))
}
var blockElements = [
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
];
function isBlock (node) {
return is(node, blockElements)
}
var voidElements = [
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
];
function isVoid (node) {
return is(node, voidElements)
}
function hasVoid (node) {
return has(node, voidElements)
}
var meaningfulWhenBlankElements = [
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
'AUDIO', 'VIDEO'
];
function isMeaningfulWhenBlank (node) {
return is(node, meaningfulWhenBlankElements)
}
function hasMeaningfulWhenBlank (node) {
return has(node, meaningfulWhenBlankElements)
}
function is (node, tagNames) {
return tagNames.indexOf(node.nodeName) >= 0
}
function has (node, tagNames) {
return (
node.getElementsByTagName &&
tagNames.some(function (tagName) {
return node.getElementsByTagName(tagName).length
})
)
}
var rules = {};
rules.paragraph = {
filter: 'p',
replacement: function (content) {
return '\n\n' + content + '\n\n'
}
};
rules.lineBreak = {
filter: 'br',
replacement: function (content, node, options) {
return options.br + '\n'
}
};
rules.heading = {
filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
replacement: function (content, node, options) {
var hLevel = Number(node.nodeName.charAt(1));
if (options.headingStyle === 'setext' && hLevel < 3) {
var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
return (
'\n\n' + content + '\n' + underline + '\n\n'
)
} else {
return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
}
}
};
rules.blockquote = {
filter: 'blockquote',
replacement: function (content) {
content = trimNewlines(content).replace(/^/gm, '> ');
return '\n\n' + content + '\n\n'
}
};
rules.list = {
filter: ['ul', 'ol'],
replacement: function (content, node) {
var parent = node.parentNode;
if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
return '\n' + content
} else {
return '\n\n' + content + '\n\n'
}
}
};
rules.listItem = {
filter: 'li',
replacement: function (content, node, options) {
var prefix = options.bulletListMarker + ' ';
var parent = node.parentNode;
if (parent.nodeName === 'OL') {
var start = parent.getAttribute('start');
var index = Array.prototype.indexOf.call(parent.children, node);
prefix = (start ? Number(start) + index : index + 1) + '. ';
}
var isParagraph = /\n$/.test(content);
content = trimNewlines(content) + (isParagraph ? '\n' : '');
content = content.replace(/\n/gm, '\n' + ' '.repeat(prefix.length)); // indent
return (
prefix + content + (node.nextSibling ? '\n' : '')
)
}
};
rules.indentedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'indented' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
return (
'\n\n ' +
node.firstChild.textContent.replace(/\n/g, '\n ') +
'\n\n'
)
}
};
rules.fencedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'fenced' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
var className = node.firstChild.getAttribute('class') || '';
var language = (className.match(/language-(\S+)/) || [null, ''])[1];
var code = node.firstChild.textContent;
var fenceChar = options.fence.charAt(0);
var fenceSize = 3;
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
var match;
while ((match = fenceInCodeRegex.exec(code))) {
if (match[0].length >= fenceSize) {
fenceSize = match[0].length + 1;
}
}
var fence = repeat(fenceChar, fenceSize);
return (
'\n\n' + fence + language + '\n' +
code.replace(/\n$/, '') +
'\n' + fence + '\n\n'
)
}
};
rules.horizontalRule = {
filter: 'hr',
replacement: function (content, node, options) {
return '\n\n' + options.hr + '\n\n'
}
};
rules.inlineLink = {
filter: function (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node) {
var href = node.getAttribute('href');
if (href) href = href.replace(/([()])/g, '\\$1');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
return '[' + content + '](' + href + title + ')'
}
};
rules.referenceLink = {
filter: function (node, options) {
return (
options.linkStyle === 'referenced' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node, options) {
var href = node.getAttribute('href');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title + '"';
var replacement;
var reference;
switch (options.linkReferenceStyle) {
case 'collapsed':
replacement = '[' + content + '][]';
reference = '[' + content + ']: ' + href + title;
break
case 'shortcut':
replacement = '[' + content + ']';
reference = '[' + content + ']: ' + href + title;
break
default:
var id = this.references.length + 1;
replacement = '[' + content + '][' + id + ']';
reference = '[' + id + ']: ' + href + title;
}
this.references.push(reference);
return replacement
},
references: [],
append: function (options) {
var references = '';
if (this.references.length) {
references = '\n\n' + this.references.join('\n') + '\n\n';
this.references = []; // Reset references
}
return references
}
};
rules.emphasis = {
filter: ['em', 'i'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.emDelimiter + content + options.emDelimiter
}
};
rules.strong = {
filter: ['strong', 'b'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.strongDelimiter + content + options.strongDelimiter
}
};
rules.code = {
filter: function (node) {
var hasSiblings = node.previousSibling || node.nextSibling;
var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
return node.nodeName === 'CODE' && !isCodeBlock
},
replacement: function (content) {
if (!content) return ''
content = content.replace(/\r?\n|\r/g, ' ');
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
var delimiter = '`';
var matches = content.match(/`+/gm) || [];
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
return delimiter + extraSpace + content + extraSpace + delimiter
}
};
rules.image = {
filter: 'img',
replacement: function (content, node) {
var alt = cleanAttribute(node.getAttribute('alt'));
var src = node.getAttribute('src') || '';
var title = cleanAttribute(node.getAttribute('title'));
var titlePart = title ? ' "' + title + '"' : '';
return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
}
};
function cleanAttribute (attribute) {
return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''
}
/**
* Manages a collection of rules used to convert HTML to Markdown
*/
function Rules (options) {
this.options = options;
this._keep = [];
this._remove = [];
this.blankRule = {
replacement: options.blankReplacement
};
this.keepReplacement = options.keepReplacement;
this.defaultRule = {
replacement: options.defaultReplacement
};
this.array = [];
for (var key in options.rules) this.array.push(options.rules[key]);
}
Rules.prototype = {
add: function (key, rule) {
this.array.unshift(rule);
},
keep: function (filter) {
this._keep.unshift({
filter: filter,
replacement: this.keepReplacement
});
},
remove: function (filter) {
this._remove.unshift({
filter: filter,
replacement: function () {
return ''
}
});
},
forNode: function (node) {
if (node.isBlank) return this.blankRule
var rule;
if ((rule = findRule(this.array, node, this.options))) return rule
if ((rule = findRule(this._keep, node, this.options))) return rule
if ((rule = findRule(this._remove, node, this.options))) return rule
return this.defaultRule
},
forEach: function (fn) {
for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
}
};
function findRule (rules, node, options) {
for (var i = 0; i < rules.length; i++) {
var rule = rules[i];
if (filterValue(rule, node, options)) return rule
}
return void 0
}
function filterValue (rule, node, options) {
var filter = rule.filter;
if (typeof filter === 'string') {
if (filter === node.nodeName.toLowerCase()) return true
} else if (Array.isArray(filter)) {
if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
} else if (typeof filter === 'function') {
if (filter.call(rule, node, options)) return true
} else {
throw new TypeError('`filter` needs to be a string, array, or function')
}
}
/**
* The collapseWhitespace function is adapted from collapse-whitespace
* by Luc Thevenard.
*
* The MIT License (MIT)
*
* Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* collapseWhitespace(options) removes extraneous whitespace from an the given element.
*
* @param {Object} options
*/
function collapseWhitespace (options) {
var element = options.element;
var isBlock = options.isBlock;
var isVoid = options.isVoid;
var isPre = options.isPre || function (node) {
return node.nodeName === 'PRE'
};
if (!element.firstChild || isPre(element)) return
var prevText = null;
var keepLeadingWs = false;
var prev = null;
var node = next(prev, element, isPre);
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ');
if ((!prevText || / $/.test(prevText.data)) &&
!keepLeadingWs && text[0] === ' ') {
text = text.substr(1);
}
// `text` might be empty at this point.
if (!text) {
node = remove(node);
continue
}
node.data = text;
prevText = node;
} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
if (isBlock(node) || node.nodeName === 'BR') {
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
}
prevText = null;
keepLeadingWs = false;
} else if (isVoid(node) || isPre(node)) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null;
keepLeadingWs = true;
} else if (prevText) {
// Drop protection if set previously.
keepLeadingWs = false;
}
} else {
node = remove(node);
continue
}
var nextNode = next(prev, node, isPre);
prev = node;
node = nextNode;
}
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
if (!prevText.data) {
remove(prevText);
}
}
}
/**
* remove(node) removes the given node from the DOM and returns the
* next node in the sequence.
*
* @param {Node} node
* @return {Node} node
*/
function remove (node) {
var next = node.nextSibling || node.parentNode;
node.parentNode.removeChild(node);
return next
}
/**
* next(prev, current, isPre) returns the next node in the sequence, given the
* current and previous nodes.
*
* @param {Node} prev
* @param {Node} current
* @param {Function} isPre
* @return {Node}
*/
function next (prev, current, isPre) {
if ((prev && prev.parentNode === current) || isPre(current)) {
return current.nextSibling || current.parentNode
}
return current.firstChild || current.nextSibling || current.parentNode
}
/*
* Set up window for Node.js
*/
var root = (typeof window !== 'undefined' ? window : {});
/*
* Parsing HTML strings
*/
function canParseHTMLNatively () {
var Parser = root.DOMParser;
var canParse = false;
// Adapted from https://gist.github.com/1129031
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if (new Parser().parseFromString('', 'text/html')) {
canParse = true;
}
} catch (e) {}
return canParse
}
function createHTMLParser () {
var Parser = function () {};
{
if (shouldUseActiveX()) {
Parser.prototype.parseFromString = function (string) {
var doc = new window.ActiveXObject('htmlfile');
doc.designMode = 'on'; // disable on-page scripts
doc.open();
doc.write(string);
doc.close();
return doc
};
} else {
Parser.prototype.parseFromString = function (string) {
var doc = document.implementation.createHTMLDocument('');
doc.open();
doc.write(string);
doc.close();
return doc
};
}
}
return Parser
}
function shouldUseActiveX () {
var useActiveX = false;
try {
document.implementation.createHTMLDocument('').open();
} catch (e) {
if (root.ActiveXObject) useActiveX = true;
}
return useActiveX
}
var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
function RootNode (input, options) {
var root;
if (typeof input === 'string') {
var doc = htmlParser().parseFromString(
// DOM parsers arrange elements in the <head> and <body>.
// Wrapping in a custom element ensures elements are reliably arranged in
// a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>',
'text/html'
);
root = doc.getElementById('turndown-root');
} else {
root = input.cloneNode(true);
}
collapseWhitespace({
element: root,
isBlock: isBlock,
isVoid: isVoid,
isPre: options.preformattedCode ? isPreOrCode : null
});
return root
}
var _htmlParser;
function htmlParser () {
_htmlParser = _htmlParser || new HTMLParser();
return _htmlParser
}
function isPreOrCode (node) {
return node.nodeName === 'PRE' || node.nodeName === 'CODE'
}
function Node (node, options) {
node.isBlock = isBlock(node);
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
node.isBlank = isBlank(node);
node.flankingWhitespace = flankingWhitespace(node, options);
return node
}
function isBlank (node) {
return (
!isVoid(node) &&
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
}
function flankingWhitespace (node, options) {
if (node.isBlock || (options.preformattedCode && node.isCode)) {
return { leading: '', trailing: '' }
}
var edges = edgeWhitespace(node.textContent);
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
edges.leading = edges.leadingNonAscii;
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
edges.trailing = edges.trailingNonAscii;
}
return { leading: edges.leading, trailing: edges.trailing }
}
function edgeWhitespace (string) {
var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
return {
leading: m[1], // whole string for whitespace-only strings
leadingAscii: m[2],
leadingNonAscii: m[3],
trailing: m[4], // empty for whitespace-only strings
trailingNonAscii: m[5],
trailingAscii: m[6]
}
}
function isFlankedByWhitespace (side, node, options) {
var sibling;
var regExp;
var isFlanked;
if (side === 'left') {
sibling = node.previousSibling;
regExp = / $/;
} else {
sibling = node.nextSibling;
regExp = /^ /;
}
if (sibling) {
if (sibling.nodeType === 3) {
isFlanked = regExp.test(sibling.nodeValue);
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
isFlanked = false;
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
isFlanked = regExp.test(sibling.textContent);
}
}
return isFlanked
}
var reduce = Array.prototype.reduce;
var escapes = [
[/\\/g, '\\\\'],
[/\*/g, '\\*'],
[/^-/g, '\\-'],
[/^\+ /g, '\\+ '],
[/^(=+)/g, '\\$1'],
[/^(#{1,6}) /g, '\\$1 '],
[/`/g, '\\`'],
[/^~~~/g, '\\~~~'],
[/\[/g, '\\['],
[/\]/g, '\\]'],
[/^>/g, '\\>'],
[/_/g, '\\_'],
[/^(\d+)\. /g, '$1\\. ']
];
function TurndownService (options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
var defaults = {
rules: rules,
headingStyle: 'setext',
hr: '* * *',
bulletListMarker: '*',
codeBlockStyle: 'indented',
fence: '```',
emDelimiter: '_',
strongDelimiter: '**',
linkStyle: 'inlined',
linkReferenceStyle: 'full',
br: ' ',
preformattedCode: false,
blankReplacement: function (content, node) {
return node.isBlock ? '\n\n' : ''
},
keepReplacement: function (content, node) {
return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
},
defaultReplacement: function (content, node) {
return node.isBlock ? '\n\n' + content + '\n\n' : content
}
};
this.options = extend({}, defaults, options);
this.rules = new Rules(this.options);
}
TurndownService.prototype = {
/**
* The entry point for converting a string or DOM node to Markdown
* @public
* @param {String|HTMLElement} input The string or DOM node to convert
* @returns A Markdown representation of the input
* @type String
*/
turndown: function (input) {
if (!canConvert(input)) {
throw new TypeError(
input + ' is not a string, or an element/document/fragment node.'
)
}
if (input === '') return ''
var output = process.call(this, new RootNode(input, this.options));
return postProcess.call(this, output)
},
/**
* Add one or more plugins
* @public
* @param {Function|Array} plugin The plugin or array of plugins to add
* @returns The Turndown instance for chaining
* @type Object
*/
use: function (plugin) {
if (Array.isArray(plugin)) {
for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
} else if (typeof plugin === 'function') {
plugin(this);
} else {
throw new TypeError('plugin must be a Function or an Array of Functions')
}
return this
},
/**
* Adds a rule
* @public
* @param {String} key The unique key of the rule
* @param {Object} rule The rule
* @returns The Turndown instance for chaining
* @type Object
*/
addRule: function (key, rule) {
this.rules.add(key, rule);
return this
},
/**
* Keep a node (as HTML) that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
keep: function (filter) {
this.rules.keep(filter);
return this
},
/**
* Remove a node that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
remove: function (filter) {
this.rules.remove(filter);
return this
},
/**
* Escapes Markdown syntax
* @public
* @param {String} string The string to escape
* @returns A string with Markdown syntax escaped
* @type String
*/
escape: function (string) {
return escapes.reduce(function (accumulator, escape) {
return accumulator.replace(escape[0], escape[1])
}, string)
}
};
/**
* Reduces a DOM node down to its Markdown string equivalent
* @private
* @param {HTMLElement} parentNode The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function process (parentNode) {
var self = this;
return reduce.call(parentNode.childNodes, function (output, node) {
node = new Node(node, self.options);
var replacement = '';
if (node.nodeType === 3) {
replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
} else if (node.nodeType === 1) {
replacement = replacementForNode.call(self, node);
}
return join(output, replacement)
}, '')
}
/**
* Appends strings as each rule requires and trims the output
* @private
* @param {String} output The conversion output
* @returns A trimmed version of the ouput
* @type String
*/
function postProcess (output) {
var self = this;
this.rules.forEach(function (rule) {
if (typeof rule.append === 'function') {
output = join(output, rule.append(self.options));
}
});
return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
}
/**
* Converts an element node to its Markdown equivalent
* @private
* @param {HTMLElement} node The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function replacementForNode (node) {
var rule = this.rules.forNode(node);
var content = process.call(this, node);
var whitespace = node.flankingWhitespace;
if (whitespace.leading || whitespace.trailing) content = content.trim();
return (
whitespace.leading +
rule.replacement(content, node, this.options) +
whitespace.trailing
)
}
/**
* Joins replacement to the current output with appropriate number of new lines
* @private
* @param {String} output The current conversion output
* @param {String} replacement The string to append to the output
* @returns Joined output
* @type String
*/
function join (output, replacement) {
var s1 = trimTrailingNewlines(output);
var s2 = trimLeadingNewlines(replacement);
var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
var separator = '\n\n'.substring(0, nls);
return s1 + separator + s2
}
/**
* Determines whether an input can be converted
* @private
* @param {String|HTMLElement} input Describe this parameter
* @returns Describe what it returns
* @type String|Object|Array|Boolean|Number
*/
function canConvert (input) {
return (
input != null && (
typeof input === 'string' ||
(input.nodeType && (
input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
))
)
)
}
module.exports = TurndownService;

View File

@@ -0,0 +1,971 @@
function extend (destination) {
for (var i = 1; i < arguments.length; i++) {
var source = arguments[i];
for (var key in source) {
if (source.hasOwnProperty(key)) destination[key] = source[key];
}
}
return destination
}
function repeat (character, count) {
return Array(count + 1).join(character)
}
function trimLeadingNewlines (string) {
return string.replace(/^\n*/, '')
}
function trimTrailingNewlines (string) {
// avoid match-at-end regexp bottleneck, see #370
var indexEnd = string.length;
while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
return string.substring(0, indexEnd)
}
function trimNewlines (string) {
return trimTrailingNewlines(trimLeadingNewlines(string))
}
var blockElements = [
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
];
function isBlock (node) {
return is(node, blockElements)
}
var voidElements = [
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
];
function isVoid (node) {
return is(node, voidElements)
}
function hasVoid (node) {
return has(node, voidElements)
}
var meaningfulWhenBlankElements = [
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
'AUDIO', 'VIDEO'
];
function isMeaningfulWhenBlank (node) {
return is(node, meaningfulWhenBlankElements)
}
function hasMeaningfulWhenBlank (node) {
return has(node, meaningfulWhenBlankElements)
}
function is (node, tagNames) {
return tagNames.indexOf(node.nodeName) >= 0
}
function has (node, tagNames) {
return (
node.getElementsByTagName &&
tagNames.some(function (tagName) {
return node.getElementsByTagName(tagName).length
})
)
}
var rules = {};
rules.paragraph = {
filter: 'p',
replacement: function (content) {
return '\n\n' + content + '\n\n'
}
};
rules.lineBreak = {
filter: 'br',
replacement: function (content, node, options) {
return options.br + '\n'
}
};
rules.heading = {
filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
replacement: function (content, node, options) {
var hLevel = Number(node.nodeName.charAt(1));
if (options.headingStyle === 'setext' && hLevel < 3) {
var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
return (
'\n\n' + content + '\n' + underline + '\n\n'
)
} else {
return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
}
}
};
rules.blockquote = {
filter: 'blockquote',
replacement: function (content) {
content = trimNewlines(content).replace(/^/gm, '> ');
return '\n\n' + content + '\n\n'
}
};
rules.list = {
filter: ['ul', 'ol'],
replacement: function (content, node) {
var parent = node.parentNode;
if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
return '\n' + content
} else {
return '\n\n' + content + '\n\n'
}
}
};
rules.listItem = {
filter: 'li',
replacement: function (content, node, options) {
var prefix = options.bulletListMarker + ' ';
var parent = node.parentNode;
if (parent.nodeName === 'OL') {
var start = parent.getAttribute('start');
var index = Array.prototype.indexOf.call(parent.children, node);
prefix = (start ? Number(start) + index : index + 1) + '. ';
}
var isParagraph = /\n$/.test(content);
content = trimNewlines(content) + (isParagraph ? '\n' : '');
content = content.replace(/\n/gm, '\n' + ' '.repeat(prefix.length)); // indent
return (
prefix + content + (node.nextSibling ? '\n' : '')
)
}
};
rules.indentedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'indented' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
return (
'\n\n ' +
node.firstChild.textContent.replace(/\n/g, '\n ') +
'\n\n'
)
}
};
rules.fencedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'fenced' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
var className = node.firstChild.getAttribute('class') || '';
var language = (className.match(/language-(\S+)/) || [null, ''])[1];
var code = node.firstChild.textContent;
var fenceChar = options.fence.charAt(0);
var fenceSize = 3;
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
var match;
while ((match = fenceInCodeRegex.exec(code))) {
if (match[0].length >= fenceSize) {
fenceSize = match[0].length + 1;
}
}
var fence = repeat(fenceChar, fenceSize);
return (
'\n\n' + fence + language + '\n' +
code.replace(/\n$/, '') +
'\n' + fence + '\n\n'
)
}
};
rules.horizontalRule = {
filter: 'hr',
replacement: function (content, node, options) {
return '\n\n' + options.hr + '\n\n'
}
};
rules.inlineLink = {
filter: function (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node) {
var href = node.getAttribute('href');
if (href) href = href.replace(/([()])/g, '\\$1');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
return '[' + content + '](' + href + title + ')'
}
};
rules.referenceLink = {
filter: function (node, options) {
return (
options.linkStyle === 'referenced' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node, options) {
var href = node.getAttribute('href');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title + '"';
var replacement;
var reference;
switch (options.linkReferenceStyle) {
case 'collapsed':
replacement = '[' + content + '][]';
reference = '[' + content + ']: ' + href + title;
break
case 'shortcut':
replacement = '[' + content + ']';
reference = '[' + content + ']: ' + href + title;
break
default:
var id = this.references.length + 1;
replacement = '[' + content + '][' + id + ']';
reference = '[' + id + ']: ' + href + title;
}
this.references.push(reference);
return replacement
},
references: [],
append: function (options) {
var references = '';
if (this.references.length) {
references = '\n\n' + this.references.join('\n') + '\n\n';
this.references = []; // Reset references
}
return references
}
};
rules.emphasis = {
filter: ['em', 'i'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.emDelimiter + content + options.emDelimiter
}
};
rules.strong = {
filter: ['strong', 'b'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.strongDelimiter + content + options.strongDelimiter
}
};
rules.code = {
filter: function (node) {
var hasSiblings = node.previousSibling || node.nextSibling;
var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
return node.nodeName === 'CODE' && !isCodeBlock
},
replacement: function (content) {
if (!content) return ''
content = content.replace(/\r?\n|\r/g, ' ');
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
var delimiter = '`';
var matches = content.match(/`+/gm) || [];
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
return delimiter + extraSpace + content + extraSpace + delimiter
}
};
rules.image = {
filter: 'img',
replacement: function (content, node) {
var alt = cleanAttribute(node.getAttribute('alt'));
var src = node.getAttribute('src') || '';
var title = cleanAttribute(node.getAttribute('title'));
var titlePart = title ? ' "' + title + '"' : '';
return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
}
};
function cleanAttribute (attribute) {
return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''
}
/**
* Manages a collection of rules used to convert HTML to Markdown
*/
function Rules (options) {
this.options = options;
this._keep = [];
this._remove = [];
this.blankRule = {
replacement: options.blankReplacement
};
this.keepReplacement = options.keepReplacement;
this.defaultRule = {
replacement: options.defaultReplacement
};
this.array = [];
for (var key in options.rules) this.array.push(options.rules[key]);
}
Rules.prototype = {
add: function (key, rule) {
this.array.unshift(rule);
},
keep: function (filter) {
this._keep.unshift({
filter: filter,
replacement: this.keepReplacement
});
},
remove: function (filter) {
this._remove.unshift({
filter: filter,
replacement: function () {
return ''
}
});
},
forNode: function (node) {
if (node.isBlank) return this.blankRule
var rule;
if ((rule = findRule(this.array, node, this.options))) return rule
if ((rule = findRule(this._keep, node, this.options))) return rule
if ((rule = findRule(this._remove, node, this.options))) return rule
return this.defaultRule
},
forEach: function (fn) {
for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
}
};
function findRule (rules, node, options) {
for (var i = 0; i < rules.length; i++) {
var rule = rules[i];
if (filterValue(rule, node, options)) return rule
}
return void 0
}
function filterValue (rule, node, options) {
var filter = rule.filter;
if (typeof filter === 'string') {
if (filter === node.nodeName.toLowerCase()) return true
} else if (Array.isArray(filter)) {
if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
} else if (typeof filter === 'function') {
if (filter.call(rule, node, options)) return true
} else {
throw new TypeError('`filter` needs to be a string, array, or function')
}
}
/**
* The collapseWhitespace function is adapted from collapse-whitespace
* by Luc Thevenard.
*
* The MIT License (MIT)
*
* Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* collapseWhitespace(options) removes extraneous whitespace from an the given element.
*
* @param {Object} options
*/
function collapseWhitespace (options) {
var element = options.element;
var isBlock = options.isBlock;
var isVoid = options.isVoid;
var isPre = options.isPre || function (node) {
return node.nodeName === 'PRE'
};
if (!element.firstChild || isPre(element)) return
var prevText = null;
var keepLeadingWs = false;
var prev = null;
var node = next(prev, element, isPre);
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ');
if ((!prevText || / $/.test(prevText.data)) &&
!keepLeadingWs && text[0] === ' ') {
text = text.substr(1);
}
// `text` might be empty at this point.
if (!text) {
node = remove(node);
continue
}
node.data = text;
prevText = node;
} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
if (isBlock(node) || node.nodeName === 'BR') {
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
}
prevText = null;
keepLeadingWs = false;
} else if (isVoid(node) || isPre(node)) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null;
keepLeadingWs = true;
} else if (prevText) {
// Drop protection if set previously.
keepLeadingWs = false;
}
} else {
node = remove(node);
continue
}
var nextNode = next(prev, node, isPre);
prev = node;
node = nextNode;
}
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
if (!prevText.data) {
remove(prevText);
}
}
}
/**
* remove(node) removes the given node from the DOM and returns the
* next node in the sequence.
*
* @param {Node} node
* @return {Node} node
*/
function remove (node) {
var next = node.nextSibling || node.parentNode;
node.parentNode.removeChild(node);
return next
}
/**
* next(prev, current, isPre) returns the next node in the sequence, given the
* current and previous nodes.
*
* @param {Node} prev
* @param {Node} current
* @param {Function} isPre
* @return {Node}
*/
function next (prev, current, isPre) {
if ((prev && prev.parentNode === current) || isPre(current)) {
return current.nextSibling || current.parentNode
}
return current.firstChild || current.nextSibling || current.parentNode
}
/*
* Set up window for Node.js
*/
var root = (typeof window !== 'undefined' ? window : {});
/*
* Parsing HTML strings
*/
function canParseHTMLNatively () {
var Parser = root.DOMParser;
var canParse = false;
// Adapted from https://gist.github.com/1129031
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if (new Parser().parseFromString('', 'text/html')) {
canParse = true;
}
} catch (e) {}
return canParse
}
function createHTMLParser () {
var Parser = function () {};
{
if (shouldUseActiveX()) {
Parser.prototype.parseFromString = function (string) {
var doc = new window.ActiveXObject('htmlfile');
doc.designMode = 'on'; // disable on-page scripts
doc.open();
doc.write(string);
doc.close();
return doc
};
} else {
Parser.prototype.parseFromString = function (string) {
var doc = document.implementation.createHTMLDocument('');
doc.open();
doc.write(string);
doc.close();
return doc
};
}
}
return Parser
}
function shouldUseActiveX () {
var useActiveX = false;
try {
document.implementation.createHTMLDocument('').open();
} catch (e) {
if (root.ActiveXObject) useActiveX = true;
}
return useActiveX
}
var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
function RootNode (input, options) {
var root;
if (typeof input === 'string') {
var doc = htmlParser().parseFromString(
// DOM parsers arrange elements in the <head> and <body>.
// Wrapping in a custom element ensures elements are reliably arranged in
// a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>',
'text/html'
);
root = doc.getElementById('turndown-root');
} else {
root = input.cloneNode(true);
}
collapseWhitespace({
element: root,
isBlock: isBlock,
isVoid: isVoid,
isPre: options.preformattedCode ? isPreOrCode : null
});
return root
}
var _htmlParser;
function htmlParser () {
_htmlParser = _htmlParser || new HTMLParser();
return _htmlParser
}
function isPreOrCode (node) {
return node.nodeName === 'PRE' || node.nodeName === 'CODE'
}
function Node (node, options) {
node.isBlock = isBlock(node);
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
node.isBlank = isBlank(node);
node.flankingWhitespace = flankingWhitespace(node, options);
return node
}
function isBlank (node) {
return (
!isVoid(node) &&
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
}
function flankingWhitespace (node, options) {
if (node.isBlock || (options.preformattedCode && node.isCode)) {
return { leading: '', trailing: '' }
}
var edges = edgeWhitespace(node.textContent);
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
edges.leading = edges.leadingNonAscii;
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
edges.trailing = edges.trailingNonAscii;
}
return { leading: edges.leading, trailing: edges.trailing }
}
function edgeWhitespace (string) {
var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
return {
leading: m[1], // whole string for whitespace-only strings
leadingAscii: m[2],
leadingNonAscii: m[3],
trailing: m[4], // empty for whitespace-only strings
trailingNonAscii: m[5],
trailingAscii: m[6]
}
}
function isFlankedByWhitespace (side, node, options) {
var sibling;
var regExp;
var isFlanked;
if (side === 'left') {
sibling = node.previousSibling;
regExp = / $/;
} else {
sibling = node.nextSibling;
regExp = /^ /;
}
if (sibling) {
if (sibling.nodeType === 3) {
isFlanked = regExp.test(sibling.nodeValue);
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
isFlanked = false;
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
isFlanked = regExp.test(sibling.textContent);
}
}
return isFlanked
}
var reduce = Array.prototype.reduce;
var escapes = [
[/\\/g, '\\\\'],
[/\*/g, '\\*'],
[/^-/g, '\\-'],
[/^\+ /g, '\\+ '],
[/^(=+)/g, '\\$1'],
[/^(#{1,6}) /g, '\\$1 '],
[/`/g, '\\`'],
[/^~~~/g, '\\~~~'],
[/\[/g, '\\['],
[/\]/g, '\\]'],
[/^>/g, '\\>'],
[/_/g, '\\_'],
[/^(\d+)\. /g, '$1\\. ']
];
function TurndownService (options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
var defaults = {
rules: rules,
headingStyle: 'setext',
hr: '* * *',
bulletListMarker: '*',
codeBlockStyle: 'indented',
fence: '```',
emDelimiter: '_',
strongDelimiter: '**',
linkStyle: 'inlined',
linkReferenceStyle: 'full',
br: ' ',
preformattedCode: false,
blankReplacement: function (content, node) {
return node.isBlock ? '\n\n' : ''
},
keepReplacement: function (content, node) {
return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
},
defaultReplacement: function (content, node) {
return node.isBlock ? '\n\n' + content + '\n\n' : content
}
};
this.options = extend({}, defaults, options);
this.rules = new Rules(this.options);
}
TurndownService.prototype = {
/**
* The entry point for converting a string or DOM node to Markdown
* @public
* @param {String|HTMLElement} input The string or DOM node to convert
* @returns A Markdown representation of the input
* @type String
*/
turndown: function (input) {
if (!canConvert(input)) {
throw new TypeError(
input + ' is not a string, or an element/document/fragment node.'
)
}
if (input === '') return ''
var output = process.call(this, new RootNode(input, this.options));
return postProcess.call(this, output)
},
/**
* Add one or more plugins
* @public
* @param {Function|Array} plugin The plugin or array of plugins to add
* @returns The Turndown instance for chaining
* @type Object
*/
use: function (plugin) {
if (Array.isArray(plugin)) {
for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
} else if (typeof plugin === 'function') {
plugin(this);
} else {
throw new TypeError('plugin must be a Function or an Array of Functions')
}
return this
},
/**
* Adds a rule
* @public
* @param {String} key The unique key of the rule
* @param {Object} rule The rule
* @returns The Turndown instance for chaining
* @type Object
*/
addRule: function (key, rule) {
this.rules.add(key, rule);
return this
},
/**
* Keep a node (as HTML) that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
keep: function (filter) {
this.rules.keep(filter);
return this
},
/**
* Remove a node that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
remove: function (filter) {
this.rules.remove(filter);
return this
},
/**
* Escapes Markdown syntax
* @public
* @param {String} string The string to escape
* @returns A string with Markdown syntax escaped
* @type String
*/
escape: function (string) {
return escapes.reduce(function (accumulator, escape) {
return accumulator.replace(escape[0], escape[1])
}, string)
}
};
/**
* Reduces a DOM node down to its Markdown string equivalent
* @private
* @param {HTMLElement} parentNode The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function process (parentNode) {
var self = this;
return reduce.call(parentNode.childNodes, function (output, node) {
node = new Node(node, self.options);
var replacement = '';
if (node.nodeType === 3) {
replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
} else if (node.nodeType === 1) {
replacement = replacementForNode.call(self, node);
}
return join(output, replacement)
}, '')
}
/**
* Appends strings as each rule requires and trims the output
* @private
* @param {String} output The conversion output
* @returns A trimmed version of the ouput
* @type String
*/
function postProcess (output) {
var self = this;
this.rules.forEach(function (rule) {
if (typeof rule.append === 'function') {
output = join(output, rule.append(self.options));
}
});
return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
}
/**
* Converts an element node to its Markdown equivalent
* @private
* @param {HTMLElement} node The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function replacementForNode (node) {
var rule = this.rules.forNode(node);
var content = process.call(this, node);
var whitespace = node.flankingWhitespace;
if (whitespace.leading || whitespace.trailing) content = content.trim();
return (
whitespace.leading +
rule.replacement(content, node, this.options) +
whitespace.trailing
)
}
/**
* Joins replacement to the current output with appropriate number of new lines
* @private
* @param {String} output The current conversion output
* @param {String} replacement The string to append to the output
* @returns Joined output
* @type String
*/
function join (output, replacement) {
var s1 = trimTrailingNewlines(output);
var s2 = trimLeadingNewlines(replacement);
var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
var separator = '\n\n'.substring(0, nls);
return s1 + separator + s2
}
/**
* Determines whether an input can be converted
* @private
* @param {String|HTMLElement} input Describe this parameter
* @returns Describe what it returns
* @type String|Object|Array|Boolean|Number
*/
function canConvert (input) {
return (
input != null && (
typeof input === 'string' ||
(input.nodeType && (
input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
))
)
)
}
export default TurndownService;

View File

@@ -0,0 +1,979 @@
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
typeof define === 'function' && define.amd ? define(factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.TurndownService = factory());
}(this, (function () { 'use strict';
function extend (destination) {
for (var i = 1; i < arguments.length; i++) {
var source = arguments[i];
for (var key in source) {
if (source.hasOwnProperty(key)) destination[key] = source[key];
}
}
return destination
}
function repeat (character, count) {
return Array(count + 1).join(character)
}
function trimLeadingNewlines (string) {
return string.replace(/^\n*/, '')
}
function trimTrailingNewlines (string) {
// avoid match-at-end regexp bottleneck, see #370
var indexEnd = string.length;
while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
return string.substring(0, indexEnd)
}
function trimNewlines (string) {
return trimTrailingNewlines(trimLeadingNewlines(string))
}
var blockElements = [
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
];
function isBlock (node) {
return is(node, blockElements)
}
var voidElements = [
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
];
function isVoid (node) {
return is(node, voidElements)
}
function hasVoid (node) {
return has(node, voidElements)
}
var meaningfulWhenBlankElements = [
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
'AUDIO', 'VIDEO'
];
function isMeaningfulWhenBlank (node) {
return is(node, meaningfulWhenBlankElements)
}
function hasMeaningfulWhenBlank (node) {
return has(node, meaningfulWhenBlankElements)
}
function is (node, tagNames) {
return tagNames.indexOf(node.nodeName) >= 0
}
function has (node, tagNames) {
return (
node.getElementsByTagName &&
tagNames.some(function (tagName) {
return node.getElementsByTagName(tagName).length
})
)
}
var rules = {};
rules.paragraph = {
filter: 'p',
replacement: function (content) {
return '\n\n' + content + '\n\n'
}
};
rules.lineBreak = {
filter: 'br',
replacement: function (content, node, options) {
return options.br + '\n'
}
};
rules.heading = {
filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
replacement: function (content, node, options) {
var hLevel = Number(node.nodeName.charAt(1));
if (options.headingStyle === 'setext' && hLevel < 3) {
var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
return (
'\n\n' + content + '\n' + underline + '\n\n'
)
} else {
return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
}
}
};
rules.blockquote = {
filter: 'blockquote',
replacement: function (content) {
content = trimNewlines(content).replace(/^/gm, '> ');
return '\n\n' + content + '\n\n'
}
};
rules.list = {
filter: ['ul', 'ol'],
replacement: function (content, node) {
var parent = node.parentNode;
if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
return '\n' + content
} else {
return '\n\n' + content + '\n\n'
}
}
};
rules.listItem = {
filter: 'li',
replacement: function (content, node, options) {
var prefix = options.bulletListMarker + ' ';
var parent = node.parentNode;
if (parent.nodeName === 'OL') {
var start = parent.getAttribute('start');
var index = Array.prototype.indexOf.call(parent.children, node);
prefix = (start ? Number(start) + index : index + 1) + '. ';
}
var isParagraph = /\n$/.test(content);
content = trimNewlines(content) + (isParagraph ? '\n' : '');
content = content.replace(/\n/gm, '\n' + ' '.repeat(prefix.length)); // indent
return (
prefix + content + (node.nextSibling ? '\n' : '')
)
}
};
rules.indentedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'indented' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
return (
'\n\n ' +
node.firstChild.textContent.replace(/\n/g, '\n ') +
'\n\n'
)
}
};
rules.fencedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'fenced' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
var className = node.firstChild.getAttribute('class') || '';
var language = (className.match(/language-(\S+)/) || [null, ''])[1];
var code = node.firstChild.textContent;
var fenceChar = options.fence.charAt(0);
var fenceSize = 3;
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
var match;
while ((match = fenceInCodeRegex.exec(code))) {
if (match[0].length >= fenceSize) {
fenceSize = match[0].length + 1;
}
}
var fence = repeat(fenceChar, fenceSize);
return (
'\n\n' + fence + language + '\n' +
code.replace(/\n$/, '') +
'\n' + fence + '\n\n'
)
}
};
rules.horizontalRule = {
filter: 'hr',
replacement: function (content, node, options) {
return '\n\n' + options.hr + '\n\n'
}
};
rules.inlineLink = {
filter: function (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node) {
var href = node.getAttribute('href');
if (href) href = href.replace(/([()])/g, '\\$1');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
return '[' + content + '](' + href + title + ')'
}
};
rules.referenceLink = {
filter: function (node, options) {
return (
options.linkStyle === 'referenced' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node, options) {
var href = node.getAttribute('href');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title + '"';
var replacement;
var reference;
switch (options.linkReferenceStyle) {
case 'collapsed':
replacement = '[' + content + '][]';
reference = '[' + content + ']: ' + href + title;
break
case 'shortcut':
replacement = '[' + content + ']';
reference = '[' + content + ']: ' + href + title;
break
default:
var id = this.references.length + 1;
replacement = '[' + content + '][' + id + ']';
reference = '[' + id + ']: ' + href + title;
}
this.references.push(reference);
return replacement
},
references: [],
append: function (options) {
var references = '';
if (this.references.length) {
references = '\n\n' + this.references.join('\n') + '\n\n';
this.references = []; // Reset references
}
return references
}
};
rules.emphasis = {
filter: ['em', 'i'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.emDelimiter + content + options.emDelimiter
}
};
rules.strong = {
filter: ['strong', 'b'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.strongDelimiter + content + options.strongDelimiter
}
};
rules.code = {
filter: function (node) {
var hasSiblings = node.previousSibling || node.nextSibling;
var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
return node.nodeName === 'CODE' && !isCodeBlock
},
replacement: function (content) {
if (!content) return ''
content = content.replace(/\r?\n|\r/g, ' ');
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
var delimiter = '`';
var matches = content.match(/`+/gm) || [];
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
return delimiter + extraSpace + content + extraSpace + delimiter
}
};
rules.image = {
filter: 'img',
replacement: function (content, node) {
var alt = cleanAttribute(node.getAttribute('alt'));
var src = node.getAttribute('src') || '';
var title = cleanAttribute(node.getAttribute('title'));
var titlePart = title ? ' "' + title + '"' : '';
return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
}
};
function cleanAttribute (attribute) {
return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''
}
/**
* Manages a collection of rules used to convert HTML to Markdown
*/
function Rules (options) {
this.options = options;
this._keep = [];
this._remove = [];
this.blankRule = {
replacement: options.blankReplacement
};
this.keepReplacement = options.keepReplacement;
this.defaultRule = {
replacement: options.defaultReplacement
};
this.array = [];
for (var key in options.rules) this.array.push(options.rules[key]);
}
Rules.prototype = {
add: function (key, rule) {
this.array.unshift(rule);
},
keep: function (filter) {
this._keep.unshift({
filter: filter,
replacement: this.keepReplacement
});
},
remove: function (filter) {
this._remove.unshift({
filter: filter,
replacement: function () {
return ''
}
});
},
forNode: function (node) {
if (node.isBlank) return this.blankRule
var rule;
if ((rule = findRule(this.array, node, this.options))) return rule
if ((rule = findRule(this._keep, node, this.options))) return rule
if ((rule = findRule(this._remove, node, this.options))) return rule
return this.defaultRule
},
forEach: function (fn) {
for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
}
};
function findRule (rules, node, options) {
for (var i = 0; i < rules.length; i++) {
var rule = rules[i];
if (filterValue(rule, node, options)) return rule
}
return void 0
}
function filterValue (rule, node, options) {
var filter = rule.filter;
if (typeof filter === 'string') {
if (filter === node.nodeName.toLowerCase()) return true
} else if (Array.isArray(filter)) {
if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
} else if (typeof filter === 'function') {
if (filter.call(rule, node, options)) return true
} else {
throw new TypeError('`filter` needs to be a string, array, or function')
}
}
/**
* The collapseWhitespace function is adapted from collapse-whitespace
* by Luc Thevenard.
*
* The MIT License (MIT)
*
* Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* collapseWhitespace(options) removes extraneous whitespace from an the given element.
*
* @param {Object} options
*/
function collapseWhitespace (options) {
var element = options.element;
var isBlock = options.isBlock;
var isVoid = options.isVoid;
var isPre = options.isPre || function (node) {
return node.nodeName === 'PRE'
};
if (!element.firstChild || isPre(element)) return
var prevText = null;
var keepLeadingWs = false;
var prev = null;
var node = next(prev, element, isPre);
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ');
if ((!prevText || / $/.test(prevText.data)) &&
!keepLeadingWs && text[0] === ' ') {
text = text.substr(1);
}
// `text` might be empty at this point.
if (!text) {
node = remove(node);
continue
}
node.data = text;
prevText = node;
} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
if (isBlock(node) || node.nodeName === 'BR') {
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
}
prevText = null;
keepLeadingWs = false;
} else if (isVoid(node) || isPre(node)) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null;
keepLeadingWs = true;
} else if (prevText) {
// Drop protection if set previously.
keepLeadingWs = false;
}
} else {
node = remove(node);
continue
}
var nextNode = next(prev, node, isPre);
prev = node;
node = nextNode;
}
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
if (!prevText.data) {
remove(prevText);
}
}
}
/**
* remove(node) removes the given node from the DOM and returns the
* next node in the sequence.
*
* @param {Node} node
* @return {Node} node
*/
function remove (node) {
var next = node.nextSibling || node.parentNode;
node.parentNode.removeChild(node);
return next
}
/**
* next(prev, current, isPre) returns the next node in the sequence, given the
* current and previous nodes.
*
* @param {Node} prev
* @param {Node} current
* @param {Function} isPre
* @return {Node}
*/
function next (prev, current, isPre) {
if ((prev && prev.parentNode === current) || isPre(current)) {
return current.nextSibling || current.parentNode
}
return current.firstChild || current.nextSibling || current.parentNode
}
/*
* Set up window for Node.js
*/
var root = (typeof window !== 'undefined' ? window : {});
/*
* Parsing HTML strings
*/
function canParseHTMLNatively () {
var Parser = root.DOMParser;
var canParse = false;
// Adapted from https://gist.github.com/1129031
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if (new Parser().parseFromString('', 'text/html')) {
canParse = true;
}
} catch (e) {}
return canParse
}
function createHTMLParser () {
var Parser = function () {};
{
if (shouldUseActiveX()) {
Parser.prototype.parseFromString = function (string) {
var doc = new window.ActiveXObject('htmlfile');
doc.designMode = 'on'; // disable on-page scripts
doc.open();
doc.write(string);
doc.close();
return doc
};
} else {
Parser.prototype.parseFromString = function (string) {
var doc = document.implementation.createHTMLDocument('');
doc.open();
doc.write(string);
doc.close();
return doc
};
}
}
return Parser
}
function shouldUseActiveX () {
var useActiveX = false;
try {
document.implementation.createHTMLDocument('').open();
} catch (e) {
if (root.ActiveXObject) useActiveX = true;
}
return useActiveX
}
var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
function RootNode (input, options) {
var root;
if (typeof input === 'string') {
var doc = htmlParser().parseFromString(
// DOM parsers arrange elements in the <head> and <body>.
// Wrapping in a custom element ensures elements are reliably arranged in
// a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>',
'text/html'
);
root = doc.getElementById('turndown-root');
} else {
root = input.cloneNode(true);
}
collapseWhitespace({
element: root,
isBlock: isBlock,
isVoid: isVoid,
isPre: options.preformattedCode ? isPreOrCode : null
});
return root
}
var _htmlParser;
function htmlParser () {
_htmlParser = _htmlParser || new HTMLParser();
return _htmlParser
}
function isPreOrCode (node) {
return node.nodeName === 'PRE' || node.nodeName === 'CODE'
}
function Node (node, options) {
node.isBlock = isBlock(node);
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
node.isBlank = isBlank(node);
node.flankingWhitespace = flankingWhitespace(node, options);
return node
}
function isBlank (node) {
return (
!isVoid(node) &&
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
}
function flankingWhitespace (node, options) {
if (node.isBlock || (options.preformattedCode && node.isCode)) {
return { leading: '', trailing: '' }
}
var edges = edgeWhitespace(node.textContent);
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
edges.leading = edges.leadingNonAscii;
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
edges.trailing = edges.trailingNonAscii;
}
return { leading: edges.leading, trailing: edges.trailing }
}
function edgeWhitespace (string) {
var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
return {
leading: m[1], // whole string for whitespace-only strings
leadingAscii: m[2],
leadingNonAscii: m[3],
trailing: m[4], // empty for whitespace-only strings
trailingNonAscii: m[5],
trailingAscii: m[6]
}
}
function isFlankedByWhitespace (side, node, options) {
var sibling;
var regExp;
var isFlanked;
if (side === 'left') {
sibling = node.previousSibling;
regExp = / $/;
} else {
sibling = node.nextSibling;
regExp = /^ /;
}
if (sibling) {
if (sibling.nodeType === 3) {
isFlanked = regExp.test(sibling.nodeValue);
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
isFlanked = false;
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
isFlanked = regExp.test(sibling.textContent);
}
}
return isFlanked
}
var reduce = Array.prototype.reduce;
var escapes = [
[/\\/g, '\\\\'],
[/\*/g, '\\*'],
[/^-/g, '\\-'],
[/^\+ /g, '\\+ '],
[/^(=+)/g, '\\$1'],
[/^(#{1,6}) /g, '\\$1 '],
[/`/g, '\\`'],
[/^~~~/g, '\\~~~'],
[/\[/g, '\\['],
[/\]/g, '\\]'],
[/^>/g, '\\>'],
[/_/g, '\\_'],
[/^(\d+)\. /g, '$1\\. ']
];
function TurndownService (options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
var defaults = {
rules: rules,
headingStyle: 'setext',
hr: '* * *',
bulletListMarker: '*',
codeBlockStyle: 'indented',
fence: '```',
emDelimiter: '_',
strongDelimiter: '**',
linkStyle: 'inlined',
linkReferenceStyle: 'full',
br: ' ',
preformattedCode: false,
blankReplacement: function (content, node) {
return node.isBlock ? '\n\n' : ''
},
keepReplacement: function (content, node) {
return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
},
defaultReplacement: function (content, node) {
return node.isBlock ? '\n\n' + content + '\n\n' : content
}
};
this.options = extend({}, defaults, options);
this.rules = new Rules(this.options);
}
TurndownService.prototype = {
/**
* The entry point for converting a string or DOM node to Markdown
* @public
* @param {String|HTMLElement} input The string or DOM node to convert
* @returns A Markdown representation of the input
* @type String
*/
turndown: function (input) {
if (!canConvert(input)) {
throw new TypeError(
input + ' is not a string, or an element/document/fragment node.'
)
}
if (input === '') return ''
var output = process.call(this, new RootNode(input, this.options));
return postProcess.call(this, output)
},
/**
* Add one or more plugins
* @public
* @param {Function|Array} plugin The plugin or array of plugins to add
* @returns The Turndown instance for chaining
* @type Object
*/
use: function (plugin) {
if (Array.isArray(plugin)) {
for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
} else if (typeof plugin === 'function') {
plugin(this);
} else {
throw new TypeError('plugin must be a Function or an Array of Functions')
}
return this
},
/**
* Adds a rule
* @public
* @param {String} key The unique key of the rule
* @param {Object} rule The rule
* @returns The Turndown instance for chaining
* @type Object
*/
addRule: function (key, rule) {
this.rules.add(key, rule);
return this
},
/**
* Keep a node (as HTML) that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
keep: function (filter) {
this.rules.keep(filter);
return this
},
/**
* Remove a node that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
remove: function (filter) {
this.rules.remove(filter);
return this
},
/**
* Escapes Markdown syntax
* @public
* @param {String} string The string to escape
* @returns A string with Markdown syntax escaped
* @type String
*/
escape: function (string) {
return escapes.reduce(function (accumulator, escape) {
return accumulator.replace(escape[0], escape[1])
}, string)
}
};
/**
* Reduces a DOM node down to its Markdown string equivalent
* @private
* @param {HTMLElement} parentNode The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function process (parentNode) {
var self = this;
return reduce.call(parentNode.childNodes, function (output, node) {
node = new Node(node, self.options);
var replacement = '';
if (node.nodeType === 3) {
replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
} else if (node.nodeType === 1) {
replacement = replacementForNode.call(self, node);
}
return join(output, replacement)
}, '')
}
/**
* Appends strings as each rule requires and trims the output
* @private
* @param {String} output The conversion output
* @returns A trimmed version of the ouput
* @type String
*/
function postProcess (output) {
var self = this;
this.rules.forEach(function (rule) {
if (typeof rule.append === 'function') {
output = join(output, rule.append(self.options));
}
});
return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
}
/**
* Converts an element node to its Markdown equivalent
* @private
* @param {HTMLElement} node The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function replacementForNode (node) {
var rule = this.rules.forNode(node);
var content = process.call(this, node);
var whitespace = node.flankingWhitespace;
if (whitespace.leading || whitespace.trailing) content = content.trim();
return (
whitespace.leading +
rule.replacement(content, node, this.options) +
whitespace.trailing
)
}
/**
* Joins replacement to the current output with appropriate number of new lines
* @private
* @param {String} output The current conversion output
* @param {String} replacement The string to append to the output
* @returns Joined output
* @type String
*/
function join (output, replacement) {
var s1 = trimTrailingNewlines(output);
var s2 = trimLeadingNewlines(replacement);
var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
var separator = '\n\n'.substring(0, nls);
return s1 + separator + s2
}
/**
* Determines whether an input can be converted
* @private
* @param {String|HTMLElement} input Describe this parameter
* @returns Describe what it returns
* @type String|Object|Array|Boolean|Number
*/
function canConvert (input) {
return (
input != null && (
typeof input === 'string' ||
(input.nodeType && (
input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
))
)
)
}
return TurndownService;
})));

View File

@@ -0,0 +1,949 @@
'use strict';
function extend (destination) {
for (var i = 1; i < arguments.length; i++) {
var source = arguments[i];
for (var key in source) {
if (source.hasOwnProperty(key)) destination[key] = source[key];
}
}
return destination
}
function repeat (character, count) {
return Array(count + 1).join(character)
}
function trimLeadingNewlines (string) {
return string.replace(/^\n*/, '')
}
function trimTrailingNewlines (string) {
// avoid match-at-end regexp bottleneck, see #370
var indexEnd = string.length;
while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
return string.substring(0, indexEnd)
}
function trimNewlines (string) {
return trimTrailingNewlines(trimLeadingNewlines(string))
}
var blockElements = [
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
];
function isBlock (node) {
return is(node, blockElements)
}
var voidElements = [
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
];
function isVoid (node) {
return is(node, voidElements)
}
function hasVoid (node) {
return has(node, voidElements)
}
var meaningfulWhenBlankElements = [
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
'AUDIO', 'VIDEO'
];
function isMeaningfulWhenBlank (node) {
return is(node, meaningfulWhenBlankElements)
}
function hasMeaningfulWhenBlank (node) {
return has(node, meaningfulWhenBlankElements)
}
function is (node, tagNames) {
return tagNames.indexOf(node.nodeName) >= 0
}
function has (node, tagNames) {
return (
node.getElementsByTagName &&
tagNames.some(function (tagName) {
return node.getElementsByTagName(tagName).length
})
)
}
var rules = {};
rules.paragraph = {
filter: 'p',
replacement: function (content) {
return '\n\n' + content + '\n\n'
}
};
rules.lineBreak = {
filter: 'br',
replacement: function (content, node, options) {
return options.br + '\n'
}
};
rules.heading = {
filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
replacement: function (content, node, options) {
var hLevel = Number(node.nodeName.charAt(1));
if (options.headingStyle === 'setext' && hLevel < 3) {
var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
return (
'\n\n' + content + '\n' + underline + '\n\n'
)
} else {
return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
}
}
};
rules.blockquote = {
filter: 'blockquote',
replacement: function (content) {
content = trimNewlines(content).replace(/^/gm, '> ');
return '\n\n' + content + '\n\n'
}
};
rules.list = {
filter: ['ul', 'ol'],
replacement: function (content, node) {
var parent = node.parentNode;
if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
return '\n' + content
} else {
return '\n\n' + content + '\n\n'
}
}
};
rules.listItem = {
filter: 'li',
replacement: function (content, node, options) {
var prefix = options.bulletListMarker + ' ';
var parent = node.parentNode;
if (parent.nodeName === 'OL') {
var start = parent.getAttribute('start');
var index = Array.prototype.indexOf.call(parent.children, node);
prefix = (start ? Number(start) + index : index + 1) + '. ';
}
var isParagraph = /\n$/.test(content);
content = trimNewlines(content) + (isParagraph ? '\n' : '');
content = content.replace(/\n/gm, '\n' + ' '.repeat(prefix.length)); // indent
return (
prefix + content + (node.nextSibling ? '\n' : '')
)
}
};
rules.indentedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'indented' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
return (
'\n\n ' +
node.firstChild.textContent.replace(/\n/g, '\n ') +
'\n\n'
)
}
};
rules.fencedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'fenced' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
var className = node.firstChild.getAttribute('class') || '';
var language = (className.match(/language-(\S+)/) || [null, ''])[1];
var code = node.firstChild.textContent;
var fenceChar = options.fence.charAt(0);
var fenceSize = 3;
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
var match;
while ((match = fenceInCodeRegex.exec(code))) {
if (match[0].length >= fenceSize) {
fenceSize = match[0].length + 1;
}
}
var fence = repeat(fenceChar, fenceSize);
return (
'\n\n' + fence + language + '\n' +
code.replace(/\n$/, '') +
'\n' + fence + '\n\n'
)
}
};
rules.horizontalRule = {
filter: 'hr',
replacement: function (content, node, options) {
return '\n\n' + options.hr + '\n\n'
}
};
rules.inlineLink = {
filter: function (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node) {
var href = node.getAttribute('href');
if (href) href = href.replace(/([()])/g, '\\$1');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
return '[' + content + '](' + href + title + ')'
}
};
rules.referenceLink = {
filter: function (node, options) {
return (
options.linkStyle === 'referenced' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node, options) {
var href = node.getAttribute('href');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title + '"';
var replacement;
var reference;
switch (options.linkReferenceStyle) {
case 'collapsed':
replacement = '[' + content + '][]';
reference = '[' + content + ']: ' + href + title;
break
case 'shortcut':
replacement = '[' + content + ']';
reference = '[' + content + ']: ' + href + title;
break
default:
var id = this.references.length + 1;
replacement = '[' + content + '][' + id + ']';
reference = '[' + id + ']: ' + href + title;
}
this.references.push(reference);
return replacement
},
references: [],
append: function (options) {
var references = '';
if (this.references.length) {
references = '\n\n' + this.references.join('\n') + '\n\n';
this.references = []; // Reset references
}
return references
}
};
rules.emphasis = {
filter: ['em', 'i'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.emDelimiter + content + options.emDelimiter
}
};
rules.strong = {
filter: ['strong', 'b'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.strongDelimiter + content + options.strongDelimiter
}
};
rules.code = {
filter: function (node) {
var hasSiblings = node.previousSibling || node.nextSibling;
var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
return node.nodeName === 'CODE' && !isCodeBlock
},
replacement: function (content) {
if (!content) return ''
content = content.replace(/\r?\n|\r/g, ' ');
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
var delimiter = '`';
var matches = content.match(/`+/gm) || [];
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
return delimiter + extraSpace + content + extraSpace + delimiter
}
};
rules.image = {
filter: 'img',
replacement: function (content, node) {
var alt = cleanAttribute(node.getAttribute('alt'));
var src = node.getAttribute('src') || '';
var title = cleanAttribute(node.getAttribute('title'));
var titlePart = title ? ' "' + title + '"' : '';
return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
}
};
function cleanAttribute (attribute) {
return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''
}
/**
* Manages a collection of rules used to convert HTML to Markdown
*/
function Rules (options) {
this.options = options;
this._keep = [];
this._remove = [];
this.blankRule = {
replacement: options.blankReplacement
};
this.keepReplacement = options.keepReplacement;
this.defaultRule = {
replacement: options.defaultReplacement
};
this.array = [];
for (var key in options.rules) this.array.push(options.rules[key]);
}
Rules.prototype = {
add: function (key, rule) {
this.array.unshift(rule);
},
keep: function (filter) {
this._keep.unshift({
filter: filter,
replacement: this.keepReplacement
});
},
remove: function (filter) {
this._remove.unshift({
filter: filter,
replacement: function () {
return ''
}
});
},
forNode: function (node) {
if (node.isBlank) return this.blankRule
var rule;
if ((rule = findRule(this.array, node, this.options))) return rule
if ((rule = findRule(this._keep, node, this.options))) return rule
if ((rule = findRule(this._remove, node, this.options))) return rule
return this.defaultRule
},
forEach: function (fn) {
for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
}
};
function findRule (rules, node, options) {
for (var i = 0; i < rules.length; i++) {
var rule = rules[i];
if (filterValue(rule, node, options)) return rule
}
return void 0
}
function filterValue (rule, node, options) {
var filter = rule.filter;
if (typeof filter === 'string') {
if (filter === node.nodeName.toLowerCase()) return true
} else if (Array.isArray(filter)) {
if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
} else if (typeof filter === 'function') {
if (filter.call(rule, node, options)) return true
} else {
throw new TypeError('`filter` needs to be a string, array, or function')
}
}
/**
* The collapseWhitespace function is adapted from collapse-whitespace
* by Luc Thevenard.
*
* The MIT License (MIT)
*
* Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* collapseWhitespace(options) removes extraneous whitespace from an the given element.
*
* @param {Object} options
*/
function collapseWhitespace (options) {
var element = options.element;
var isBlock = options.isBlock;
var isVoid = options.isVoid;
var isPre = options.isPre || function (node) {
return node.nodeName === 'PRE'
};
if (!element.firstChild || isPre(element)) return
var prevText = null;
var keepLeadingWs = false;
var prev = null;
var node = next(prev, element, isPre);
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ');
if ((!prevText || / $/.test(prevText.data)) &&
!keepLeadingWs && text[0] === ' ') {
text = text.substr(1);
}
// `text` might be empty at this point.
if (!text) {
node = remove(node);
continue
}
node.data = text;
prevText = node;
} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
if (isBlock(node) || node.nodeName === 'BR') {
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
}
prevText = null;
keepLeadingWs = false;
} else if (isVoid(node) || isPre(node)) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null;
keepLeadingWs = true;
} else if (prevText) {
// Drop protection if set previously.
keepLeadingWs = false;
}
} else {
node = remove(node);
continue
}
var nextNode = next(prev, node, isPre);
prev = node;
node = nextNode;
}
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
if (!prevText.data) {
remove(prevText);
}
}
}
/**
* remove(node) removes the given node from the DOM and returns the
* next node in the sequence.
*
* @param {Node} node
* @return {Node} node
*/
function remove (node) {
var next = node.nextSibling || node.parentNode;
node.parentNode.removeChild(node);
return next
}
/**
* next(prev, current, isPre) returns the next node in the sequence, given the
* current and previous nodes.
*
* @param {Node} prev
* @param {Node} current
* @param {Function} isPre
* @return {Node}
*/
function next (prev, current, isPre) {
if ((prev && prev.parentNode === current) || isPre(current)) {
return current.nextSibling || current.parentNode
}
return current.firstChild || current.nextSibling || current.parentNode
}
/*
* Set up window for Node.js
*/
var root = (typeof window !== 'undefined' ? window : {});
/*
* Parsing HTML strings
*/
function canParseHTMLNatively () {
var Parser = root.DOMParser;
var canParse = false;
// Adapted from https://gist.github.com/1129031
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if (new Parser().parseFromString('', 'text/html')) {
canParse = true;
}
} catch (e) {}
return canParse
}
function createHTMLParser () {
var Parser = function () {};
{
var domino = require('@mixmark-io/domino');
Parser.prototype.parseFromString = function (string) {
return domino.createDocument(string)
};
}
return Parser
}
var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
function RootNode (input, options) {
var root;
if (typeof input === 'string') {
var doc = htmlParser().parseFromString(
// DOM parsers arrange elements in the <head> and <body>.
// Wrapping in a custom element ensures elements are reliably arranged in
// a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>',
'text/html'
);
root = doc.getElementById('turndown-root');
} else {
root = input.cloneNode(true);
}
collapseWhitespace({
element: root,
isBlock: isBlock,
isVoid: isVoid,
isPre: options.preformattedCode ? isPreOrCode : null
});
return root
}
var _htmlParser;
function htmlParser () {
_htmlParser = _htmlParser || new HTMLParser();
return _htmlParser
}
function isPreOrCode (node) {
return node.nodeName === 'PRE' || node.nodeName === 'CODE'
}
function Node (node, options) {
node.isBlock = isBlock(node);
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
node.isBlank = isBlank(node);
node.flankingWhitespace = flankingWhitespace(node, options);
return node
}
function isBlank (node) {
return (
!isVoid(node) &&
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
}
function flankingWhitespace (node, options) {
if (node.isBlock || (options.preformattedCode && node.isCode)) {
return { leading: '', trailing: '' }
}
var edges = edgeWhitespace(node.textContent);
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
edges.leading = edges.leadingNonAscii;
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
edges.trailing = edges.trailingNonAscii;
}
return { leading: edges.leading, trailing: edges.trailing }
}
function edgeWhitespace (string) {
var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
return {
leading: m[1], // whole string for whitespace-only strings
leadingAscii: m[2],
leadingNonAscii: m[3],
trailing: m[4], // empty for whitespace-only strings
trailingNonAscii: m[5],
trailingAscii: m[6]
}
}
function isFlankedByWhitespace (side, node, options) {
var sibling;
var regExp;
var isFlanked;
if (side === 'left') {
sibling = node.previousSibling;
regExp = / $/;
} else {
sibling = node.nextSibling;
regExp = /^ /;
}
if (sibling) {
if (sibling.nodeType === 3) {
isFlanked = regExp.test(sibling.nodeValue);
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
isFlanked = false;
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
isFlanked = regExp.test(sibling.textContent);
}
}
return isFlanked
}
var reduce = Array.prototype.reduce;
var escapes = [
[/\\/g, '\\\\'],
[/\*/g, '\\*'],
[/^-/g, '\\-'],
[/^\+ /g, '\\+ '],
[/^(=+)/g, '\\$1'],
[/^(#{1,6}) /g, '\\$1 '],
[/`/g, '\\`'],
[/^~~~/g, '\\~~~'],
[/\[/g, '\\['],
[/\]/g, '\\]'],
[/^>/g, '\\>'],
[/_/g, '\\_'],
[/^(\d+)\. /g, '$1\\. ']
];
function TurndownService (options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
var defaults = {
rules: rules,
headingStyle: 'setext',
hr: '* * *',
bulletListMarker: '*',
codeBlockStyle: 'indented',
fence: '```',
emDelimiter: '_',
strongDelimiter: '**',
linkStyle: 'inlined',
linkReferenceStyle: 'full',
br: ' ',
preformattedCode: false,
blankReplacement: function (content, node) {
return node.isBlock ? '\n\n' : ''
},
keepReplacement: function (content, node) {
return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
},
defaultReplacement: function (content, node) {
return node.isBlock ? '\n\n' + content + '\n\n' : content
}
};
this.options = extend({}, defaults, options);
this.rules = new Rules(this.options);
}
TurndownService.prototype = {
/**
* The entry point for converting a string or DOM node to Markdown
* @public
* @param {String|HTMLElement} input The string or DOM node to convert
* @returns A Markdown representation of the input
* @type String
*/
turndown: function (input) {
if (!canConvert(input)) {
throw new TypeError(
input + ' is not a string, or an element/document/fragment node.'
)
}
if (input === '') return ''
var output = process.call(this, new RootNode(input, this.options));
return postProcess.call(this, output)
},
/**
* Add one or more plugins
* @public
* @param {Function|Array} plugin The plugin or array of plugins to add
* @returns The Turndown instance for chaining
* @type Object
*/
use: function (plugin) {
if (Array.isArray(plugin)) {
for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
} else if (typeof plugin === 'function') {
plugin(this);
} else {
throw new TypeError('plugin must be a Function or an Array of Functions')
}
return this
},
/**
* Adds a rule
* @public
* @param {String} key The unique key of the rule
* @param {Object} rule The rule
* @returns The Turndown instance for chaining
* @type Object
*/
addRule: function (key, rule) {
this.rules.add(key, rule);
return this
},
/**
* Keep a node (as HTML) that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
keep: function (filter) {
this.rules.keep(filter);
return this
},
/**
* Remove a node that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
remove: function (filter) {
this.rules.remove(filter);
return this
},
/**
* Escapes Markdown syntax
* @public
* @param {String} string The string to escape
* @returns A string with Markdown syntax escaped
* @type String
*/
escape: function (string) {
return escapes.reduce(function (accumulator, escape) {
return accumulator.replace(escape[0], escape[1])
}, string)
}
};
/**
* Reduces a DOM node down to its Markdown string equivalent
* @private
* @param {HTMLElement} parentNode The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function process (parentNode) {
var self = this;
return reduce.call(parentNode.childNodes, function (output, node) {
node = new Node(node, self.options);
var replacement = '';
if (node.nodeType === 3) {
replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
} else if (node.nodeType === 1) {
replacement = replacementForNode.call(self, node);
}
return join(output, replacement)
}, '')
}
/**
* Appends strings as each rule requires and trims the output
* @private
* @param {String} output The conversion output
* @returns A trimmed version of the ouput
* @type String
*/
function postProcess (output) {
var self = this;
this.rules.forEach(function (rule) {
if (typeof rule.append === 'function') {
output = join(output, rule.append(self.options));
}
});
return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
}
/**
* Converts an element node to its Markdown equivalent
* @private
* @param {HTMLElement} node The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function replacementForNode (node) {
var rule = this.rules.forNode(node);
var content = process.call(this, node);
var whitespace = node.flankingWhitespace;
if (whitespace.leading || whitespace.trailing) content = content.trim();
return (
whitespace.leading +
rule.replacement(content, node, this.options) +
whitespace.trailing
)
}
/**
* Joins replacement to the current output with appropriate number of new lines
* @private
* @param {String} output The current conversion output
* @param {String} replacement The string to append to the output
* @returns Joined output
* @type String
*/
function join (output, replacement) {
var s1 = trimTrailingNewlines(output);
var s2 = trimLeadingNewlines(replacement);
var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
var separator = '\n\n'.substring(0, nls);
return s1 + separator + s2
}
/**
* Determines whether an input can be converted
* @private
* @param {String|HTMLElement} input Describe this parameter
* @returns Describe what it returns
* @type String|Object|Array|Boolean|Number
*/
function canConvert (input) {
return (
input != null && (
typeof input === 'string' ||
(input.nodeType && (
input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
))
)
)
}
module.exports = TurndownService;

View File

@@ -0,0 +1,947 @@
function extend (destination) {
for (var i = 1; i < arguments.length; i++) {
var source = arguments[i];
for (var key in source) {
if (source.hasOwnProperty(key)) destination[key] = source[key];
}
}
return destination
}
function repeat (character, count) {
return Array(count + 1).join(character)
}
function trimLeadingNewlines (string) {
return string.replace(/^\n*/, '')
}
function trimTrailingNewlines (string) {
// avoid match-at-end regexp bottleneck, see #370
var indexEnd = string.length;
while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
return string.substring(0, indexEnd)
}
function trimNewlines (string) {
return trimTrailingNewlines(trimLeadingNewlines(string))
}
var blockElements = [
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
];
function isBlock (node) {
return is(node, blockElements)
}
var voidElements = [
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
];
function isVoid (node) {
return is(node, voidElements)
}
function hasVoid (node) {
return has(node, voidElements)
}
var meaningfulWhenBlankElements = [
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
'AUDIO', 'VIDEO'
];
function isMeaningfulWhenBlank (node) {
return is(node, meaningfulWhenBlankElements)
}
function hasMeaningfulWhenBlank (node) {
return has(node, meaningfulWhenBlankElements)
}
function is (node, tagNames) {
return tagNames.indexOf(node.nodeName) >= 0
}
function has (node, tagNames) {
return (
node.getElementsByTagName &&
tagNames.some(function (tagName) {
return node.getElementsByTagName(tagName).length
})
)
}
var rules = {};
rules.paragraph = {
filter: 'p',
replacement: function (content) {
return '\n\n' + content + '\n\n'
}
};
rules.lineBreak = {
filter: 'br',
replacement: function (content, node, options) {
return options.br + '\n'
}
};
rules.heading = {
filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
replacement: function (content, node, options) {
var hLevel = Number(node.nodeName.charAt(1));
if (options.headingStyle === 'setext' && hLevel < 3) {
var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
return (
'\n\n' + content + '\n' + underline + '\n\n'
)
} else {
return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
}
}
};
rules.blockquote = {
filter: 'blockquote',
replacement: function (content) {
content = trimNewlines(content).replace(/^/gm, '> ');
return '\n\n' + content + '\n\n'
}
};
rules.list = {
filter: ['ul', 'ol'],
replacement: function (content, node) {
var parent = node.parentNode;
if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
return '\n' + content
} else {
return '\n\n' + content + '\n\n'
}
}
};
rules.listItem = {
filter: 'li',
replacement: function (content, node, options) {
var prefix = options.bulletListMarker + ' ';
var parent = node.parentNode;
if (parent.nodeName === 'OL') {
var start = parent.getAttribute('start');
var index = Array.prototype.indexOf.call(parent.children, node);
prefix = (start ? Number(start) + index : index + 1) + '. ';
}
var isParagraph = /\n$/.test(content);
content = trimNewlines(content) + (isParagraph ? '\n' : '');
content = content.replace(/\n/gm, '\n' + ' '.repeat(prefix.length)); // indent
return (
prefix + content + (node.nextSibling ? '\n' : '')
)
}
};
rules.indentedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'indented' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
return (
'\n\n ' +
node.firstChild.textContent.replace(/\n/g, '\n ') +
'\n\n'
)
}
};
rules.fencedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'fenced' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
var className = node.firstChild.getAttribute('class') || '';
var language = (className.match(/language-(\S+)/) || [null, ''])[1];
var code = node.firstChild.textContent;
var fenceChar = options.fence.charAt(0);
var fenceSize = 3;
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
var match;
while ((match = fenceInCodeRegex.exec(code))) {
if (match[0].length >= fenceSize) {
fenceSize = match[0].length + 1;
}
}
var fence = repeat(fenceChar, fenceSize);
return (
'\n\n' + fence + language + '\n' +
code.replace(/\n$/, '') +
'\n' + fence + '\n\n'
)
}
};
rules.horizontalRule = {
filter: 'hr',
replacement: function (content, node, options) {
return '\n\n' + options.hr + '\n\n'
}
};
rules.inlineLink = {
filter: function (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node) {
var href = node.getAttribute('href');
if (href) href = href.replace(/([()])/g, '\\$1');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
return '[' + content + '](' + href + title + ')'
}
};
rules.referenceLink = {
filter: function (node, options) {
return (
options.linkStyle === 'referenced' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node, options) {
var href = node.getAttribute('href');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title + '"';
var replacement;
var reference;
switch (options.linkReferenceStyle) {
case 'collapsed':
replacement = '[' + content + '][]';
reference = '[' + content + ']: ' + href + title;
break
case 'shortcut':
replacement = '[' + content + ']';
reference = '[' + content + ']: ' + href + title;
break
default:
var id = this.references.length + 1;
replacement = '[' + content + '][' + id + ']';
reference = '[' + id + ']: ' + href + title;
}
this.references.push(reference);
return replacement
},
references: [],
append: function (options) {
var references = '';
if (this.references.length) {
references = '\n\n' + this.references.join('\n') + '\n\n';
this.references = []; // Reset references
}
return references
}
};
rules.emphasis = {
filter: ['em', 'i'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.emDelimiter + content + options.emDelimiter
}
};
rules.strong = {
filter: ['strong', 'b'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.strongDelimiter + content + options.strongDelimiter
}
};
rules.code = {
filter: function (node) {
var hasSiblings = node.previousSibling || node.nextSibling;
var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
return node.nodeName === 'CODE' && !isCodeBlock
},
replacement: function (content) {
if (!content) return ''
content = content.replace(/\r?\n|\r/g, ' ');
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
var delimiter = '`';
var matches = content.match(/`+/gm) || [];
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
return delimiter + extraSpace + content + extraSpace + delimiter
}
};
rules.image = {
filter: 'img',
replacement: function (content, node) {
var alt = cleanAttribute(node.getAttribute('alt'));
var src = node.getAttribute('src') || '';
var title = cleanAttribute(node.getAttribute('title'));
var titlePart = title ? ' "' + title + '"' : '';
return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
}
};
function cleanAttribute (attribute) {
return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''
}
/**
* Manages a collection of rules used to convert HTML to Markdown
*/
function Rules (options) {
this.options = options;
this._keep = [];
this._remove = [];
this.blankRule = {
replacement: options.blankReplacement
};
this.keepReplacement = options.keepReplacement;
this.defaultRule = {
replacement: options.defaultReplacement
};
this.array = [];
for (var key in options.rules) this.array.push(options.rules[key]);
}
Rules.prototype = {
add: function (key, rule) {
this.array.unshift(rule);
},
keep: function (filter) {
this._keep.unshift({
filter: filter,
replacement: this.keepReplacement
});
},
remove: function (filter) {
this._remove.unshift({
filter: filter,
replacement: function () {
return ''
}
});
},
forNode: function (node) {
if (node.isBlank) return this.blankRule
var rule;
if ((rule = findRule(this.array, node, this.options))) return rule
if ((rule = findRule(this._keep, node, this.options))) return rule
if ((rule = findRule(this._remove, node, this.options))) return rule
return this.defaultRule
},
forEach: function (fn) {
for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
}
};
function findRule (rules, node, options) {
for (var i = 0; i < rules.length; i++) {
var rule = rules[i];
if (filterValue(rule, node, options)) return rule
}
return void 0
}
function filterValue (rule, node, options) {
var filter = rule.filter;
if (typeof filter === 'string') {
if (filter === node.nodeName.toLowerCase()) return true
} else if (Array.isArray(filter)) {
if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
} else if (typeof filter === 'function') {
if (filter.call(rule, node, options)) return true
} else {
throw new TypeError('`filter` needs to be a string, array, or function')
}
}
/**
* The collapseWhitespace function is adapted from collapse-whitespace
* by Luc Thevenard.
*
* The MIT License (MIT)
*
* Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* collapseWhitespace(options) removes extraneous whitespace from an the given element.
*
* @param {Object} options
*/
function collapseWhitespace (options) {
var element = options.element;
var isBlock = options.isBlock;
var isVoid = options.isVoid;
var isPre = options.isPre || function (node) {
return node.nodeName === 'PRE'
};
if (!element.firstChild || isPre(element)) return
var prevText = null;
var keepLeadingWs = false;
var prev = null;
var node = next(prev, element, isPre);
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ');
if ((!prevText || / $/.test(prevText.data)) &&
!keepLeadingWs && text[0] === ' ') {
text = text.substr(1);
}
// `text` might be empty at this point.
if (!text) {
node = remove(node);
continue
}
node.data = text;
prevText = node;
} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
if (isBlock(node) || node.nodeName === 'BR') {
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
}
prevText = null;
keepLeadingWs = false;
} else if (isVoid(node) || isPre(node)) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null;
keepLeadingWs = true;
} else if (prevText) {
// Drop protection if set previously.
keepLeadingWs = false;
}
} else {
node = remove(node);
continue
}
var nextNode = next(prev, node, isPre);
prev = node;
node = nextNode;
}
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
if (!prevText.data) {
remove(prevText);
}
}
}
/**
* remove(node) removes the given node from the DOM and returns the
* next node in the sequence.
*
* @param {Node} node
* @return {Node} node
*/
function remove (node) {
var next = node.nextSibling || node.parentNode;
node.parentNode.removeChild(node);
return next
}
/**
* next(prev, current, isPre) returns the next node in the sequence, given the
* current and previous nodes.
*
* @param {Node} prev
* @param {Node} current
* @param {Function} isPre
* @return {Node}
*/
function next (prev, current, isPre) {
if ((prev && prev.parentNode === current) || isPre(current)) {
return current.nextSibling || current.parentNode
}
return current.firstChild || current.nextSibling || current.parentNode
}
/*
* Set up window for Node.js
*/
var root = (typeof window !== 'undefined' ? window : {});
/*
* Parsing HTML strings
*/
function canParseHTMLNatively () {
var Parser = root.DOMParser;
var canParse = false;
// Adapted from https://gist.github.com/1129031
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if (new Parser().parseFromString('', 'text/html')) {
canParse = true;
}
} catch (e) {}
return canParse
}
function createHTMLParser () {
var Parser = function () {};
{
var domino = require('@mixmark-io/domino');
Parser.prototype.parseFromString = function (string) {
return domino.createDocument(string)
};
}
return Parser
}
var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
function RootNode (input, options) {
var root;
if (typeof input === 'string') {
var doc = htmlParser().parseFromString(
// DOM parsers arrange elements in the <head> and <body>.
// Wrapping in a custom element ensures elements are reliably arranged in
// a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>',
'text/html'
);
root = doc.getElementById('turndown-root');
} else {
root = input.cloneNode(true);
}
collapseWhitespace({
element: root,
isBlock: isBlock,
isVoid: isVoid,
isPre: options.preformattedCode ? isPreOrCode : null
});
return root
}
var _htmlParser;
function htmlParser () {
_htmlParser = _htmlParser || new HTMLParser();
return _htmlParser
}
function isPreOrCode (node) {
return node.nodeName === 'PRE' || node.nodeName === 'CODE'
}
function Node (node, options) {
node.isBlock = isBlock(node);
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
node.isBlank = isBlank(node);
node.flankingWhitespace = flankingWhitespace(node, options);
return node
}
function isBlank (node) {
return (
!isVoid(node) &&
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
}
function flankingWhitespace (node, options) {
if (node.isBlock || (options.preformattedCode && node.isCode)) {
return { leading: '', trailing: '' }
}
var edges = edgeWhitespace(node.textContent);
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
edges.leading = edges.leadingNonAscii;
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
edges.trailing = edges.trailingNonAscii;
}
return { leading: edges.leading, trailing: edges.trailing }
}
function edgeWhitespace (string) {
var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
return {
leading: m[1], // whole string for whitespace-only strings
leadingAscii: m[2],
leadingNonAscii: m[3],
trailing: m[4], // empty for whitespace-only strings
trailingNonAscii: m[5],
trailingAscii: m[6]
}
}
function isFlankedByWhitespace (side, node, options) {
var sibling;
var regExp;
var isFlanked;
if (side === 'left') {
sibling = node.previousSibling;
regExp = / $/;
} else {
sibling = node.nextSibling;
regExp = /^ /;
}
if (sibling) {
if (sibling.nodeType === 3) {
isFlanked = regExp.test(sibling.nodeValue);
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
isFlanked = false;
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
isFlanked = regExp.test(sibling.textContent);
}
}
return isFlanked
}
var reduce = Array.prototype.reduce;
var escapes = [
[/\\/g, '\\\\'],
[/\*/g, '\\*'],
[/^-/g, '\\-'],
[/^\+ /g, '\\+ '],
[/^(=+)/g, '\\$1'],
[/^(#{1,6}) /g, '\\$1 '],
[/`/g, '\\`'],
[/^~~~/g, '\\~~~'],
[/\[/g, '\\['],
[/\]/g, '\\]'],
[/^>/g, '\\>'],
[/_/g, '\\_'],
[/^(\d+)\. /g, '$1\\. ']
];
function TurndownService (options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
var defaults = {
rules: rules,
headingStyle: 'setext',
hr: '* * *',
bulletListMarker: '*',
codeBlockStyle: 'indented',
fence: '```',
emDelimiter: '_',
strongDelimiter: '**',
linkStyle: 'inlined',
linkReferenceStyle: 'full',
br: ' ',
preformattedCode: false,
blankReplacement: function (content, node) {
return node.isBlock ? '\n\n' : ''
},
keepReplacement: function (content, node) {
return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
},
defaultReplacement: function (content, node) {
return node.isBlock ? '\n\n' + content + '\n\n' : content
}
};
this.options = extend({}, defaults, options);
this.rules = new Rules(this.options);
}
TurndownService.prototype = {
/**
* The entry point for converting a string or DOM node to Markdown
* @public
* @param {String|HTMLElement} input The string or DOM node to convert
* @returns A Markdown representation of the input
* @type String
*/
turndown: function (input) {
if (!canConvert(input)) {
throw new TypeError(
input + ' is not a string, or an element/document/fragment node.'
)
}
if (input === '') return ''
var output = process.call(this, new RootNode(input, this.options));
return postProcess.call(this, output)
},
/**
* Add one or more plugins
* @public
* @param {Function|Array} plugin The plugin or array of plugins to add
* @returns The Turndown instance for chaining
* @type Object
*/
use: function (plugin) {
if (Array.isArray(plugin)) {
for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
} else if (typeof plugin === 'function') {
plugin(this);
} else {
throw new TypeError('plugin must be a Function or an Array of Functions')
}
return this
},
/**
* Adds a rule
* @public
* @param {String} key The unique key of the rule
* @param {Object} rule The rule
* @returns The Turndown instance for chaining
* @type Object
*/
addRule: function (key, rule) {
this.rules.add(key, rule);
return this
},
/**
* Keep a node (as HTML) that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
keep: function (filter) {
this.rules.keep(filter);
return this
},
/**
* Remove a node that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
remove: function (filter) {
this.rules.remove(filter);
return this
},
/**
* Escapes Markdown syntax
* @public
* @param {String} string The string to escape
* @returns A string with Markdown syntax escaped
* @type String
*/
escape: function (string) {
return escapes.reduce(function (accumulator, escape) {
return accumulator.replace(escape[0], escape[1])
}, string)
}
};
/**
* Reduces a DOM node down to its Markdown string equivalent
* @private
* @param {HTMLElement} parentNode The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function process (parentNode) {
var self = this;
return reduce.call(parentNode.childNodes, function (output, node) {
node = new Node(node, self.options);
var replacement = '';
if (node.nodeType === 3) {
replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
} else if (node.nodeType === 1) {
replacement = replacementForNode.call(self, node);
}
return join(output, replacement)
}, '')
}
/**
* Appends strings as each rule requires and trims the output
* @private
* @param {String} output The conversion output
* @returns A trimmed version of the ouput
* @type String
*/
function postProcess (output) {
var self = this;
this.rules.forEach(function (rule) {
if (typeof rule.append === 'function') {
output = join(output, rule.append(self.options));
}
});
return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
}
/**
* Converts an element node to its Markdown equivalent
* @private
* @param {HTMLElement} node The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function replacementForNode (node) {
var rule = this.rules.forNode(node);
var content = process.call(this, node);
var whitespace = node.flankingWhitespace;
if (whitespace.leading || whitespace.trailing) content = content.trim();
return (
whitespace.leading +
rule.replacement(content, node, this.options) +
whitespace.trailing
)
}
/**
* Joins replacement to the current output with appropriate number of new lines
* @private
* @param {String} output The current conversion output
* @param {String} replacement The string to append to the output
* @returns Joined output
* @type String
*/
function join (output, replacement) {
var s1 = trimTrailingNewlines(output);
var s2 = trimLeadingNewlines(replacement);
var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
var separator = '\n\n'.substring(0, nls);
return s1 + separator + s2
}
/**
* Determines whether an input can be converted
* @private
* @param {String|HTMLElement} input Describe this parameter
* @returns Describe what it returns
* @type String|Object|Array|Boolean|Number
*/
function canConvert (input) {
return (
input != null && (
typeof input === 'string' ||
(input.nodeType && (
input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
))
)
)
}
export default TurndownService;

View File

@@ -0,0 +1,955 @@
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
typeof define === 'function' && define.amd ? define(factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.TurndownService = factory());
}(this, (function () { 'use strict';
function extend (destination) {
for (var i = 1; i < arguments.length; i++) {
var source = arguments[i];
for (var key in source) {
if (source.hasOwnProperty(key)) destination[key] = source[key];
}
}
return destination
}
function repeat (character, count) {
return Array(count + 1).join(character)
}
function trimLeadingNewlines (string) {
return string.replace(/^\n*/, '')
}
function trimTrailingNewlines (string) {
// avoid match-at-end regexp bottleneck, see #370
var indexEnd = string.length;
while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
return string.substring(0, indexEnd)
}
function trimNewlines (string) {
return trimTrailingNewlines(trimLeadingNewlines(string))
}
var blockElements = [
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
];
function isBlock (node) {
return is(node, blockElements)
}
var voidElements = [
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
];
function isVoid (node) {
return is(node, voidElements)
}
function hasVoid (node) {
return has(node, voidElements)
}
var meaningfulWhenBlankElements = [
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
'AUDIO', 'VIDEO'
];
function isMeaningfulWhenBlank (node) {
return is(node, meaningfulWhenBlankElements)
}
function hasMeaningfulWhenBlank (node) {
return has(node, meaningfulWhenBlankElements)
}
function is (node, tagNames) {
return tagNames.indexOf(node.nodeName) >= 0
}
function has (node, tagNames) {
return (
node.getElementsByTagName &&
tagNames.some(function (tagName) {
return node.getElementsByTagName(tagName).length
})
)
}
var rules = {};
rules.paragraph = {
filter: 'p',
replacement: function (content) {
return '\n\n' + content + '\n\n'
}
};
rules.lineBreak = {
filter: 'br',
replacement: function (content, node, options) {
return options.br + '\n'
}
};
rules.heading = {
filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
replacement: function (content, node, options) {
var hLevel = Number(node.nodeName.charAt(1));
if (options.headingStyle === 'setext' && hLevel < 3) {
var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
return (
'\n\n' + content + '\n' + underline + '\n\n'
)
} else {
return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
}
}
};
rules.blockquote = {
filter: 'blockquote',
replacement: function (content) {
content = trimNewlines(content).replace(/^/gm, '> ');
return '\n\n' + content + '\n\n'
}
};
rules.list = {
filter: ['ul', 'ol'],
replacement: function (content, node) {
var parent = node.parentNode;
if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
return '\n' + content
} else {
return '\n\n' + content + '\n\n'
}
}
};
rules.listItem = {
filter: 'li',
replacement: function (content, node, options) {
var prefix = options.bulletListMarker + ' ';
var parent = node.parentNode;
if (parent.nodeName === 'OL') {
var start = parent.getAttribute('start');
var index = Array.prototype.indexOf.call(parent.children, node);
prefix = (start ? Number(start) + index : index + 1) + '. ';
}
var isParagraph = /\n$/.test(content);
content = trimNewlines(content) + (isParagraph ? '\n' : '');
content = content.replace(/\n/gm, '\n' + ' '.repeat(prefix.length)); // indent
return (
prefix + content + (node.nextSibling ? '\n' : '')
)
}
};
rules.indentedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'indented' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
return (
'\n\n ' +
node.firstChild.textContent.replace(/\n/g, '\n ') +
'\n\n'
)
}
};
rules.fencedCodeBlock = {
filter: function (node, options) {
return (
options.codeBlockStyle === 'fenced' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
},
replacement: function (content, node, options) {
var className = node.firstChild.getAttribute('class') || '';
var language = (className.match(/language-(\S+)/) || [null, ''])[1];
var code = node.firstChild.textContent;
var fenceChar = options.fence.charAt(0);
var fenceSize = 3;
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
var match;
while ((match = fenceInCodeRegex.exec(code))) {
if (match[0].length >= fenceSize) {
fenceSize = match[0].length + 1;
}
}
var fence = repeat(fenceChar, fenceSize);
return (
'\n\n' + fence + language + '\n' +
code.replace(/\n$/, '') +
'\n' + fence + '\n\n'
)
}
};
rules.horizontalRule = {
filter: 'hr',
replacement: function (content, node, options) {
return '\n\n' + options.hr + '\n\n'
}
};
rules.inlineLink = {
filter: function (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node) {
var href = node.getAttribute('href');
if (href) href = href.replace(/([()])/g, '\\$1');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
return '[' + content + '](' + href + title + ')'
}
};
rules.referenceLink = {
filter: function (node, options) {
return (
options.linkStyle === 'referenced' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
},
replacement: function (content, node, options) {
var href = node.getAttribute('href');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title + '"';
var replacement;
var reference;
switch (options.linkReferenceStyle) {
case 'collapsed':
replacement = '[' + content + '][]';
reference = '[' + content + ']: ' + href + title;
break
case 'shortcut':
replacement = '[' + content + ']';
reference = '[' + content + ']: ' + href + title;
break
default:
var id = this.references.length + 1;
replacement = '[' + content + '][' + id + ']';
reference = '[' + id + ']: ' + href + title;
}
this.references.push(reference);
return replacement
},
references: [],
append: function (options) {
var references = '';
if (this.references.length) {
references = '\n\n' + this.references.join('\n') + '\n\n';
this.references = []; // Reset references
}
return references
}
};
rules.emphasis = {
filter: ['em', 'i'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.emDelimiter + content + options.emDelimiter
}
};
rules.strong = {
filter: ['strong', 'b'],
replacement: function (content, node, options) {
if (!content.trim()) return ''
return options.strongDelimiter + content + options.strongDelimiter
}
};
rules.code = {
filter: function (node) {
var hasSiblings = node.previousSibling || node.nextSibling;
var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
return node.nodeName === 'CODE' && !isCodeBlock
},
replacement: function (content) {
if (!content) return ''
content = content.replace(/\r?\n|\r/g, ' ');
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
var delimiter = '`';
var matches = content.match(/`+/gm) || [];
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
return delimiter + extraSpace + content + extraSpace + delimiter
}
};
rules.image = {
filter: 'img',
replacement: function (content, node) {
var alt = cleanAttribute(node.getAttribute('alt'));
var src = node.getAttribute('src') || '';
var title = cleanAttribute(node.getAttribute('title'));
var titlePart = title ? ' "' + title + '"' : '';
return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
}
};
function cleanAttribute (attribute) {
return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''
}
/**
* Manages a collection of rules used to convert HTML to Markdown
*/
function Rules (options) {
this.options = options;
this._keep = [];
this._remove = [];
this.blankRule = {
replacement: options.blankReplacement
};
this.keepReplacement = options.keepReplacement;
this.defaultRule = {
replacement: options.defaultReplacement
};
this.array = [];
for (var key in options.rules) this.array.push(options.rules[key]);
}
Rules.prototype = {
add: function (key, rule) {
this.array.unshift(rule);
},
keep: function (filter) {
this._keep.unshift({
filter: filter,
replacement: this.keepReplacement
});
},
remove: function (filter) {
this._remove.unshift({
filter: filter,
replacement: function () {
return ''
}
});
},
forNode: function (node) {
if (node.isBlank) return this.blankRule
var rule;
if ((rule = findRule(this.array, node, this.options))) return rule
if ((rule = findRule(this._keep, node, this.options))) return rule
if ((rule = findRule(this._remove, node, this.options))) return rule
return this.defaultRule
},
forEach: function (fn) {
for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
}
};
function findRule (rules, node, options) {
for (var i = 0; i < rules.length; i++) {
var rule = rules[i];
if (filterValue(rule, node, options)) return rule
}
return void 0
}
function filterValue (rule, node, options) {
var filter = rule.filter;
if (typeof filter === 'string') {
if (filter === node.nodeName.toLowerCase()) return true
} else if (Array.isArray(filter)) {
if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
} else if (typeof filter === 'function') {
if (filter.call(rule, node, options)) return true
} else {
throw new TypeError('`filter` needs to be a string, array, or function')
}
}
/**
* The collapseWhitespace function is adapted from collapse-whitespace
* by Luc Thevenard.
*
* The MIT License (MIT)
*
* Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* collapseWhitespace(options) removes extraneous whitespace from an the given element.
*
* @param {Object} options
*/
function collapseWhitespace (options) {
var element = options.element;
var isBlock = options.isBlock;
var isVoid = options.isVoid;
var isPre = options.isPre || function (node) {
return node.nodeName === 'PRE'
};
if (!element.firstChild || isPre(element)) return
var prevText = null;
var keepLeadingWs = false;
var prev = null;
var node = next(prev, element, isPre);
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ');
if ((!prevText || / $/.test(prevText.data)) &&
!keepLeadingWs && text[0] === ' ') {
text = text.substr(1);
}
// `text` might be empty at this point.
if (!text) {
node = remove(node);
continue
}
node.data = text;
prevText = node;
} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
if (isBlock(node) || node.nodeName === 'BR') {
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
}
prevText = null;
keepLeadingWs = false;
} else if (isVoid(node) || isPre(node)) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null;
keepLeadingWs = true;
} else if (prevText) {
// Drop protection if set previously.
keepLeadingWs = false;
}
} else {
node = remove(node);
continue
}
var nextNode = next(prev, node, isPre);
prev = node;
node = nextNode;
}
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
if (!prevText.data) {
remove(prevText);
}
}
}
/**
* remove(node) removes the given node from the DOM and returns the
* next node in the sequence.
*
* @param {Node} node
* @return {Node} node
*/
function remove (node) {
var next = node.nextSibling || node.parentNode;
node.parentNode.removeChild(node);
return next
}
/**
* next(prev, current, isPre) returns the next node in the sequence, given the
* current and previous nodes.
*
* @param {Node} prev
* @param {Node} current
* @param {Function} isPre
* @return {Node}
*/
function next (prev, current, isPre) {
if ((prev && prev.parentNode === current) || isPre(current)) {
return current.nextSibling || current.parentNode
}
return current.firstChild || current.nextSibling || current.parentNode
}
/*
* Set up window for Node.js
*/
var root = (typeof window !== 'undefined' ? window : {});
/*
* Parsing HTML strings
*/
function canParseHTMLNatively () {
var Parser = root.DOMParser;
var canParse = false;
// Adapted from https://gist.github.com/1129031
// Firefox/Opera/IE throw errors on unsupported types
try {
// WebKit returns null on unsupported types
if (new Parser().parseFromString('', 'text/html')) {
canParse = true;
}
} catch (e) {}
return canParse
}
function createHTMLParser () {
var Parser = function () {};
{
var domino = require('@mixmark-io/domino');
Parser.prototype.parseFromString = function (string) {
return domino.createDocument(string)
};
}
return Parser
}
var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
function RootNode (input, options) {
var root;
if (typeof input === 'string') {
var doc = htmlParser().parseFromString(
// DOM parsers arrange elements in the <head> and <body>.
// Wrapping in a custom element ensures elements are reliably arranged in
// a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>',
'text/html'
);
root = doc.getElementById('turndown-root');
} else {
root = input.cloneNode(true);
}
collapseWhitespace({
element: root,
isBlock: isBlock,
isVoid: isVoid,
isPre: options.preformattedCode ? isPreOrCode : null
});
return root
}
var _htmlParser;
function htmlParser () {
_htmlParser = _htmlParser || new HTMLParser();
return _htmlParser
}
function isPreOrCode (node) {
return node.nodeName === 'PRE' || node.nodeName === 'CODE'
}
function Node (node, options) {
node.isBlock = isBlock(node);
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
node.isBlank = isBlank(node);
node.flankingWhitespace = flankingWhitespace(node, options);
return node
}
function isBlank (node) {
return (
!isVoid(node) &&
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
}
function flankingWhitespace (node, options) {
if (node.isBlock || (options.preformattedCode && node.isCode)) {
return { leading: '', trailing: '' }
}
var edges = edgeWhitespace(node.textContent);
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
edges.leading = edges.leadingNonAscii;
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
edges.trailing = edges.trailingNonAscii;
}
return { leading: edges.leading, trailing: edges.trailing }
}
function edgeWhitespace (string) {
var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
return {
leading: m[1], // whole string for whitespace-only strings
leadingAscii: m[2],
leadingNonAscii: m[3],
trailing: m[4], // empty for whitespace-only strings
trailingNonAscii: m[5],
trailingAscii: m[6]
}
}
function isFlankedByWhitespace (side, node, options) {
var sibling;
var regExp;
var isFlanked;
if (side === 'left') {
sibling = node.previousSibling;
regExp = / $/;
} else {
sibling = node.nextSibling;
regExp = /^ /;
}
if (sibling) {
if (sibling.nodeType === 3) {
isFlanked = regExp.test(sibling.nodeValue);
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
isFlanked = false;
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
isFlanked = regExp.test(sibling.textContent);
}
}
return isFlanked
}
var reduce = Array.prototype.reduce;
var escapes = [
[/\\/g, '\\\\'],
[/\*/g, '\\*'],
[/^-/g, '\\-'],
[/^\+ /g, '\\+ '],
[/^(=+)/g, '\\$1'],
[/^(#{1,6}) /g, '\\$1 '],
[/`/g, '\\`'],
[/^~~~/g, '\\~~~'],
[/\[/g, '\\['],
[/\]/g, '\\]'],
[/^>/g, '\\>'],
[/_/g, '\\_'],
[/^(\d+)\. /g, '$1\\. ']
];
function TurndownService (options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
var defaults = {
rules: rules,
headingStyle: 'setext',
hr: '* * *',
bulletListMarker: '*',
codeBlockStyle: 'indented',
fence: '```',
emDelimiter: '_',
strongDelimiter: '**',
linkStyle: 'inlined',
linkReferenceStyle: 'full',
br: ' ',
preformattedCode: false,
blankReplacement: function (content, node) {
return node.isBlock ? '\n\n' : ''
},
keepReplacement: function (content, node) {
return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
},
defaultReplacement: function (content, node) {
return node.isBlock ? '\n\n' + content + '\n\n' : content
}
};
this.options = extend({}, defaults, options);
this.rules = new Rules(this.options);
}
TurndownService.prototype = {
/**
* The entry point for converting a string or DOM node to Markdown
* @public
* @param {String|HTMLElement} input The string or DOM node to convert
* @returns A Markdown representation of the input
* @type String
*/
turndown: function (input) {
if (!canConvert(input)) {
throw new TypeError(
input + ' is not a string, or an element/document/fragment node.'
)
}
if (input === '') return ''
var output = process.call(this, new RootNode(input, this.options));
return postProcess.call(this, output)
},
/**
* Add one or more plugins
* @public
* @param {Function|Array} plugin The plugin or array of plugins to add
* @returns The Turndown instance for chaining
* @type Object
*/
use: function (plugin) {
if (Array.isArray(plugin)) {
for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
} else if (typeof plugin === 'function') {
plugin(this);
} else {
throw new TypeError('plugin must be a Function or an Array of Functions')
}
return this
},
/**
* Adds a rule
* @public
* @param {String} key The unique key of the rule
* @param {Object} rule The rule
* @returns The Turndown instance for chaining
* @type Object
*/
addRule: function (key, rule) {
this.rules.add(key, rule);
return this
},
/**
* Keep a node (as HTML) that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
keep: function (filter) {
this.rules.keep(filter);
return this
},
/**
* Remove a node that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
remove: function (filter) {
this.rules.remove(filter);
return this
},
/**
* Escapes Markdown syntax
* @public
* @param {String} string The string to escape
* @returns A string with Markdown syntax escaped
* @type String
*/
escape: function (string) {
return escapes.reduce(function (accumulator, escape) {
return accumulator.replace(escape[0], escape[1])
}, string)
}
};
/**
* Reduces a DOM node down to its Markdown string equivalent
* @private
* @param {HTMLElement} parentNode The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function process (parentNode) {
var self = this;
return reduce.call(parentNode.childNodes, function (output, node) {
node = new Node(node, self.options);
var replacement = '';
if (node.nodeType === 3) {
replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
} else if (node.nodeType === 1) {
replacement = replacementForNode.call(self, node);
}
return join(output, replacement)
}, '')
}
/**
* Appends strings as each rule requires and trims the output
* @private
* @param {String} output The conversion output
* @returns A trimmed version of the ouput
* @type String
*/
function postProcess (output) {
var self = this;
this.rules.forEach(function (rule) {
if (typeof rule.append === 'function') {
output = join(output, rule.append(self.options));
}
});
return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
}
/**
* Converts an element node to its Markdown equivalent
* @private
* @param {HTMLElement} node The node to convert
* @returns A Markdown representation of the node
* @type String
*/
function replacementForNode (node) {
var rule = this.rules.forNode(node);
var content = process.call(this, node);
var whitespace = node.flankingWhitespace;
if (whitespace.leading || whitespace.trailing) content = content.trim();
return (
whitespace.leading +
rule.replacement(content, node, this.options) +
whitespace.trailing
)
}
/**
* Joins replacement to the current output with appropriate number of new lines
* @private
* @param {String} output The current conversion output
* @param {String} replacement The string to append to the output
* @returns Joined output
* @type String
*/
function join (output, replacement) {
var s1 = trimTrailingNewlines(output);
var s2 = trimLeadingNewlines(replacement);
var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
var separator = '\n\n'.substring(0, nls);
return s1 + separator + s2
}
/**
* Determines whether an input can be converted
* @private
* @param {String|HTMLElement} input Describe this parameter
* @returns Describe what it returns
* @type String|Object|Array|Boolean|Number
*/
function canConvert (input) {
return (
input != null && (
typeof input === 'string' ||
(input.nodeType && (
input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
))
)
)
}
return TurndownService;
})));

View File

@@ -0,0 +1,52 @@
{
"name": "turndown",
"description": "A library that converts HTML to Markdown",
"version": "7.2.2",
"author": "Dom Christie",
"main": "lib/turndown.cjs.js",
"module": "lib/turndown.es.js",
"jsnext:main": "lib/turndown.es.js",
"browser": {
"@mixmark-io/domino": false,
"./lib/turndown.cjs.js": "./lib/turndown.browser.cjs.js",
"./lib/turndown.es.js": "./lib/turndown.browser.es.js",
"./lib/turndown.umd.js": "./lib/turndown.browser.umd.js"
},
"dependencies": {
"@mixmark-io/domino": "^2.2.0"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^19.0.0",
"@rollup/plugin-node-resolve": "13.0.0",
"@rollup/plugin-replace": "2.4.2",
"browserify": "17.0.0",
"rewire": "^6.0.0",
"rollup": "2.52.3",
"standard": "^10.0.3",
"turndown-attendant": "0.0.3"
},
"files": [
"lib",
"dist"
],
"keywords": [
"converter",
"html",
"markdown"
],
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/mixmark-io/turndown.git"
},
"scripts": {
"build": "npm run build-cjs && npm run build-es && npm run build-umd && npm run build-iife",
"build-cjs": "rollup -c config/rollup.config.cjs.js && rollup -c config/rollup.config.browser.cjs.js",
"build-es": "rollup -c config/rollup.config.es.js && rollup -c config/rollup.config.browser.es.js",
"build-umd": "rollup -c config/rollup.config.umd.js && rollup -c config/rollup.config.browser.umd.js",
"build-iife": "rollup -c config/rollup.config.iife.js",
"build-test": "browserify test/turndown-test.js --outfile test/turndown-test.browser.js",
"prepare": "npm run build",
"test": "npm run build && npm run build-test && standard ./src/**/*.js && node test/internals-test.js && node test/turndown-test.js"
}
}