Menu
Portfolio
Blog
Snippets
Login
Confirm
exit_to_app
Close
Entries
library_add
Utility(Event,Mouse): Log Movement X,Y
delete
Utility(txt): Basic Sentence Generator
delete
Utility(Event,Keyboard): Logger
delete
Utility(txt): Expanded Phrase Generator
delete
Scrape(General|Multi): Paginated Crawler
delete
Scrape(Sunbiz|Multi): Get Biz's By Zip
delete
Utility(Time): Test Load Speed
delete
Scrape(General|Single|NLP): Get Keyword Stats
delete
Scrape(General|Single): Get Text Near Links
delete
Utility(Event|Mouse): Press Down,Up, & Dragend
delete
Utility(Number): Between Range
delete
Utility(Event|Window): On Resize
delete
Scrape(Youtube|Single): Crawl Channel Videos
delete
Scrape(General|Multi): Tiny Link Crawler
delete
Utility(Time): Day of Week Report
delete
Utility(Number): Aspect Ratio Size Pairs
delete
Utility(Data): Sort Object Properties
delete
Scrape(General|Single): RiTa Sentences & Words
delete
Scrape(General|Single): Investigate Element Layers
delete
Scrape(NLP|Single): Using Compromise Plugins
delete
Utility(General): Remote Script Loader
delete
Scrape(General|Single|NLP): Compromise RiTa D3
delete
Scrape(General|Multi|Node): Grouped Node Crawler
delete
Scrape(Amazon|Multi): Crawl Product SERPs
delete
Scrape(Amazon|Multi): Get Paginated Brands
delete
Utility (Data): Download JSON in Browser
delete
Utility (Data): JSON AutoTypes
delete
Scrape(YouTube|Single): Video Page
delete
Utility (Text): Make Page Editable
delete
Utility (Text): Article Editor
delete
Scrape(General|Single|Text): Get Text On Click
delete
Utility (File): DnD File Parser (CSV,JSON,TXT)
delete
Scrape(General|Single): Get Links (Examples)
delete
Scrape(General|Single|Text): Get Sentences by Tag
delete
Utility (File): JSON to CSV via D3.js
delete
Scrape(General|Single): Auto Parse by Content Type
delete
Scrape(General|Single): Get Paragraphs & Sentences
delete
Scrape(Amazon|Multi): Get Reviews by ASIN
delete
Scrape(General|Single): Download Images on Page
delete
Utility(Event,Form): Custom Submit Function
delete
Utility (Fetch): Basic API Post Request
delete
Utility (Event,Form,Fetch): Form Data to API
delete
Utility (Time): Async Delay
delete
Utility (Time): Async Repeat Every N Secs
delete
Scrape(coj): Crawl Property SERPs
delete
Utility (Data): Promise Chain
delete
Utility (Fetch): Examples - JSON,Text,HTML
delete
Scrape(Amazon|Single): Product Review NLP
delete
Utility (Nodejs): Streaming Collections
delete
Scrape(Rate My Professor|Multi): Crawl Prof SERPs
delete
Utility (Time): JS Timer
delete
Utility (Text): Proper Case
delete
Scrape(Thingiverse API): Get Things via Search API
delete
Scrape(General|Single|Node): Get Node Attributes
delete
Scrape(General|Single|Node): Node Attributes + Text
delete
Scrape(Thesaurus): Get Words from SERPs
delete
Scrape(Walmart): Crawl Product SERPs
delete
Scrape(free3d): Crawl 3D Model SERPs
delete
Scrape(Aliexpress|Single): Get Products From SERP
delete
Scrape(simplify3d): Crawl Post SERPs
delete
Scrape(Twitter): Crawl Post Feed (infinite scroll)
delete
Scrape(DDuckGo|Single): Get Links from SERP
delete
Scrape(General|Single): Get Tokens String Distance
delete
Scrape(General|Single): Content Report
delete
Scrape(General|Single|Node): Node Recon (CSV)
delete
Utility (File): D3 JSON to CSV
delete
Scrape(coj|Multi): Crawl Property SERPs
delete
Scrape(coj|Single): sidenote
delete
Scrape(General|Single): Recursive Node Crawler
delete
Utility (Event,Window): Scroll to Root ScrollHeight
delete
Scrape(Indeed|Multi): Crawl Job SERPs
delete
Scrape(Thingiverse API): Get Things By Id
delete
Scrape(Thingiverse): Crawl Things by Category
delete
Scrape(Thingiverse API): Get Thing Batches by Id via DnD
delete
Scrape(YouTube|Single): Get Video Playlist
delete
Utility (Data): Join Thing Metrics & Meta
delete
Utility(Data): Get Nested Array Lengths
delete
Utility (Twitter): Hide Followed Profiles
delete
Utility (Time): YYYY-MM-DD HH:MM:SS
delete
Scrape(Thangs|Multi): Crawl 3D Model SERPs
delete
Scrape(PrusaPrints,Multi): Get Prints
delete
Scrape(Reddit,Single): Get Posts
delete
Userscript(Youtube): Scrape Channel Videos
delete
Userscript(Youtube): Tab Manager
delete
Scrape(Sunbiz|Multi): Biz Details
delete
Utility(Data): DnD View Types
delete
Scrape(General|Single|Node): Select Nodes by Attr
delete
Scrape(Aliexpress|Multi): Get Products via API
delete
Utility(Text): Strip Web Page CSS, Script, Events, Media
delete
Scrape(Youtube|Single) Get Subs
delete
Scrape(General|Single): SelectAll ReduceByProp
delete
Scrape(General|Single): SelectAll ReduceMultiProps
delete
Scrape(General|Multi): Tiny Link Crawler + Delay & Node Reports
delete
Scrape(P5|Multi): Get Examples
delete
Scrape(LinkedIn|Single): Find New Connections
delete
UserScript(linkedIn|Single) Get Jobs
delete
Utility (Time): Date From Days Ago
delete
Utility(General|Single) Keep Scrolling
delete
Scrape(YouTube) Videos From Search
delete
Utility(General|Single): getOffset
delete
Utility(Event,Form): Get Data On Form Input
delete
Utility(Event,Element): ResizeObserver
delete
COCO-SSD Object Categories
delete
Scrape(Wikipedia|Multi): What Links Here?
delete
Scrape(DDuckGo|Single): Download Images
delete
Scrape(General|Single|NLP): Compromise nGram
delete
Scrape(General|Single|NLP): Compromise RiTa D3
Edit Snippet
(() => {
  /*
   * Browser-console snippet: loads compromise, RiTa and d3 (v5) from remote
   * URLs, extracts the text of the selected node(s), and logs a sentence
   * report (per-sentence punctuation checks, point of view and part-of-speech
   * tags, nested with d3.nest()).
   *
   * For this to work for some websites, you might need to disable a security
   * feature in your browser. More info here:
   * https://stackoverflow.com/questions/27323631/how-to-override-content-security-policy-while-including-script-in-browser-js-con
   */
  const options = {
    scripts: [
      "https://unpkg.com/compromise",
      "https://rednoise.org/rita/download/rita.min.js",
      "https://d3js.org/d3.v5.min.js"
    ],
    cssSelector: "body",
    multiNodeTarget: false,
    stripTags: true,
    doc: document,
  };

  // Pronoun lists used by getWordPOV; built once instead of on every call.
  const POINTS_OF_VIEW = {
    first: ["i", "me", "my", "mine", "we", "us", "our", "ours", "myself", "ourselves"],
    second: ["you", "your", "yours", "yourself", "yourselves"],
    third: ["he", "it", "she", "her", "his", "their", "they", "them", "everyone", "anybody", "anyone", "him", "hers", "himself", "herself", "itself", "theirs", "themselves"]
  };

  // compromise term-level match methods tested against every sentence.
  // hasEllipses, isAcronym and isKnown were disabled in the original snippet
  // and remain excluded here.
  const MATCH_METHODS = [
    "hasQuote",
    "hasComma",
    "hasPeriod",
    "hasExclamation",
    "hasQuestionMark",
    "hasSemicolon",
    "hasSlash",
    "hasHyphen",
    "hasDash",
    "hasContraction",
    "isUpperCase",
    "isTitleCase"
  ];

  /**
   * Removes every descendant of `el` matching `tagList` from the DOM.
   * @param {Element} el - root element to clean (mutated in place).
   * @param {string} [tagList] - CSS selector list; defaults to common non-content tags.
   */
  const stripTags = (el, tagList) => {
    const selector = tagList ? tagList : "script,style,noscript,textarea,form,input,nav,aside";
    [...el.querySelectorAll(selector)].forEach(node => node.remove());
  };

  /**
   * Returns the first non-empty of `innerText`, `value`, `content` for a node.
   * @param {Element} el - node to read.
   * @param {boolean} [clean=true] - when true, collapses repeated ". ? !" and
   *   every whitespace run (newlines included) to a single character and trims.
   * @returns {string} the node text, raw when `clean` is false.
   */
  const getNodeText = (el, clean = true) => {
    const text = el.innerText ? el.innerText : el.value ? el.value : el.content ? el.content : "";
    if (!clean) {
      return text; // raw text requested
    }
    // The original chain ended with a /[\n\r]+/ -> "\n" replace that could
    // never match, because the whitespace collapse before it already consumes
    // newlines. It is dropped here; the output is unchanged.
    return text
      .replace(/\.+/g, ".")
      .replace(/\?+/g, "?")
      .replace(/!+/g, "!")
      .replace(/\s+/g, " ")
      .trim();
  };

  /**
   * Collects a node's attributes into a plain object keyed by attribute name.
   * @param {Element} el - node to inspect.
   * @returns {Object<string,string>} attribute map; `{}` if reading fails.
   */
  const getNodeAttributes = (el) => {
    try {
      return [...el.attributes].reduce((atts, att) => {
        atts[att.nodeName] = att.value;
        return atts;
      }, {});
    } catch (e) {
      // if the attributes cannot be read, log the error and return an empty object
      console.log({ e });
      return {};
    }
  };

  /**
   * Selects the target node(s) described by the options.
   * @param {{doc: Document|Element, cssSelector: string, multiNodeTarget: boolean}} param0
   * @returns {Element[]} matched nodes; empty when nothing matches or the selector is invalid.
   */
  const select = ({ doc, cssSelector, multiNodeTarget }) => {
    try {
      const nodes = multiNodeTarget
        ? [...doc.querySelectorAll(cssSelector)]
        : [doc.querySelector(cssSelector)];
      // querySelector yields null on no match; drop it so callers never see [null]
      return nodes.filter(node => node !== null);
    } catch (e) {
      // if there is an error with selection, log the error and return an empty array
      console.log({ e });
      return [];
    }
  };

  /**
   * Maps nodes to `{nodeName, text, ...attributes}` records.
   * @param {Element[]} nodes
   * @returns {Object[]} one record per node.
   */
  const processNodes = (nodes) =>
    nodes.map(node => ({
      nodeName: node.nodeName,
      text: getNodeText(node),
      ...getNodeAttributes(node)
    }));

  /**
   * Reducer that tallies occurrences of `v.word` in `tagReport`.
   * Not called by run(); kept as a helper for ad-hoc use while editing the snippet.
   * @param {Object<string,number>} tagReport - accumulator (mutated).
   * @param {{word: string}} v
   * @returns {Object<string,number>} the accumulator.
   */
  const getWordCount = (tagReport, v) => {
    if (tagReport[v.word]) {
      tagReport[v.word]++;
    } else {
      tagReport[v.word] = 1;
    }
    return tagReport;
  };

  /**
   * Classifies a word as a "first", "second" or "third" person pronoun.
   * @param {string} word
   * @returns {string} the matching label; "third" for any word not listed.
   */
  const getWordPOV = (word) => {
    const wordLow = word.toLowerCase();
    const povLabel = Object.keys(POINTS_OF_VIEW).find(key => POINTS_OF_VIEW[key].includes(wordLow));
    return povLabel ? povLabel : "third";
  };

  // Descending string comparator. The original passed `(a,b) => a < b`, a
  // boolean, which is not a valid comparator (it never returns a negative
  // value); this returns the -1/0/1 form of the same ordering.
  const byDescending = (a, b) => (a < b ? 1 : a > b ? -1 : 0);

  // True when any term in the compromise document satisfies the @method check.
  const hasMatch = (doc, method) => doc.match(`@${method}`).text().length > 0;

  /**
   * Builds and logs the sentence report for the first selected node.
   * Requires the globals `nlp` (compromise), `RiTa` and `d3` (v5, for d3.nest).
   * @param {typeof options} options
   */
  const run = (options) => {
    if (options.stripTags) {
      // clean the same document that select() queries; fall back to the page body
      stripTags(options.doc.body || document.body);
    }
    const targets = select(options);
    const results = processNodes(targets);
    console.log({ results });
    if (results.length === 0) {
      console.log("No nodes matched selector", { cssSelector: options.cssSelector });
      return;
    }

    const str = results[0].text;
    const sentences = {
      rita: RiTa.splitSentences(str),
      comp: nlp(str).sentences().json(),
    };
    const allPOSTags = [];

    sentences.posReport = sentences.comp.reduce((posReport, s) => {
      // sentences of 300+ characters are left out of the report
      if (s.text.length >= 300) {
        return posReport;
      }
      const sentenceDoc = nlp(s.text);
      const matchTests = {};
      for (const method of MATCH_METHODS) {
        matchTests[method] = hasMatch(sentenceDoc, method);
      }

      const povLabels = [];
      const tags = [];
      for (const term of s.terms) {
        povLabels.push(getWordPOV(term.text));
        allPOSTags.push({
          parentSentence: s.text,
          word: term.text,
          // sort a copy so the term's own tag array is not reordered
          tags: [...term.tags].sort(byDescending).join("|")
        });
        tags.push(...term.tags);
      }

      const fpov = povLabels.includes("first");
      const spov = povLabels.includes("second");
      const pov = fpov && spov ? "first and second" : fpov ? "first" : spov ? "second" : "third";

      posReport.push({ sentence: s.text, pov, tags, ...matchTests });
      return posReport;
    }, []);

    const report = {
      nestedSentencePOSReport: d3.nest()
        .key(d => d.pov)
        .key(d => d.tags.includes("QuestionWord") ? "Has Question Word" : "No Question Words")
        .key(d => d.sentence.length > 130 ? "Long Sentences" : d.sentence.length > 80 ? "Medium Sentences" : "Short Sentences")
        .key(d => d.tags.includes("Conjunction") ? "Has Conjunction" : "No Conjunctions")
        .key(d => d.tags.includes("Preposition") ? "Has Preposition" : "No Prepositions")
        .key(d => d.tags.includes("Noun") || d.tags.includes("Determiner") ? "Has Noun" : "No Nouns")
        .key(d => d.tags.includes("Verb") ? "Has Verb" : "No Verbs")
        .key(d => d.tags.some(tag => tag.includes("Tense") || tag.includes("Future")) ? "Has Tense Verb" : "No Tense Verbs")
        .key(d => d.tags.includes("Adjective") ? "Has Adjective" : "No Adjectives")
        .key(d => d.tags.includes("Adverb") ? "Has Adverb" : "No Adverbs")
        .key(d => d.tags.filter(tag => tag === "Noun").length >= 3 ? "3+ Nouns" : "Less Than 3 Nouns")
        .map(sentences.posReport),
      sentences,
      allPOSTags
    };
    console.log({ report });
  };

  /**
   * Injects each URL as a <script> into document.head and calls run(options)
   * once every script has fired its "load" event.
   * @param {string[]} scripts - script URLs to load.
   */
  const loadScripts = (scripts) => {
    let scriptCountdown = scripts.length;

    const scriptsLoaded = () => {
      console.log("scriptsLoaded", { scriptCountdown });
      if (scriptCountdown === 0) {
        run(options);
      }
      return true;
    };

    // nothing to wait for: without this, run() would never be triggered
    if (scriptCountdown === 0) {
      scriptsLoaded();
      return;
    }

    const loadScript = (url) => {
      console.log("loadScript", { url });
      const imported = document.createElement('script');
      imported.src = url;
      imported.addEventListener("load", () => {
        scriptCountdown--;
        scriptsLoaded();
      });
      // surface failures (e.g. blocked by CSP) instead of waiting silently forever
      imported.addEventListener("error", () => {
        console.log("loadScript failed", { url });
      });
      document.head.appendChild(imported);
    };

    scripts.forEach(loadScript);
  };

  //START - first step, load remote dependencies
  loadScripts(options.scripts);
})()