From 4be46aebdf67566f327b7b4f56dc56f3fcd2d011 Mon Sep 17 00:00:00 2001 From: Nicolas Stepien Date: Fri, 31 Aug 2012 02:14:52 +0200 Subject: [PATCH] Parse posts. Hopefully I didn't miss anything. --- 4chan_x.user.js | 121 ++++++++++++++++++++++++++++++++++++++++++------ script.coffee | 116 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 211 insertions(+), 26 deletions(-) diff --git a/4chan_x.user.js b/4chan_x.user.js index 70878f088..9245d6801 100644 --- a/4chan_x.user.js +++ b/4chan_x.user.js @@ -603,51 +603,140 @@ Board = (function() { + Board.prototype.toString = function() { + return this.ID; + }; + function Board(ID) { this.ID = ID; this.threads = {}; this.posts = {}; - g.boards[this.ID] = this; + g.boards[this] = this; } - Board.prototype.toString = function() { - return this.ID; - }; - return Board; })(); Thread = (function() { + Thread.prototype.callbacks = []; + + Thread.prototype.toString = function() { + return this.ID; + }; + function Thread(root, board) { this.root = root; this.board = board; this.ID = +root.id.slice(1); this.hr = root.nextElementSibling; this.posts = {}; - g.threads["" + board.ID + "." + this.ID] = board.threads[this.ID] = this; + g.threads["" + board + "." + this] = board.threads[this] = this; } - Thread.prototype.callbacks = []; - return Thread; })(); Post = (function() { + Post.prototype.callbacks = []; + + Post.prototype.toString = function() { + return this.ID; + }; + function Post(root, thread, board) { - this.root = root; + var alt, anchor, bq, capcode, data, date, email, file, flag, i, info, name, node, nodes, post, quotelink, quotes, subject, text, thumb, tripcode, uniqueID, _i, _j, _k, _len, _len1, _ref, _ref1, _ref2; this.thread = thread; this.board = board; this.ID = +root.id.slice(2); - this.el = $('.post', root); - g.posts["" + board.ID + "." 
+ this.ID] = thread.posts[this.ID] = board.posts[this.ID] = this; + post = $('.post', root); + this.nodes = { + root: root, + post: post, + info: $('.postInfo', post), + comment: $('.postMessage', post), + quotelinks: [] + }; + info = this.nodes.info; + if (subject = $('.subject', info)) { + this.nodes.subject = subject; + this.subject = subject.textContent; + } + if (name = $('.name', info)) { + this.nodes.name = name; + this.name = name.textContent; + } + if (email = $('.useremail', info)) { + this.nodes.email = email; + this.email = decodeURIComponent(email.href.slice(7)); + } + if (tripcode = $('.postertrip', info)) { + this.nodes.tripcode = tripcode; + this.tripcode = tripcode.textContent; + } + if (uniqueID = $('.posteruid', info)) { + this.nodes.uniqueID = uniqueID; + this.uniqueID = uniqueID.textContent; + } + if (capcode = $('.capcode', info)) { + this.nodes.capcode = capcode; + this.capcode = capcode.textContent; + } + if (flag = $('.countryFlag', info)) { + this.nodes.flag = flag; + this.flag = flag.title; + } + if (date = $('.dateTime', info)) { + this.nodes.date = date; + this.dateUTC = date.dataset.utc; + } + bq = this.nodes.comment.cloneNode(true); + _ref = $$('.abbr, .capcodeReplies, .exif, b', bq); + for (_i = 0, _len = _ref.length; _i < _len; _i++) { + node = _ref[_i]; + $.rm(node); + } + text = []; + nodes = d.evaluate('.//br|.//text()', bq, null, 7, null); + for (i = _j = 0, _ref1 = nodes.snapshotLength; 0 <= _ref1 ? _j < _ref1 : _j > _ref1; i = 0 <= _ref1 ? ++_j : --_j) { + text.push((data = nodes.snapshotItem(i).data) ? data : '\n'); + } + this.comment = text.join('').replace(/^\n+|\n+$| +(?=\n|$)/g, ''); + quotes = {}; + _ref2 = $$('.quotelink', this.nodes.comment); + for (_k = 0, _len1 = _ref2.length; _k < _len1; _k++) { + quotelink = _ref2[_k]; + if (quotelink.hash) { + this.nodes.quotelinks.push(quotelink); + quotes["" + (quotelink.pathname.split('/')[1]) + "." 
+ quotelink.hash.slice(2)] = true; + } + } + this.quotes = Object.keys(quotes); + if ((file = $('.file', post)) && (thumb = $('img[data-md5]', file))) { + alt = thumb.alt; + anchor = thumb.parentNode; + this.file = { + info: $('.fileInfo', file), + text: $('.fileText', file), + thumb: thumb, + URL: anchor.href, + MD5: thumb.dataset.md5, + size: alt.match(/\d+(\.\d+)?\s\w+$/)[0], + isSpoiler: $.hasClass(anchor, 'imgspoiler') + }; + this.file.thumbURL = "" + location.protocol + "//thumbs.4chan.org/" + board + "/thumb/" + (this.file.URL.match(/(\d+)\./)[1]) + "s.jpg"; + this.file.name = $('span[title]', this.file.info).title; + if (this.file.isImage = /(jpg|png|gif|svg)$/i.test(this.file.name)) { + this.file.dimensions = this.file.text.textContent.match(/\d+x\d+/)[0]; + } + } + this.isReply = $.hasClass(post, 'reply'); + g.posts["" + board + "." + this] = thread.posts[this] = board.posts[this] = this; } - Post.prototype.callbacks = []; - return Post; })(); @@ -748,7 +837,11 @@ if (!$.hasClass(child, 'postContainer')) { continue; } - posts.push(new Post(child, thread, g.BOARD)); + try { + posts.push(new Post(child, thread, g.BOARD)); + } catch (err) { + + } } } Main.callbackNodes(Thread, threads, true); diff --git a/script.coffee b/script.coffee index 8f7205a2d..9464d237b 100644 --- a/script.coffee +++ b/script.coffee @@ -474,30 +474,117 @@ $.extend $, class Board + toString: -> @ID + constructor: (@ID) -> @threads = {} @posts = {} - g.boards[@ID] = @ - - toString: -> @ID + g.boards[@] = @ class Thread + callbacks: [] + toString: -> @ID + constructor: (@root, @board) -> @ID = +root.id[1..] @hr = root.nextElementSibling @posts = {} - g.threads["#{board.ID}.#{@ID}"] = board.threads[@ID] = @ - callbacks: [] + g.threads["#{board}.#{@}"] = board.threads[@] = @ class Post - constructor: (@root, @thread, @board) -> - @ID = +root.id[2..] 
- @el = $ '.post', root - - g.posts["#{board.ID}.#{@ID}"] = thread.posts[@ID] = board.posts[@ID] = @ callbacks: [] + toString: -> @ID + + constructor: (root, @thread, @board) -> + @ID = +root.id[2..] + + post = $ '.post', root + @nodes = + root: root + post: post + info: $ '.postInfo', post + comment: $ '.postMessage', post + quotelinks: [] + + info = @nodes.info + if subject = $ '.subject', info + @nodes.subject = subject + @subject = subject.textContent + if name = $ '.name', info + @nodes.name = name + @name = name.textContent + if email = $ '.useremail', info + @nodes.email = email + @email = decodeURIComponent email.href[7..] + if tripcode = $ '.postertrip', info + @nodes.tripcode = tripcode + @tripcode = tripcode.textContent + if uniqueID = $ '.posteruid', info + @nodes.uniqueID = uniqueID + @uniqueID = uniqueID.textContent + if capcode = $ '.capcode', info + @nodes.capcode = capcode + @capcode = capcode.textContent + if flag = $ '.countryFlag', info + @nodes.flag = flag + @flag = flag.title + if date = $ '.dateTime', info + @nodes.date = date + @dateUTC = date.dataset.utc + + # Get the comment's text. + # <br>
-> \n + # Remove: + # 'Comment too long'... + # Admin/Mod/Dev replies. (/q/) + # EXIF data. (/p/) + # Rolls. (/tg/) + # Preceding and following new lines. + # Trailing spaces. + bq = @nodes.comment.cloneNode true + for node in $$ '.abbr, .capcodeReplies, .exif, b', bq + $.rm node + text = [] + # XPathResult.ORDERED_NODE_SNAPSHOT_TYPE === 7 + nodes = d.evaluate './/br|.//text()', bq, null, 7, null + for i in [0...nodes.snapshotLength] + text.push if data = nodes.snapshotItem(i).data then data else '\n' + @comment = text.join('').replace /^\n+|\n+$| +(?=\n|$)/g, '' + + quotes = {} + for quotelink in $$ '.quotelink', @nodes.comment + # Don't add board links. (>>>/b/) + # Don't add text-board quotelinks. (>>>/img/1234) + # Only add quotes that link to posts on an imageboard. + if quotelink.hash + @.nodes.quotelinks.push quotelink + quotes["#{quotelink.pathname.split('/')[1]}.#{quotelink.hash[2..]}"] = true + @quotes = Object.keys quotes + + if (file = $ '.file', post) and thumb = $ 'img[data-md5]', file + # Supports JPG/PNG/GIF/PDF. + # Flash files are not supported. + alt = thumb.alt + anchor = thumb.parentNode + @file = + info: $ '.fileInfo', file + text: $ '.fileText', file + thumb: thumb + URL: anchor.href + MD5: thumb.dataset.md5 + size: alt.match(/\d+(\.\d+)?\s\w+$/)[0] + isSpoiler: $.hasClass anchor, 'imgspoiler' + @file.thumbURL = "#{location.protocol}//thumbs.4chan.org/#{board}/thumb/#{@file.URL.match(/(\d+)\./)[1]}s.jpg" + @file.name = $('span[title]', @file.info).title + if @file.isImage = /(jpg|png|gif|svg)$/i.test @file.name # I want to believe. 
+ @file.dimensions = @file.text.textContent.match(/\d+x\d+/)[0] + + @isReply = $.hasClass post, 'reply' + + g.posts["#{board}.#{@}"] = thread.posts[@] = board.posts[@] = @ + Main = init: -> @@ -576,7 +663,12 @@ Main = threads.push thread for child in thread.root.children continue unless $.hasClass child, 'postContainer' - posts.push new Post child, thread, g.BOARD + try + posts.push new Post child, thread, g.BOARD + catch err + # Skip posts that we failed to parse. + # XXX handle error + # Post parser crashed for post No.#{child.id[2..]} Main.callbackNodes Thread, threads, true Main.callbackNodes Post, posts, true @@ -589,7 +681,7 @@ Main = for i in [0...len] callback.cb.call nodes[i] catch err - # handle error if notify + # XXX handle error if notify return settings: ->