Support parsing Tinyboard posts.

2016-10-09 22:03:24 -07:00 · 2016-10-09 22:03:24 -07:00 · ffdb2cbd5c
commit ffdb2cbd5c
parent b26a1806cf
6 changed files with 214 additions and 107 deletions
--- a/src/General/Get.coffee
+++ b/src/General/Get.coffee
@@ -63,8 +63,3 @@ Get =
    quotelinks.filter (quotelink) ->
      {boardID, postID} = Get.postDataFromLink quotelink
      boardID is post.board.ID and postID is post.ID
-
-  scriptData: ->
-    for script in $$ 'script:not([src])', d.head
-      return script.textContent if /\bcooldowns *=/.test script.textContent
-    ''
--- a/src/classes/Post.Clone.coffee
+++ b/src/classes/Post.Clone.coffee
@@ -11,10 +11,10 @@ Post.Clone = class extends Post
      @cloneWithoutVideo nodes.root
    else
      nodes.root.cloneNode true
-    Post.Clone.prefix or= 0
+    Post.Clone.suffix or= 0
    for node in [root, $$('[id]', root)...]
-      node.id = Post.Clone.prefix + node.id
-    Post.Clone.prefix++
+      node.id += "_#{Post.Clone.suffix}"
+    Post.Clone.suffix++

    @nodes = @parseNodes root

@@ -44,12 +44,10 @@ Post.Clone = class extends Post
      @file = {}
      for key, val of @origin.file
        @file[key] = val
-      {fileRoot} = @nodes
-      @file.text  = fileRoot.firstElementChild
-      @file.link  = $ '.fileText > a, .fileText-original', fileRoot
-      @file.thumb = $ 'a.fileThumb > [data-md5]', fileRoot
+      for key, selector of Site.selectors.file
+        @file[key] = $ selector, @nodes.root
      @file.thumbLink = @file.thumb?.parentNode
-      @file.fullImage = $ '.full-image', fileRoot
+      @file.fullImage = $ '.full-image', @file.thumbLink if @file.thumbLink
      @file.videoControls = $ '.video-controls', @file.text

      @file.thumb.muted = true if @file.videoThumb
--- a/src/classes/Post.coffee
+++ b/src/classes/Post.coffee
@@ -6,7 +6,7 @@ class Post
    @normalizedOriginal = Build.Test.normalize root
    <% } %>

-    @ID       = +root.id[2..]
+    @ID       = +root.id.match(/\d*$/)[0]
    @threadID = @thread.ID
    @boardID  = @board.ID
    @fullID   = "#{@board}.#{@ID}"
@@ -16,12 +16,13 @@ class Post

    @nodes = @parseNodes root

-    if not (@isReply = $.hasClass @nodes.post, 'reply')
+    if not (@isReply = @ID isnt @threadID)
      @thread.OP = @
-      @thread.isArchived = !!$ '.archivedIcon', @nodes.info
-      @thread.isSticky   = !!$ '.stickyIcon', @nodes.info
-      @thread.isClosed   = @thread.isArchived or !!$ '.closedIcon', @nodes.info
-      @thread.kill() if @thread.isArchived
+      for key in ['isSticky', 'isClosed', 'isArchived']
+        @thread[key] = if (selector = Site.selectors.icons[key]) then !!$(selector, @nodes.info) else false
+      if @thread.isArchived
+        @thread.isClosed = true
+        @thread.kill()

    @info =
      subject:   @nodes.subject?.textContent or undefined
@@ -32,7 +33,7 @@ class Post
      pass:      @nodes.pass?.title.match(/\d*$/)[0]
      flagCode:  @nodes.flag?.className.match(/flag-(\w+)/)?[1].toUpperCase()
      flag:      @nodes.flag?.title
-      date:      if @nodes.date then new Date(@nodes.date.dataset.utc * 1000)
+      date:      if @nodes.date then new Date(@nodes.date.getAttribute('datetime') or (@nodes.date.dataset.utc * 1000))

    if Conf['Anonymize']
      @info.nameBlock = 'Anonymous'
@@ -62,29 +63,20 @@ class Post
    g.posts.push   @fullID, @

  parseNodes: (root) ->
-    post = $ '.post',     root
-    info = $ '.postInfo', post
+    s = Site.selectors
+    post = $(s.post, root) or root
+    info = $ s.infoRoot, post
    nodes =
-      root:         root
-      post:         post
-      info:         info
-      subject:      $ '.subject',            info
-      name:         $ '.name',               info
-      email:        $ '.useremail',          info
-      tripcode:     $ '.postertrip',         info
-      uniqueIDRoot: $ '.posteruid',          info
-      uniqueID:     $ '.posteruid > .hand',  info
-      capcode:      $ '.capcode.hand',       info
-      pass:         $ '.n-pu',               info
-      flag:         $ '.flag, .countryFlag', info
-      date:         $ '.dateTime',           info
-      nameBlock:    $ '.nameBlock',          info
-      quote:        $ '.postNum > a:nth-of-type(2)', info
-      reply:        $ '.replylink',          info
-      fileRoot:     $ '.file',        post
-      comment:      $ '.postMessage', post
-      quotelinks:   []
+      root:       root
+      post:       post
+      info:       info
+      comment:    $ s.comment, post
+      quotelinks: []
      archivelinks: []
+    for key, selector of s.info
+      nodes[key] = $ selector, info
+    Site.parseNodes?(@, nodes)
+    nodes.uniqueIDRoot or= nodes.uniqueID

    # XXX Edge invalidates HTMLCollections when an ancestor node is inserted into another node.
    # https://developer.microsoft.com/en-us/microsoft-edge/platform/issues/7560353/
@@ -108,7 +100,7 @@ class Post
    #   'Comment too long'...
    #   EXIF data. (/p/)
    @nodes.commentClean = bq = @nodes.comment.cloneNode true
-    @cleanComment bq
+    Site.cleanComment?(bq)
    @info.comment = @nodesToText bq

  commentDisplay: ->
@@ -121,7 +113,7 @@ class Post
    #   Trailing spaces.
    bq = @nodes.commentClean.cloneNode true
    @cleanSpoilers bq unless Conf['Remove Spoilers'] or Conf['Reveal Spoilers']
-    @cleanCommentDisplay bq
+    Site.cleanCommentDisplay?(bq)
    @nodesToText(bq).trim().replace(/\s+$/gm, '')

  nodesToText: (bq) ->
@@ -132,29 +124,15 @@ class Post
      text += node.data or '\n'
    text

-  cleanComment: (bq) ->
-    if (abbr = $ '.abbr', bq) # 'Comment too long' or 'EXIF data available'
-      for node in $$ '.abbr + br, .exif', bq
-        $.rm node
-      for i in [0...2]
-        $.rm br if (br = abbr.previousSibling) and br.nodeName is 'BR'
-      $.rm abbr
-
  cleanSpoilers: (bq) ->
-    spoilers = $$ 's', bq
+    spoilers = $$ Site.selectors.spoiler, bq
    for node in spoilers
      $.replace node, $.tn '[spoiler]'
    return

-  cleanCommentDisplay: (bq) ->
-    $.rm b if (b = $ 'b', bq) and /^Rolled /.test(b.textContent)
-    $.rm $('.fortune', bq)
-
  parseQuotes: ->
    @quotes = []
-    # XXX https://github.com/4chan/4chan-JS/issues/77
-    # 4chan currently creates quote links inside [code] tags; ignore them
-    for quotelink in $$ ':not(pre) > .quotelink', @nodes.comment
+    for quotelink in $$ Site.selectors.quotelink, @nodes.comment
      @parseQuote quotelink
    return

@@ -165,13 +143,7 @@ class Post
    #  - catalog links. (>>>/b/catalog or >>>/b/search)
    #  - rules links. (>>>/a/rules)
    #  - text-board quotelinks. (>>>/img/1234)
-    match = quotelink.href.match ///
-      ^https?://boards\.4chan\.org/+
-      ([^/]+) # boardID
-      /+(?:res|thread)/+\d+(?:[/?][^#]*)?#p
-      (\d+)   # postID
-      $
-    ///
+    match = quotelink.href.match Site.regexp.quotelink
    return unless match or (@isClone and quotelink.dataset.postID) # normal or resurrected quote

    @nodes.quotelinks.push quotelink
@@ -183,32 +155,24 @@ class Post
    @quotes.push fullID unless fullID in @quotes

  parseFile: ->
-    {fileRoot} = @nodes
-    return unless fileRoot
-    return if not (link = $ '.fileText > a, .fileText-original > a', fileRoot)
-    return if not (info = link.nextSibling?.textContent.match /\(([\d.]+ [KMG]?B).*\)/)
-    fileText = fileRoot.firstElementChild
-    @file =
-      text:       fileText
-      link:       link
-      url:        link.href
-      name:       fileText.title or link.title or link.textContent
-      size:       info[1]
-      isImage:    /(jpg|png|gif)$/i.test link.href
-      isVideo:    /webm$/i.test link.href
-      dimensions: info[0].match(/\d+x\d+/)?[0]
-      tag:        info[0].match(/,[^,]*, ([a-z]+)\)/i)?[1]
-    size  = +@file.size.match(/[\d.]+/)[0]
-    unit  = ['B', 'KB', 'MB', 'GB'].indexOf @file.size.match(/\w+$/)[0]
+    file = {}
+    for key, selector of Site.selectors.file
+      file[key] = $ selector, @nodes.root
+    file.thumbLink = file.thumb?.parentNode
+
+    return if not (file.text and file.link)
+    return if not Site.parseFile @, file
+
+    $.extend file,
+      url:     file.link.href
+      isImage: /(jpg|png|gif)$/i.test file.link.href
+      isVideo: /(webm|mp4)$/i.test file.link.href
+    size  = +file.size.match(/[\d.]+/)[0]
+    unit  = ['B', 'KB', 'MB', 'GB'].indexOf file.size.match(/\w+$/)[0]
    size *= 1024 while unit-- > 0
-    @file.sizeInBytes = size
-    if (thumb = $ 'a.fileThumb > [data-md5]', fileRoot)
-      $.extend @file,
-        thumb:     thumb
-        thumbLink: thumb.parentNode
-        thumbURL:  if m = link.href.match(/\d+(?=\.\w+$)/) then "#{location.protocol}//i.4cdn.org/#{@board}/#{m[0]}s.jpg"
-        MD5:       thumb.dataset.md5
-        isSpoiler: $.hasClass thumb.parentNode, 'imgspoiler'
+    file.sizeInBytes = size
+
+    @file = file

  @deadMark =
    # \u00A0 is nbsp
--- a/src/main/Main.coffee
+++ b/src/main/Main.coffee
@@ -274,15 +274,18 @@ Main =
      $.event '4chanXInitFinished'

  initThread: ->
-    if (board = $ '.board')
+    s = Site.selectors
+    if (board = $ s.board)
      threads = []
      posts   = []

-      for threadRoot in $$ '.board > .thread', board
-        thread = new Thread +threadRoot.id[1..], g.BOARD
+      for threadRoot in $$(s.thread, board)
+        thread = new Thread +threadRoot.id.match(/\d*$/)[0], g.BOARD
        thread.nodes.root = threadRoot
        threads.push thread
-        for postRoot in $$('.thread > .postContainer', threadRoot) when $('.postMessage', postRoot)
+        postRoots = $$ s.postContainer, threadRoot
+        postRoots.unshift threadRoot if Site.isOPContainerThread
+        for postRoot in postRoots when $(s.comment, postRoot)
          try
            posts.push new Post postRoot, thread, g.BOARD
          catch err
@@ -295,17 +298,7 @@ Main =
      Main.handleErrors errors if errors

      if g.VIEW is 'thread'
-        scriptData = Get.scriptData()
-        threads[0].postLimit = /\bbumplimit *= *1\b/.test scriptData
-        threads[0].fileLimit = /\bimagelimit *= *1\b/.test scriptData
-        threads[0].ipCount   = if m = scriptData.match /\bunique_ips *= *(\d+)\b/ then +m[1]
-
-      if g.BOARD.ID is 'f' and g.VIEW is 'thread'
-        $.ajax "//a.4cdn.org/f/thread/#{g.THREADID}.json",
-          timeout: $.MINUTE
-          onloadend: ->
-            if @response and posts[0].file
-              posts[0].file.text.dataset.md5 = posts[0].file.MD5 = @response.posts[0].md5
+        Site.parseThreadMetadata?(threads[0])

      Main.callbackNodes 'Thread', threads
      Main.callbackNodesDB 'Post', posts, ->
--- a/src/site/SW.tinyboard.coffee
+++ b/src/site/SW.tinyboard.coffee
@@ -1 +1,78 @@
-SW.tinyboard = {}
+# Selectors, regexps and parsing hooks describing Tinyboard/vichan markup.
+SW.tinyboard =
+  # The OP has no container of its own: the thread element doubles as its root.
+  isOPContainerThread: true
+
+  selectors:
+    board:         'form[name="postcontrols"]'
+    thread:        'div[id^="thread_"]'
+    postContainer: '.reply' # postContainer is thread for OP
+    infoRoot:      '.intro'
+    info:
+      subject:   '.subject'
+      name:      '.name'
+      email:     '.email'
+      tripcode:  '.trip'
+      uniqueID:  '.poster_id'
+      capcode:   '.capcode'
+      flag:      '.flag'
+      date:      'time'
+      nameBlock: 'label'
+      quote:     'a[href*="#q"]'
+      reply:     'a[href*="/res/"]:not([href*="#"])'
+    icons:
+      isSticky:   '.fa-thumb-tack'
+      isClosed:   '.fa-lock'
+    file:
+      text:  '.fileinfo'
+      link:  '.fileinfo > a'
+      thumb: 'a > .post-image'
+    comment:   '.body'
+    spoiler:   '.spoiler'
+    quotelink: 'a[onclick^="highlightReply("]'
+
+  regexp:
+    quotelink:
+      ///
+        /
+        ([^/]+) # boardID
+        /res/\d+\.html#
+        (\d+)   # postID
+        $
+      ///
+
+  # Hook run on freshly parsed post nodes (`nodes` is mutated in place).
+  parseNodes: (post, nodes) ->
+    # Add vichan's span.poster_id around the ID if not already present.
+    return if nodes.uniqueID
+    # Nothing to wrap when the info row or the name label is missing.
+    return unless nodes.info and nodes.nameBlock
+    nodes.info.normalize()
+    {nextSibling} = nodes.nameBlock
+    if nextSibling?.nodeType is 3 and (m = nextSibling.textContent.match /(\s*ID:\s*)(\S+)/)
+      # The regexp is unanchored, so offset the split by where the match starts.
+      nextSibling = nextSibling.splitText m.index + m[1].length
+      nextSibling.splitText m[2].length
+      nodes.uniqueID = uniqueID = $.el 'span', {className: 'poster_id'}
+      $.replace nextSibling, uniqueID
+      $.add uniqueID, nextSibling
+
+  # Fill `file` with name, size, dimensions and thumbnail data read from the
+  # file info text. Returns false when `file` should not be attached to `post`.
+  parseFile: (post, file) ->
+    {text, link, thumb} = file
+    return false if text.parentNode isnt post.nodes.root # file belongs to a reply
+    return false if not (infoNode = $ '.unimportant', text)
+    return false if not (info = infoNode.textContent.match /\((Spoiler Image, )?([\d.]+ [KMG]?B).*\)/)
+    nameNode = $ '.postfilename', text
+    $.extend file,
+      name:       if nameNode then (nameNode.title or nameNode.textContent) else link.pathname.match(/[^/]*$/)[0]
+      size:       info[2]
+      dimensions: info[0].match(/\d+x\d+/)?[0]
+    if thumb
+      $.extend file,
+        # `in` tests array membership, not substrings, so use indexOf here.
+        # A thumbnail under /static/ is replaced by the link to the file itself.
+        thumbURL:  if thumb.src.indexOf('/static/') >= 0 then link.href else thumb.src
+        isSpoiler: !!info[1]
+    true
--- a/src/site/SW.yotsuba.coffee
+++ b/src/site/SW.yotsuba.coffee
@@ -1,6 +1,105 @@
 SW.yotsuba =
+  # Posts are found via `selectors.postContainer` only; the thread element is never used as the OP's root.
+  isOPContainerThread: false
+
+  selectors:
+    board:         '.board'
+    thread:        '.thread'
+    postContainer: '.postContainer'
+    post:          '.post'
+    infoRoot:      '.postInfo'
+    info:
+      subject:   '.subject'
+      name:      '.name'
+      email:     '.useremail'
+      tripcode:  '.postertrip'
+      uniqueIDRoot: '.posteruid'
+      uniqueID:  '.posteruid > .hand'
+      capcode:   '.capcode.hand'
+      pass:      '.n-pu'
+      flag:      '.flag, .countryFlag'
+      date:      '.dateTime'
+      nameBlock: '.nameBlock'
+      quote:     '.postNum > a:nth-of-type(2)'
+      reply:     '.replylink'
+    icons:
+      isSticky:   '.stickyIcon'
+      isClosed:   '.closedIcon'
+      isArchived: '.archivedIcon'
+    file:
+      text:  '.file > :first-child'
+      # The pre-refactor Post.parseFile selector also matched `.fileText-original > a`; keep that.
+      link:  '.fileText > a, .fileText-original > a'
+      thumb: 'a.fileThumb > [data-md5]'
+    comment:   '.postMessage'
+    spoiler:   's'
+    quotelink: ':not(pre) > .quotelink' # XXX https://github.com/4chan/4chan-JS/issues/77: 4chan currently creates quote links inside [code] tags; ignore them
+
+  regexp:
+    quotelink:
+      ///
+        ^https?://boards\.4chan\.org/+
+        ([^/]+) # boardID
+        /+thread/+\d+(?:[/?][^#]*)?#p
+        (\d+)   # postID
+        $
+      ///
+
   isThisPageLegit: ->
     # not 404 error page or similar.
     location.hostname is 'boards.4chan.org' and
     !$('link[href*="favicon-status.ico"]', d.head) and
     d.title not in ['4chan - Temporarily Offline', '4chan - Error', '504 Gateway Time-out']
+
+  # Text of the first inline <head> script mentioning `cooldowns =`, or '' if none.
+  scriptData: ->
+    for script in $$ 'script:not([src])', d.head
+      return script.textContent if /\bcooldowns *=/.test script.textContent
+    ''
+
+  # Read the post/file limit flags and IP count for `thread` from the inline
+  # script data; on /f/ also fetch the thread JSON to fill in the OP file's MD5.
+  parseThreadMetadata: (thread) ->
+    scriptData = @scriptData()
+    thread.postLimit = /\bbumplimit *= *1\b/.test scriptData
+    thread.fileLimit = /\bimagelimit *= *1\b/.test scriptData
+    thread.ipCount   = if (m = scriptData.match /\bunique_ips *= *(\d+)\b/) then +m[1]
+
+    # thread.OP is only set once the OP has been parsed; tolerate its absence.
+    if g.BOARD.ID is 'f' and thread.OP?.file
+      {file} = thread.OP
+      $.ajax "//a.4cdn.org/f/thread/#{thread.ID}.json",
+        timeout: $.MINUTE
+        onloadend: ->
+          if @response
+            file.text.dataset.md5 = file.MD5 = @response.posts[0].md5
+
+  # Fill `file` from the text after the file link; returns false if no size is found there.
+  parseFile: (post, file) ->
+    {text, link, thumb} = file
+    return false if not (info = link.nextSibling?.textContent.match /\(([\d.]+ [KMG]?B).*\)/)
+    $.extend file,
+      name:       text.title or link.title or link.textContent
+      size:       info[1]
+      dimensions: info[0].match(/\d+x\d+/)?[0]
+      tag:        info[0].match(/,[^,]*, ([a-z]+)\)/i)?[1]
+    if thumb
+      $.extend file,
+        thumbURL:  if (m = link.href.match /\d+(?=\.\w+$)/) then "#{location.protocol}//i.4cdn.org/#{post.board}/#{m[0]}s.jpg"
+        MD5:       thumb.dataset.md5
+        isSpoiler: $.hasClass thumb.parentNode, 'imgspoiler'
+    true
+
+  # Strip the .abbr notice, the line breaks around it and any .exif nodes from `bq`.
+  cleanComment: (bq) ->
+    if (abbr = $ '.abbr', bq) # 'Comment too long' or 'EXIF data available'
+      for node in $$ '.abbr + br, .exif', bq
+        $.rm node
+      for i in [0...2]
+        $.rm br if (br = abbr.previousSibling) and br.nodeName is 'BR'
+      $.rm abbr
+
+  # Remove the first <b> if its text starts with 'Rolled ', and the first .fortune element.
+  cleanCommentDisplay: (bq) ->
+    $.rm b if (b = $ 'b', bq) and /^Rolled /.test(b.textContent)
+    $.rm $('.fortune', bq)