ايران ويج

نسخه‌ی کامل: خواندن اطلاعات یک PDF
شما در حال مشاهده‌ی نسخه‌ی متنی این صفحه می‌باشید. مشاهده‌ی نسخه‌ی کامل با قالب بندی مناسب.
این کد برای کنسول نوشته شده و می تونید هرجا خروجی هست یک MsgBox بذارید اما من دست

به سورس نزدم و همون اصلش رو گذاشتم چون زیاد سورس از کنسول نذاشته بودم

کد:
#Compile Exe
#Dim All

#Include "Win32API.inc"

Type TPdfObj
    number As Long
    offset As Long
End Type

Type TPdfInfo
    author          As Asciiz * 80
    creator         As Asciiz * 80
    producer        As Asciiz * 80
    keywords        As Asciiz * 512
    subject         As Asciiz * %MAX_PATH
    title           As Asciiz * %MAX_PATH
    creationDate    As Asciiz * 32
    modDate         As Asciiz * 32
    pageCount       As Long
    fileSize        As Quad
    version         As Asciiz * 6
    linearized      As Byte
    tagged          As Byte
    encrypted       As Byte
End Type

%BUF_SIZE       = 1024

%ERR_PASSED_BOF = 151
%ERR_NAN        = 152
%ERR_NO_TRAILER = 153
%ERR_NOT_FOUND  = 154

Sub STDOUT(sOut As String)
    Static hConsole As Dword
    Local bWritten As Dword
    If hConsole = 0 Then AllocConsole: hConsole = GetStdHandle(%STD_OUTPUT_HANDLE)
    WriteFile hConsole,ByVal StrPtr(sOut),Len(sOut),bWritten,ByVal %NULL
End Sub

Sub Usage()
    STDOUT "Usage:" & $CrLf
    STDOUT "pdfinfo path" & $CrLf
End Sub

'-- Avoid need for global variable hFile
Function GetOrSetHandle(h As Long) As Long
    Static hFile As Long

    If hFile = 0 Then
        hFile = h
    End If

    Function = hFile

End Function

Function GetData(offset As Quad, ln As Long) As String

    Local tmp As String
    Local hFile As Long

    hFile = GetOrSetHandle(0)

    Seek #hFile, offset
    Get$ #hFile, ln, tmp
    Function = tmp

End Function

Function GetNumber(p As Quad, num As Long) As Long

    Local tmpStr As String
    Local ch As String

    Function = %FALSE

    Do While GetData(p, 1) < "!"
        Incr p
    Loop

    ch = GetData(p, 1)
    Do While (ch >= "0" And ch <= "9")
        tmpStr = tmpStr & ch
        Incr p
        ch = GetData(p, 1)
    Loop
    If tmpStr = "" Then Exit Function
    num = Val(tmpStr)
    Function = %TRUE

End Function

Function GetString(p As Quad, str As Asciiz) As Long

    Local tmp As String
    Local lp As Long
    Local ms As String

    Function = %FALSE
    ms = "/" & $CrLf

    Do While GetData(p, 1) < "!"
        Incr p
    Loop

    tmp = GetData(p, %BUF_SIZE)

    '-- Find terminator
    lp = InStr(tmp, Any ms)
    tmp = Left$(tmp, lp-1)

    '-- Is it Unicode encoded?
    If Asc(Mid$(tmp,2,1)) = 254 Then
        '-- If yes, convert to ascii
        tmp = ACode$(Mid$(tmp, 3))
    End If

    '-- Trim enclosing ()
    lp = InStr(-1, tmp, ")")
    If lp > 0 Then
        tmp = Left$(tmp, lp-1)
    End If
    If Left$(tmp, 1) = "(" Then
        tmp = Mid$(tmp, 2)
    End If

    '-- Remove escape char
    tmp = Remove$(tmp, "\")

    If Len(tmp) > 0 Then
        Function = %TRUE
        str = Trim$(tmp)
    Else
        str = ""
    End If

End Function

Function IsString(p As Quad, str As String) As Long

    Local ln As Long

    ln = Len(str)
    If GetData(p, ln) = str Then
        Function = %TRUE
    Else
        Function = %FALSE
    End If
    p = p + ln

End Function

Function FindStrInDict(p As Quad, str As String) As Long
    Local tmp As String
    Local lp As Long

    Function = %FALSE

    tmp = GetData(p, %BUF_SIZE)
    lp = InStr(tmp, str)
    If lp > 0 Then
        p = p + lp -1
        Function = IsString(p, str)
        Exit Function
    End If

End Function

Function GetPdfInfo(filename As String, PdfInfo As TPdfInfo) As Long
    Dim PdfObjList(0 To 0) As TPdfObj
    Local k As Long
    Local cnt As Long
    Local pagesNum As Long
    Local rootNum As Long
    Local infoNum As Long
    Local ch As String
    Local p As Quad
    Local p2 As Quad
    Local hFile As Long

    Function = 0

    Try
        hFile = FreeFile
        Call GetOrSetHandle(hFile)
        Open filename For Binary Access Read As #hFile Base=0

        '-- Get file size
        PdfInfo.fileSize = Lof(hFile)

        '-- Get the PDF version
        p = 5
        Call GetString(p, PdfInfo.version)

        '-- Find 'startxref' ignoring '%%EOF'
        p = Lof(hFile) - 5
        p2 = 0
        Do
            ch = GetData(p, 1)
            Do While (p > p2) And (ch <> "f")
                Decr p
                ch = GetData(p, 1)
            Loop
            If (p <= p2) Then Error %ERR_PASSED_BOF
            If LCase$(GetData(p-8, 9)) = "startxref" Then Exit Do
            Decr p
        Loop
        Incr p

        rootNum = -1 '-- Flags not yet found
        infoNum = -1

        '-- xref offset ==> k
        If IsFalse GetNumber(p, k) Then Error %ERR_NAN
        p = k + 4

        Do
            '-- get base object number ==> k
            If IsFalse GetNumber(p, k) Then Error %ERR_NAN

            '-- get object count ==> cnt
            If IsFalse GetNumber(p, cnt) Then Error %ERR_NAN

            ch = GetData(p, 1)
            Do While IsFalse(ch >= "0" And ch <= "9")
                Incr p
                ch = GetData(p, 1)
            Loop
            p2 = p

            '-- add all objects in section to list ...
            For cnt = 0 To cnt-1
                ReDim Preserve PdfObjList(UBound(PdfObjList) + 1)
                PdfObjList(UBound(PdfObjList)).number = k + cnt
                If IsFalse GetNumber(p, PdfObjList(UBound(PdfObjList)).offset) Then Error %ERR_NAN
                p2 = p2 + 20
                p = p2
            Next cnt

            ch = GetData(p, 1)
            If IsTrue(ch >= "0" And ch <= "9") Then Iterate Loop

            If IsFalse IsString(p, "trailer") Then Error %ERR_NO_TRAILER

            '-- Find the /Encrypt object
            p2 = p
            If IsTrue FindStrInDict(p, "/Encrypt") Then
                PdfInfo.encrypted = %TRUE
            End If
            p = p2

            '-- Find the /Info object
            p2 = p
            If (infoNum = -1) And IsTrue FindStrInDict(p, "/Info") Then
                If IsFalse GetNumber(p, infoNum) Then Error %ERR_NAN
            End If
            p = p2

            '-- Find the /Root object
            If (rootNum = -1) And IsTrue FindStrInDict(p, "/Root") Then
                If IsFalse GetNumber(p, rootNum) Then Error %ERR_NAN
            End If
            p = p2

            If (rootNum <> -1) And (infoNum <> -1) Then
                If IsFalse FindStrInDict(p, "/Prev") Then Exit Do
            End If

            If IsFalse GetNumber(p, k) Then Error %ERR_NAN
            p = k + 4
        Loop

        If rootNum < 0 Then Error %ERR_NOT_FOUND

        '-- Find "Linearized" (Optimized) and "StructTreeRoot" (Tagged) keys
        k = 0
        Do While k < UBound(PdfObjList)
            p = PdfObjList(k).offset
            If FindStrInDict(p, "/Linearized 1") Then
                PdfInfo.linearized = %TRUE
'            elseif FindStrInDict(p, "/StructTreeRoot") then
'                PdfInfo.tagged = %TRUE
            End If
            Incr k
        Loop

        If infoNum > 0 Then
            Try
                k = 0
                Do While k < UBound(PdfObjList)
                    If PdfObjList(k).number = infoNum Then Exit Do
                    Incr k
                Loop
                If k = UBound(PdfObjList) Then Error %ERR_NOT_FOUND
                p = PdfObjList(k).offset
                If IsFalse GetNumber(p, k) Or (k <> infoNum) Then Error %ERR_NAN

                p2 = p
                If FindStrInDict(p, "/CreationDate") Then GetString p, PdfInfo.creationDate

                p = p2
                If FindStrInDict(p, "/ModDate") Then GetString p, pdfInfo.modDate

                p = p2
                If FindStrInDict(p, "/Producer") Then GetString p, PdfInfo.producer

                p = p2
                If FindStrInDict(p, "/Author") Then GetString p, PdfInfo.author

                p = p2
                If FindStrInDict(p, "/Creator") Then GetString p, pdfInfo.creator

                p = p2
                If FindStrInDict(p, "/Title") Then GetString p, pdfInfo.title

                p = p2
                If FindStrInDict(p, "/Subject") Then GetString p, pdfInfo.subject

                p = p2
                If FindStrInDict(p, "/Keywords") Then GetString p, pdfInfo.keywords
            Catch
                '-- Ignore info errors
            End Try
        End If

        k = 0
        Do While k < UBound(PdfObjList)
            If PdfObjList(k).number = rootNum Then Exit Do
            Incr k
        Loop

        If k = UBound(PdfObjList) Then Error %ERR_NOT_FOUND

        p = PdfObjList(k).offset
        If IsFalse GetNumber(p, k) Or (k <> rootNum) Then Error %ERR_NAN
        If IsFalse FindStrInDict(p, "/Pages") Then Error %ERR_NOT_FOUND
        If IsFalse GetNumber(p, pagesNum) Then Error %ERR_NAN

        k = 0
        Do While k < UBound(PdfObjList)
            If PdfObjList(k).number = pagesNum Then Exit Do
            Incr k
        Loop

        If k = UBound(pdfObjList) Then Error %ERR_NOT_FOUND

        p = PdfObjList(k).offset

        If IsFalse GetNumber(p, k) Or (k <> pagesNum) Then Error %ERR_NAN

        If IsFalse FindStrInDict(p, "/Count") Then Error %ERR_NOT_FOUND
        If IsFalse GetNumber(p, cnt) Then Error %ERR_NAN

        If IsTrue GetNumber(p, k) And IsString(p, " R") Then
            k = 0
            Do While k <= UBound(PdfObjList)
                If PdfObjList(k).number = cnt Then Exit Do
                Incr k
            Loop
            If k > UBound(PdfObjList) Then Error %ERR_NOT_FOUND

            p = PdfObjList(k).offset
            If  IsFalse GetNumber(p, k) Or _
                IsFalse GetNumber(p, k) Or _
                IsFalse IsString(p, " obj") Or _
                IsFalse GetNumber(p, cnt) Then

                    Error %ERR_NAN
            End If
        End If

    Catch
        Select Case Err
            Case %ERR_NAN
                STDOUT "Error: PDF file is damaged." & $CrLf
            Case %ERR_PASSED_BOF
                STDOUT "Error: PDF file is damaged." & $CrLf
            Case %ERR_NO_TRAILER
                STDOUT "Error: Couldn't find trailer dictionary." & $CrLf
            Case %ERR_NOT_FOUND
                STDOUT "Error: Object not found." & $CrLf
            Case Else
        End Select

        Function = Err
    Finally
        Close #hFile
    End Try

    PdfInfo.pageCount = cnt

End Function

Function PBMain() As Long

    Local ret As Long
    Local Path As String
    Local PdfInfo As TPdfInfo
    Local msg As String

    Path = Remove$(Command$, $Dq)
    If Path = "" Then
        Usage
        Function = 1
        Exit Function
    End If

    ret = GetPdfInfo(Path, PdfInfo)

    If ret = 0 Then
                msg = _
                "Title:        " & PdfInfo.title & $CrLf & _
                "Subject:      " & PdfInfo.subject & $CrLf & _
                "Keywords:     " & PdfInfo.keywords & $CrLf & _
                "Author:       " & PdfInfo.author & $CrLf & _
                "Creator:      " & PdfInfo.creator & $CrLf & _
                "Producer:     " & pdfInfo.producer & $CrLf & _
                "CreationDate: " & pdfInfo.creationDate & $CrLf & _
                "ModDate:      " & pdfInfo.modDate & $CrLf & _
                _ ' "Tagged:       " & iif$(PdfInfo.tagged, "yes", "no") & $CRLF & _
                "Pages:        " & Trim$(Str$(PdfInfo.pageCount)) & $CrLf & _
                "Encrypted:    " & IIf$(PdfInfo.encrypted, "yes", "no") & $CrLf & _
                "File size:    " & Trim$(Str$(PdfInfo.fileSize)) & " bytes" & $CrLf & _
                "Optimized:    " & IIf$(PdfInfo.linearized, "yes", "no") & $CrLf & _
                "PDF version:  " & PdfInfo.version & $CrLf

                STDOUT msg
    End If

    Sleep 2000

End Function