1
0
mirror of synced 2024-11-22 09:14:23 +01:00

Optimize the handling of problematic files

This commit is contained in:
jvoisin 2018-10-22 19:12:39 +02:00
parent 44f267a596
commit 38df679a88
5 changed files with 36 additions and 29 deletions

View File

@ -1,5 +1,6 @@
import abc import abc
import os import os
import re
from typing import Set, Dict, Union from typing import Set, Dict, Union
assert Set # make pyflakes happy assert Set # make pyflakes happy
@ -17,6 +18,11 @@ class AbstractParser(abc.ABC):
""" """
:raises ValueError: Raised upon an invalid file :raises ValueError: Raised upon an invalid file
""" """
if re.search('^[a-z0-9./]', filename) is None:
# Some parsers are calling external binaries,
# this prevents shell command injections
filename = os.path.join('.', filename)
self.filename = filename self.filename = filename
fname, extension = os.path.splitext(filename) fname, extension = os.path.splitext(filename)
self.output_filename = fname + '.cleaned' + extension self.output_filename = fname + '.cleaned' + extension

View File

@ -1,11 +1,7 @@
import json import json
import os import os
import re
import shutil
import subprocess import subprocess
import tempfile from typing import Dict, Union, Set
from typing import Dict, Union, Set, Callable, Any
from . import abstract from . import abstract
@ -20,23 +16,8 @@ class ExiftoolParser(abstract.AbstractParser):
""" """
meta_whitelist = set() # type: Set[str] meta_whitelist = set() # type: Set[str]
def _handle_problematic_filename(self, callback: Callable[[str], Any]) -> bytes:
""" This method takes a filename with a potentially problematic name,
and safely applies a `callback` to it.
"""
if re.search('^[a-z0-9/]', self.filename) is not None:
return callback(self.filename)
tmpdirname = tempfile.mkdtemp()
fname = os.path.join(tmpdirname, "temp_file")
shutil.copy(self.filename, fname)
out = callback(fname)
shutil.rmtree(tmpdirname)
return out
def get_meta(self) -> Dict[str, Union[str, dict]]: def get_meta(self) -> Dict[str, Union[str, dict]]:
fun = lambda f: subprocess.check_output([_get_exiftool_path(), '-json', f]) out = subprocess.check_output([_get_exiftool_path(), '-json', self.filename])
out = self._handle_problematic_filename(fun)
meta = json.loads(out.decode('utf-8'))[0] meta = json.loads(out.decode('utf-8'))[0]
for key in self.meta_whitelist: for key in self.meta_whitelist:
meta.pop(key, None) meta.pop(key, None)

View File

@ -24,10 +24,9 @@ class AVIParser(exiftool.ExiftoolParser):
'SampleRate', 'AvgBytesPerSec', 'BitsPerSample', 'SampleRate', 'AvgBytesPerSec', 'BitsPerSample',
'Duration', 'ImageSize', 'Megapixels'} 'Duration', 'ImageSize', 'Megapixels'}
def remove_all(self):
def __remove_all_internal(self, filename: str):
cmd = [_get_ffmpeg_path(), cmd = [_get_ffmpeg_path(),
'-i', filename, # input file '-i', self.filename, # input file
'-y', # overwrite existing output file '-y', # overwrite existing output file
'-loglevel', 'panic', # Don't show log '-loglevel', 'panic', # Don't show log
'-hide_banner', # hide the banner '-hide_banner', # hide the banner
@ -38,11 +37,8 @@ class AVIParser(exiftool.ExiftoolParser):
'-flags:v', '+bitexact', # don't add any metadata '-flags:v', '+bitexact', # don't add any metadata
'-flags:a', '+bitexact', # don't add any metadata '-flags:a', '+bitexact', # don't add any metadata
self.output_filename] self.output_filename]
subprocess.check_call(cmd)
def remove_all(self) -> bool:
try: try:
self._handle_problematic_filename(self.__remove_all_internal) subprocess.check_call(cmd)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
logging.error("Something went wrong during the processing of %s: %s", self.filename, e) logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
return False return False

View File

@ -204,3 +204,14 @@ class TestCorruptedFiles(unittest.TestCase):
p = video.AVIParser('./tests/data/clean.avi') p = video.AVIParser('./tests/data/clean.avi')
self.assertFalse(p.remove_all()) self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.avi') os.remove('./tests/data/clean.avi')
def test_avi_injection(self):
try:
video._get_ffmpeg_path()
except RuntimeError:
raise unittest.SkipTest
shutil.copy('./tests/data/dirty.torrent', './tests/data/--output.avi')
p = video.AVIParser('./tests/data/--output.avi')
self.assertFalse(p.remove_all())
os.remove('./tests/data/--output.avi')

View File

@ -46,10 +46,23 @@ class TestParameterInjection(unittest.TestCase):
shutil.copy('./tests/data/dirty.avi', './--output') shutil.copy('./tests/data/dirty.avi', './--output')
p = video.AVIParser('--output') p = video.AVIParser('--output')
meta = p.get_meta() meta = p.get_meta()
print(meta)
self.assertEqual(meta['Software'], 'MEncoder SVN-r33148-4.0.1') self.assertEqual(meta['Software'], 'MEncoder SVN-r33148-4.0.1')
os.remove('--output') os.remove('--output')
def test_ffmpeg_injection_complete_path(self):
try:
video._get_ffmpeg_path()
except RuntimeError:
raise unittest.SkipTest
shutil.copy('./tests/data/dirty.avi', './tests/data/ --output.avi')
p = video.AVIParser('./tests/data/ --output.avi')
meta = p.get_meta()
self.assertEqual(meta['Software'], 'MEncoder SVN-r33148-4.0.1')
self.assertTrue(p.remove_all())
os.remove('./tests/data/ --output.avi')
os.remove('./tests/data/ --output.cleaned.avi')
class TestUnsupportedEmbeddedFiles(unittest.TestCase): class TestUnsupportedEmbeddedFiles(unittest.TestCase):
def test_odt_with_svg(self): def test_odt_with_svg(self):